# List of `key=value` override entries, one per sequence item. Dotted keys
# (e.g. `algorithm.training.sigma`-style paths) address nested settings.
# Presumably a Hydra-style overrides list — confirm against the consumer.
#
# NOTE(review): every entry ends with " | |". As written, YAML treats that as
# part of each plain scalar (e.g. the value is "mode=train | |", not
# "mode=train"). This looks like residue from a table export — confirm and
# strip it if the consumer expects bare `key=value` strings.

# Top-level selections.
- mode=train | |
- task=blocksworld1246 | |
- algorithm=sgrpo | |
- model=llama3 | |
# Settings under `algorithm.training`.
- algorithm.training.per_device_train_batch_size=2 | |
- algorithm.training.curriculum_schedule=classic | |
# Parameters under `algorithm.training.scheduler_params`.
- algorithm.training.scheduler_params.mu_exp=0.5 | |
- algorithm.training.scheduler_params.sigma=0.5 | |
- algorithm.training.vllm_gpu_memory_utilization=0.5 | |