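# Training configuration for the Blocksworld experiments.
# OmegaConf ${...} interpolations (environment lookups and cross-references) are resolved at load time.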
mode: train

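# Dataset construction: sample size, seed, held-out test fraction; HF token is read from the environment (null if unset).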
experiment:
  dataset_size: 6000
  dataset_seed: 1234
  test_size: 0.1
  hf_token: ${oc.env:HF_TOKEN,null}

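# Output location and the composite run name built from model, task, algorithm, and scheduler settings.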
output:
  root_path: ${oc.env:ROOT_PATH}
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_${algorithm.training.scheduler_params.min_prob}_${algorithm.training.max_steps}

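# LoRA adapter: rank 32, scaling alpha 64, dropout 0.1, applied to the attention query/value projections.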
lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
    - q_proj
    - v_proj
  task_type: CAUSAL_LM

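# Optionally pre-occupy GPU memory (disabled here); gpu_device selects the target CUDA device.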
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0

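# Base model, loaded in bfloat16 with FlashAttention-2.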
model:
  family: meta-llama
  trim: Llama-3.2-3B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2

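# Blocksworld planning task: four training splits (sets 1, 2, 4, 6) plus an optional in-context-example pool (disabled here).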
task:
  name: blocksworld1246
  data_files:
    - data/blocksworld/train_set-1-complete-correct.json
    - data/blocksworld/train_set-2-complete-correct.json
    - data/blocksworld/train_set-4-complete-correct.json
    - data/blocksworld/train_set-6-complete-correct.json
  icl_examples_file: data/blocksworld/train_set-2-more_with_trace.json
  use_icl_examples: false

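  # Prompt and completion length limits applied during training.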
  training:
    max_prompt_length: 1600
    max_completion_length: 512

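  # Plan-generation evaluation: checkpoint to load, greedy decoding (temperature 0.0), a single sample per problem (sc_num: 1), ICL disabled.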
  inference:
    checkpoint: 1200
    steps: 4
    temperature: 0.0
    sc_num: 1
    use_icl: false
    icl_num: 2
    prompt_path: prompts/blocksworld/pool_prompt_v1.json
    data_path: data/blocksworld/split_v1/split_v1_step_{steps}_data.json
    config_file: data/blocksworld/bw_config.yaml
    domain_file: data/blocksworld/generated_domain.pddl
    pass_at_k: 1
    num_shot: 4
    resume: 0
    max_new_tokens: 512
    max_batch_size: 64

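# RL algorithm ('sgrpo') and trainer hyperparameters; num_generations, beta, and the vLLM options mirror TRL GRPOConfig naming.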
algorithm:
  name: sgrpo
  training:
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 300
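    # Difficulty curriculum; the schedule name and scheduler_params are project-specific and are recorded in output.run_name.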
    curriculum: true
    curriculum_schedule: classic
    scheduler_params:
      mu_exp: 0.5
      sigma: 0.5
      min_prob: true
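    # Memory/throughput: per-device batch 2 with 4-step gradient accumulation, gradient checkpointing, bf16, and 8 generations per prompt via vLLM.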
    per_device_train_batch_size: 2
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    num_generations: 8
    beta: 0.001
    use_vllm: true
    vllm_gpu_memory_utilization: 0.5
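    # Logging to Weights & Biases; checkpoints saved every max_steps steps (i.e. effectively once, at the final step) and pushed to the Hub.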
    report_to:
      - wandb
    push_to_hub: true
    save_strategy: steps
    save_steps: ${algorithm.training.max_steps}
    eval_strategy: steps
    tf32: true