# Source: HuggingFace model repo page (shubhamprshr)
# Training in progress, step 300
# Commit: c3e7cbc (verified)
---
# Run mode selector for the entry script.
mode: train

# Dataset construction settings.
experiment:
  dataset_size: 6000
  dataset_seed: 1234
  # Fraction of the dataset held out for evaluation.
  test_size: 0.1

# HF auth token from the environment; resolves to null when unset.
# NOTE(review): indentation was stripped in this file — nesting reconstructed;
# confirm whether hf_token/output sit at top level in the original config.
hf_token: ${oc.env:HF_TOKEN,null}

output:
  root_path: ${oc.env:ROOT_PATH}
  # Run name assembled from model/task/algorithm settings; all interpolation
  # paths are absolute, so this works regardless of where `output` is nested.
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_${algorithm.training.scheduler_params.min_prob}_${algorithm.training.max_steps}
# LoRA adapter hyperparameters (PEFT-style fields).
lora:
  r: 32
  # Scaling factor (alpha / r = 2 effective scale).
  alpha: 64
  dropout: 0.1
  target_modules:
    - q_proj
    - v_proj
  task_type: CAUSAL_LM

# Optional up-front GPU memory reservation.
# NOTE(review): nesting reconstructed from stripped indentation — these three
# keys look like top-level runtime settings, not lora children; confirm.
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
# Quoted: the value contains ':' — safe as a plain scalar here, but quoting
# avoids any mis-parse by stricter tooling.
gpu_device: 'cuda:0'
# Base model selection and load options.
model:
  family: meta-llama
  trim: Llama-3.2-3B-Instruct
  # Full hub id composed from the two fields above.
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
# Task definition: Blocksworld planning.
task:
  # "1246" matches the plan-length splits listed in data_files below.
  name: blocksworld1246
  data_files:
    - data/blocksworld/train_set-1-complete-correct.json
    - data/blocksworld/train_set-2-complete-correct.json
    - data/blocksworld/train_set-4-complete-correct.json
    - data/blocksworld/train_set-6-complete-correct.json
  icl_examples_file: data/blocksworld/train_set-2-more_with_trace.json
  use_icl_examples: false
  # Sequence-length budgets used during training.
  training:
    max_prompt_length: 1600
    max_completion_length: 512
  # Evaluation-time settings.
  # NOTE(review): indentation was stripped — `inference` is nested under task
  # here because its paths are all blocksworld-specific; confirm against the
  # original config (it may be a top-level key).
  inference:
    checkpoint: 1200
    steps: 4
    # temperature 0.0 with a single sample (sc_num: 1) => greedy decoding.
    temperature: 0.0
    sc_num: 1
    use_icl: false
    icl_num: 2
    prompt_path: prompts/blocksworld/pool_prompt_v1.json
    # `{steps}` is a runtime format placeholder, not a YAML interpolation.
    data_path: data/blocksworld/split_v1/split_v1_step_{steps}_data.json
    config_file: data/blocksworld/bw_config.yaml
    domain_file: data/blocksworld/generated_domain.pddl
    pass_at_k: 1
    num_shot: 4
    resume: 0
    max_new_tokens: 512
    max_batch_size: 64
# RL training algorithm and its trainer arguments.
# Nesting is grounded by the file's own interpolations
# (${algorithm.name}, ${algorithm.training.max_steps}).
algorithm:
  name: sgrpo
  training:
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 300
    # Curriculum sampling over the difficulty splits.
    curriculum: true
    curriculum_schedule: classic
    scheduler_params:
      mu_exp: 0.5
      sigma: 0.5
      # NOTE(review): a boolean for "min_prob" looks odd next to the float
      # params above — confirm the scheduler expects a flag, not a probability.
      min_prob: true
    # Effective batch = 2 * 4 accumulation steps per device.
    per_device_train_batch_size: 2
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
    num_generations: 8
    # presumably the KL penalty coefficient (GRPO-family trainers) — confirm.
    beta: 0.001
    use_vllm: true
    vllm_gpu_memory_utilization: 0.5
    report_to:
      - wandb
    push_to_hub: true
    save_strategy: steps
    # Saves only once, at the final step.
    save_steps: ${algorithm.training.max_steps}
    # NOTE(review): eval_strategy is 'steps' but no eval_steps is set —
    # verify the trainer's default interval is intended.
    eval_strategy: steps
    tf32: true