dataset_args:
  path: distilabel-internal-testing/ultrafeedback-critique-sft-v0.1

format_args:
  prompt_format: chatml

model_args:
  pretrained_model_name_or_path: teknium/OpenHermes-2.5-Mistral-7B
  torch_dtype: bfloat16

wandb_args:
  entity: argilla-io
  project: criticon
  name: criticon-sft-full-v0.1

training_args:
  # `trl.SFTTrainer`
  max_seq_length: 2048
  # `transformers.Trainer`
  bf16: true
  do_eval: true
  do_train: true
  evaluation_strategy: steps
  eval_steps: 500
  gradient_accumulation_steps: 2
  gradient_checkpointing: true
  hub_model_id: distilabel-internal-testing/criticon-sft-v0.1
  hub_model_revision: v1.0
  hub_strategy: every_save
  hub_private_repo: true
  push_to_hub: true
  learning_rate: 5.0e-06
  logging_steps: 10
  lr_scheduler_type: cosine
  num_train_epochs: 2
  optim: paged_adamw_32bit
  output_dir: data/criticon
  per_device_train_batch_size: 8
  per_device_eval_batch_size: 8
  save_strategy: epoch
  save_total_limit: null
  # load_best_model_at_end: true
  seed: 42
  warmup_ratio: 0.1
  report_to:
    - wandb

use_accelerate: true
use_unsloth: false
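For reference, below is a minimal sketch of how a config like this could be wired into `trl.SFTTrainer`. It is not the launcher used for this run: the file name, the `messages` column assumed by the ChatML formatting function, and the `test` split used for evaluation are all assumptions, and the custom `use_accelerate` / `use_unsloth` flags are left to the surrounding tooling.

```python
import os
from dataclasses import fields

import torch
import yaml
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from trl import SFTTrainer

# Hypothetical path to the YAML file above.
with open("criticon-sft-v0.1.yaml") as f:
    cfg = yaml.safe_load(f)

# `wandb_args` are not Trainer arguments; wandb reads them from the environment.
os.environ.setdefault("WANDB_ENTITY", cfg["wandb_args"]["entity"])
os.environ.setdefault("WANDB_PROJECT", cfg["wandb_args"]["project"])
os.environ.setdefault("WANDB_NAME", cfg["wandb_args"]["name"])

model_name = cfg["model_args"]["pretrained_model_name_or_path"]
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=getattr(torch, cfg["model_args"]["torch_dtype"]),
)

dataset = load_dataset(cfg["dataset_args"]["path"])


def format_chatml(batch):
    # Placeholder for `format_args.prompt_format: chatml`: assumes a `messages`
    # column and relies on the ChatML chat template shipped with the tokenizer.
    return [
        tokenizer.apply_chat_template(messages, tokenize=False)
        for messages in batch["messages"]
    ]


# `max_seq_length` goes to `SFTTrainer`; every remaining key that matches a
# `TrainingArguments` field goes to the underlying `transformers.Trainer`.
train_cfg = dict(cfg["training_args"])
max_seq_length = train_cfg.pop("max_seq_length")
valid_keys = {f.name for f in fields(TrainingArguments)}
training_args = TrainingArguments(
    **{k: v for k, v in train_cfg.items() if k in valid_keys}
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,  # TRL <= 0.8 style; newer releases use `processing_class`
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset.get("test"),  # split name is an assumption
    formatting_func=format_chatml,
    max_seq_length=max_seq_length,  # moves into `SFTConfig` in newer TRL releases
)
trainer.train()
```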