{ "epoch": 2.9724770642201834, "eval_logits/chosen": -2.20833683013916, "eval_logits/rejected": -2.1962075233459473, "eval_logps/chosen": -315.7890319824219, "eval_logps/rejected": -252.42320251464844, "eval_loss": 0.4124302268028259, "eval_rewards/accuracies": 0.8557692170143127, "eval_rewards/chosen": 3.4337260723114014, "eval_rewards/margins": 3.355928659439087, "eval_rewards/rejected": 0.07779762148857117, "eval_runtime": 53.6255, "eval_samples_per_second": 14.34, "eval_steps_per_second": 0.242, "total_flos": 954757539692544.0, "train_loss": 0.21779433532077588, "train_runtime": 2944.3137, "train_samples_per_second": 7.049, "train_steps_per_second": 0.028 }