{ "epoch": 2.9724770642201834, "eval_logits/chosen": -2.20833683013916, "eval_logits/rejected": -2.1962075233459473, "eval_logps/chosen": -315.7890319824219, "eval_logps/rejected": -252.42320251464844, "eval_loss": 0.4124302268028259, "eval_rewards/accuracies": 0.8557692170143127, "eval_rewards/chosen": 3.4337260723114014, "eval_rewards/margins": 3.355928659439087, "eval_rewards/rejected": 0.07779762148857117, "eval_runtime": 53.6255, "eval_samples_per_second": 14.34, "eval_steps_per_second": 0.242 }