{ "epoch": 2.994871794871795, "eval_logits/chosen": -2.296416759490967, "eval_logits/rejected": -2.2604804039001465, "eval_logps/chosen": -191.28500366210938, "eval_logps/rejected": -219.8085479736328, "eval_loss": 0.4512675702571869, "eval_rewards/accuracies": 0.8211538195610046, "eval_rewards/chosen": -3.2807652950286865, "eval_rewards/margins": 2.812060594558716, "eval_rewards/rejected": -6.092825889587402, "eval_runtime": 509.575, "eval_samples_per_second": 16.316, "eval_steps_per_second": 0.255 }