{ "epoch": 2.983177570093458, "eval_logits/chosen": -2.3048903942108154, "eval_logits/rejected": -2.273998737335205, "eval_logps/chosen": -252.85711669921875, "eval_logps/rejected": -248.92103576660156, "eval_loss": 0.4957020878791809, "eval_rewards/accuracies": 0.7916666865348816, "eval_rewards/chosen": -0.43204134702682495, "eval_rewards/margins": 2.6231741905212402, "eval_rewards/rejected": -3.05521559715271, "eval_runtime": 251.7994, "eval_samples_per_second": 15.087, "eval_steps_per_second": 0.238 }