{ "epoch": 3.0, "eval_logits/chosen": -2.3030953407287598, "eval_logits/rejected": -2.2684733867645264, "eval_logps/chosen": -232.67649841308594, "eval_logps/rejected": -234.53941345214844, "eval_loss": 0.5567358136177063, "eval_rewards/accuracies": 0.8020833134651184, "eval_rewards/chosen": -0.00392953073605895, "eval_rewards/margins": 2.367516040802002, "eval_rewards/rejected": -2.371445655822754, "eval_runtime": 202.0305, "eval_samples_per_second": 15.047, "eval_steps_per_second": 0.238 }