{ "epoch": 2.993762993762994, "eval_logits/chosen": -2.102522134780884, "eval_logits/rejected": -2.0705835819244385, "eval_logps/chosen": -239.31826782226562, "eval_logps/rejected": -230.43333435058594, "eval_loss": 0.5266440510749817, "eval_rewards/accuracies": 0.7986111044883728, "eval_rewards/chosen": 0.6410874724388123, "eval_rewards/margins": 2.544097900390625, "eval_rewards/rejected": -1.903010368347168, "eval_runtime": 227.0367, "eval_samples_per_second": 15.064, "eval_steps_per_second": 0.238, "total_flos": 4244978440077312.0, "train_loss": 0.33394469337330923, "train_runtime": 13841.1213, "train_samples_per_second": 6.669, "train_steps_per_second": 0.026 }