{ "epoch": 0.8571428571428571, "eval_logits/chosen": -2.6940619945526123, "eval_logits/rejected": -2.7640316486358643, "eval_logps/chosen": -248.7039794921875, "eval_logps/rejected": -237.63433837890625, "eval_loss": 0.6914129853248596, "eval_rewards/accuracies": 0.4375, "eval_rewards/chosen": 0.012479877099394798, "eval_rewards/margins": -0.0028497937601059675, "eval_rewards/rejected": 0.01532967109233141, "eval_runtime": 8.293, "eval_samples_per_second": 11.697, "eval_steps_per_second": 0.241, "total_flos": 34910836359168.0, "train_loss": 0.6931970119476318, "train_runtime": 142.1148, "train_samples_per_second": 6.108, "train_steps_per_second": 0.021 }