|
{ |
|
"epoch": 2.994871794871795, |
|
"eval_logits/chosen": -2.296416759490967, |
|
"eval_logits/rejected": -2.2604804039001465, |
|
"eval_logps/chosen": -191.28500366210938, |
|
"eval_logps/rejected": -219.8085479736328, |
|
"eval_loss": 0.4512675702571869, |
|
"eval_rewards/accuracies": 0.8211538195610046, |
|
"eval_rewards/chosen": -3.2807652950286865, |
|
"eval_rewards/margins": 2.812060594558716, |
|
"eval_rewards/rejected": -6.092825889587402, |
|
"eval_runtime": 509.575, |
|
"eval_samples_per_second": 16.316, |
|
"eval_steps_per_second": 0.255, |
|
"total_flos": 1.0330118169821184e+16, |
|
"train_loss": 0.2632859356310134, |
|
"train_runtime": 40450.3897, |
|
"train_samples_per_second": 5.549, |
|
"train_steps_per_second": 0.022 |
|
} |