|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.993607305936073, |
|
"eval_steps": 500, |
|
"global_step": 546, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 5.121945381164551, |
|
"learning_rate": 9.997441533484527e-05, |
|
"log_odds_chosen": 7.0944390296936035, |
|
"log_odds_ratio": -0.043758511543273926, |
|
"logits/chosen": -11.323039054870605, |
|
"logits/rejected": -11.33891487121582, |
|
"logps/chosen": -0.5756699442863464, |
|
"logps/rejected": -6.692573547363281, |
|
"loss": 0.721, |
|
"nll_loss": 0.716595470905304, |
|
"rewards/accuracies": 0.9791666865348816, |
|
"rewards/chosen": -0.057566989213228226, |
|
"rewards/margins": 0.6116903424263, |
|
"rewards/rejected": -0.6692573428153992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 6.375761032104492, |
|
"learning_rate": 9.573779290555039e-05, |
|
"log_odds_chosen": 7.421543598175049, |
|
"log_odds_ratio": -0.0334482379257679, |
|
"logits/chosen": -12.235654830932617, |
|
"logits/rejected": -12.264952659606934, |
|
"logps/chosen": -0.5262126326560974, |
|
"logps/rejected": -6.922983169555664, |
|
"loss": 0.6832, |
|
"nll_loss": 0.679860532283783, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0526212677359581, |
|
"rewards/margins": 0.6396770477294922, |
|
"rewards/rejected": -0.6922982931137085, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 5.337998390197754, |
|
"learning_rate": 8.484270686572078e-05, |
|
"log_odds_chosen": 7.344568729400635, |
|
"log_odds_ratio": -0.047614481300115585, |
|
"logits/chosen": -11.1071138381958, |
|
"logits/rejected": -11.16305160522461, |
|
"logps/chosen": -0.6026201844215393, |
|
"logps/rejected": -7.023316860198975, |
|
"loss": 0.7666, |
|
"nll_loss": 0.7618839144706726, |
|
"rewards/accuracies": 0.9791666865348816, |
|
"rewards/chosen": -0.06026201695203781, |
|
"rewards/margins": 0.6420697569847107, |
|
"rewards/rejected": -0.7023317217826843, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8767123287671232, |
|
"grad_norm": 7.20003080368042, |
|
"learning_rate": 6.887525329304994e-05, |
|
"log_odds_chosen": 6.544267177581787, |
|
"log_odds_ratio": -0.08398038893938065, |
|
"logits/chosen": -9.961735725402832, |
|
"logits/rejected": -9.985981941223145, |
|
"logps/chosen": -0.7504870295524597, |
|
"logps/rejected": -6.561529636383057, |
|
"loss": 0.8312, |
|
"nll_loss": 0.8227914571762085, |
|
"rewards/accuracies": 0.9583333134651184, |
|
"rewards/chosen": -0.07504869997501373, |
|
"rewards/margins": 0.5811043381690979, |
|
"rewards/rejected": -0.6561529636383057, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0949771689497716, |
|
"grad_norm": 7.483155727386475, |
|
"learning_rate": 5.015995861989287e-05, |
|
"log_odds_chosen": 6.511773586273193, |
|
"log_odds_ratio": -0.04247206076979637, |
|
"logits/chosen": -11.252288818359375, |
|
"logits/rejected": -11.195887565612793, |
|
"logps/chosen": -0.6266089677810669, |
|
"logps/rejected": -6.222264289855957, |
|
"loss": 0.7339, |
|
"nll_loss": 0.7327039241790771, |
|
"rewards/accuracies": 0.9707112908363342, |
|
"rewards/chosen": -0.06266089528799057, |
|
"rewards/margins": 0.5595656037330627, |
|
"rewards/rejected": -0.6222264766693115, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.3141552511415524, |
|
"grad_norm": 8.435914993286133, |
|
"learning_rate": 3.142137732571437e-05, |
|
"log_odds_chosen": 7.987664699554443, |
|
"log_odds_ratio": -0.019010299816727638, |
|
"logits/chosen": -13.736985206604004, |
|
"logits/rejected": -13.638700485229492, |
|
"logps/chosen": -0.38932979106903076, |
|
"logps/rejected": -7.014037609100342, |
|
"loss": 0.543, |
|
"nll_loss": 0.5411479473114014, |
|
"rewards/accuracies": 0.9958333373069763, |
|
"rewards/chosen": -0.038932979106903076, |
|
"rewards/margins": 0.6624708771705627, |
|
"rewards/rejected": -0.701403796672821, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.5333333333333332, |
|
"grad_norm": 9.413529396057129, |
|
"learning_rate": 1.538745393372281e-05, |
|
"log_odds_chosen": 8.11983871459961, |
|
"log_odds_ratio": -0.027920575812458992, |
|
"logits/chosen": -13.768086433410645, |
|
"logits/rejected": -13.71419906616211, |
|
"logps/chosen": -0.4214509427547455, |
|
"logps/rejected": -7.212196350097656, |
|
"loss": 0.5881, |
|
"nll_loss": 0.5852769613265991, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.04214509576559067, |
|
"rewards/margins": 0.6790744662284851, |
|
"rewards/rejected": -0.7212197184562683, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.752511415525114, |
|
"grad_norm": 9.286341667175293, |
|
"learning_rate": 4.392391488217712e-06, |
|
"log_odds_chosen": 8.560954093933105, |
|
"log_odds_ratio": -0.020617039874196053, |
|
"logits/chosen": -14.232094764709473, |
|
"logits/rejected": -14.112393379211426, |
|
"logps/chosen": -0.40070831775665283, |
|
"logps/rejected": -7.628507614135742, |
|
"loss": 0.5485, |
|
"nll_loss": 0.5464364290237427, |
|
"rewards/accuracies": 0.9958333373069763, |
|
"rewards/chosen": -0.04007083177566528, |
|
"rewards/margins": 0.7227799892425537, |
|
"rewards/rejected": -0.762850821018219, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.9716894977168948, |
|
"grad_norm": 8.754582405090332, |
|
"learning_rate": 3.684053523947406e-08, |
|
"log_odds_chosen": 8.664142608642578, |
|
"log_odds_ratio": -0.022193601354956627, |
|
"logits/chosen": -14.154144287109375, |
|
"logits/rejected": -14.020852088928223, |
|
"logps/chosen": -0.3810846507549286, |
|
"logps/rejected": -7.6645283699035645, |
|
"loss": 0.5579, |
|
"nll_loss": 0.5556921362876892, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.0381084680557251, |
|
"rewards/margins": 0.7283445000648499, |
|
"rewards/rejected": -0.7664528489112854, |
|
"step": 540 |
|
} |
|
], |
|
"logging_steps": 60, |
|
"max_steps": 546, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|