|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.8363636363636364, |
|
"eval_steps": 40, |
|
"global_step": 39, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.36363636363636365, |
|
"grad_norm": 109.26669040334116, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.7417855262756348, |
|
"logits/rejected": -2.731990098953247, |
|
"logps/chosen": -402.4177551269531, |
|
"logps/rejected": -259.22088623046875, |
|
"loss": 0.6813, |
|
"rewards/accuracies": 0.3687500059604645, |
|
"rewards/chosen": 0.03968711942434311, |
|
"rewards/margins": 0.023939523845911026, |
|
"rewards/rejected": 0.015747597441077232, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.7272727272727273, |
|
"grad_norm": 54.44026233316852, |
|
"learning_rate": 1e-06, |
|
"logits/chosen": -2.695871353149414, |
|
"logits/rejected": -2.6486518383026123, |
|
"logps/chosen": -395.3858947753906, |
|
"logps/rejected": -254.0742645263672, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.3482853174209595, |
|
"rewards/margins": 0.7143442630767822, |
|
"rewards/rejected": 0.6339409947395325, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 40.622864628442045, |
|
"learning_rate": 9.284285880837946e-07, |
|
"logits/chosen": -2.4416801929473877, |
|
"logits/rejected": -2.394608736038208, |
|
"logps/chosen": -397.1449890136719, |
|
"logps/rejected": -282.96759033203125, |
|
"loss": 0.3285, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 4.510091781616211, |
|
"rewards/margins": 2.8856072425842285, |
|
"rewards/rejected": 1.6244843006134033, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 16.90254803263314, |
|
"learning_rate": 7.342042203498951e-07, |
|
"logits/chosen": -2.289841413497925, |
|
"logits/rejected": -2.265059232711792, |
|
"logps/chosen": -343.917724609375, |
|
"logps/rejected": -253.5391082763672, |
|
"loss": 0.1267, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 5.056896686553955, |
|
"rewards/margins": 4.160580158233643, |
|
"rewards/rejected": 0.896316647529602, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 29.584850630021336, |
|
"learning_rate": 4.7293054570729126e-07, |
|
"logits/chosen": -2.163829803466797, |
|
"logits/rejected": -2.1356260776519775, |
|
"logps/chosen": -357.2837829589844, |
|
"logps/rejected": -278.4024963378906, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 5.750279426574707, |
|
"rewards/margins": 5.663321495056152, |
|
"rewards/rejected": 0.08695853501558304, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 7.589156329261643, |
|
"learning_rate": 2.1940646731880885e-07, |
|
"logits/chosen": -2.1467955112457275, |
|
"logits/rejected": -2.1011250019073486, |
|
"logps/chosen": -361.9977111816406, |
|
"logps/rejected": -266.29840087890625, |
|
"loss": 0.0614, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 5.7993059158325195, |
|
"rewards/margins": 5.681790351867676, |
|
"rewards/rejected": 0.11751584708690643, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 4.596558322612688, |
|
"learning_rate": 4.621229016452155e-08, |
|
"logits/chosen": -2.1508798599243164, |
|
"logits/rejected": -2.1145217418670654, |
|
"logps/chosen": -361.5357360839844, |
|
"logps/rejected": -262.2801208496094, |
|
"loss": 0.0241, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": 6.36221981048584, |
|
"rewards/margins": 6.471512794494629, |
|
"rewards/rejected": -0.10929267108440399, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.8363636363636364, |
|
"step": 39, |
|
"total_flos": 459455468666880.0, |
|
"train_loss": 0.2336020725659835, |
|
"train_runtime": 1351.9222, |
|
"train_samples_per_second": 7.685, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 39, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 40, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 459455468666880.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|