{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 12.973544973544973,
"eval_steps": 500,
"global_step": 299,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21164021164021163,
"grad_norm": 0.01096427347511053,
"learning_rate": 5.4347826086956525e-06,
"loss": 0.3143,
"step": 5
},
{
"epoch": 0.42328042328042326,
"grad_norm": 0.013951408676803112,
"learning_rate": 1.0869565217391305e-05,
"loss": 0.3576,
"step": 10
},
{
"epoch": 0.6349206349206349,
"grad_norm": 0.01673784852027893,
"learning_rate": 1.630434782608696e-05,
"loss": 0.3639,
"step": 15
},
{
"epoch": 0.8465608465608465,
"grad_norm": 0.019934486597776413,
"learning_rate": 2.173913043478261e-05,
"loss": 0.3458,
"step": 20
},
{
"epoch": 1.0846560846560847,
"grad_norm": 0.02395496889948845,
"learning_rate": 2.7173913043478262e-05,
"loss": 0.4603,
"step": 25
},
{
"epoch": 1.2962962962962963,
"grad_norm": 0.017271244898438454,
"learning_rate": 3.260869565217392e-05,
"loss": 0.3034,
"step": 30
},
{
"epoch": 1.507936507936508,
"grad_norm": 0.023819392547011375,
"learning_rate": 3.804347826086957e-05,
"loss": 0.3317,
"step": 35
},
{
"epoch": 1.7195767195767195,
"grad_norm": 0.021737879142165184,
"learning_rate": 4.347826086956522e-05,
"loss": 0.3382,
"step": 40
},
{
"epoch": 1.9312169312169312,
"grad_norm": 0.023687848821282387,
"learning_rate": 4.891304347826087e-05,
"loss": 0.3288,
"step": 45
},
{
"epoch": 2.1693121693121693,
"grad_norm": 0.024081828072667122,
"learning_rate": 4.9988484157560136e-05,
"loss": 0.3521,
"step": 50
},
{
"epoch": 2.380952380952381,
"grad_norm": 0.03236062824726105,
"learning_rate": 4.994171922976348e-05,
"loss": 0.3485,
"step": 55
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.03175266832113266,
"learning_rate": 4.9859052738933966e-05,
"loss": 0.2979,
"step": 60
},
{
"epoch": 2.804232804232804,
"grad_norm": 0.030012985691428185,
"learning_rate": 4.974060367671783e-05,
"loss": 0.3055,
"step": 65
},
{
"epoch": 3.0423280423280423,
"grad_norm": 0.10118062049150467,
"learning_rate": 4.958654254084355e-05,
"loss": 0.3521,
"step": 70
},
{
"epoch": 3.253968253968254,
"grad_norm": 0.02641722746193409,
"learning_rate": 4.9397091089704364e-05,
"loss": 0.3041,
"step": 75
},
{
"epoch": 3.4656084656084656,
"grad_norm": 0.03561725839972496,
"learning_rate": 4.9172522023155154e-05,
"loss": 0.2778,
"step": 80
},
{
"epoch": 3.677248677248677,
"grad_norm": 0.04121287539601326,
"learning_rate": 4.8913158589983374e-05,
"loss": 0.2698,
"step": 85
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.04533043131232262,
"learning_rate": 4.8619374122618854e-05,
"loss": 0.2704,
"step": 90
},
{
"epoch": 4.1269841269841265,
"grad_norm": 0.04685904085636139,
"learning_rate": 4.8291591499752365e-05,
"loss": 0.3338,
"step": 95
},
{
"epoch": 4.338624338624339,
"grad_norm": 0.06093249469995499,
"learning_rate": 4.793028253763633e-05,
"loss": 0.2794,
"step": 100
},
{
"epoch": 4.550264550264551,
"grad_norm": 0.060147590935230255,
"learning_rate": 4.7535967310943955e-05,
"loss": 0.2724,
"step": 105
},
{
"epoch": 4.761904761904762,
"grad_norm": 0.05850926414132118,
"learning_rate": 4.710921340416431e-05,
"loss": 0.2547,
"step": 110
},
{
"epoch": 4.973544973544973,
"grad_norm": 0.0650046169757843,
"learning_rate": 4.665063509461097e-05,
"loss": 0.241,
"step": 115
},
{
"epoch": 5.211640211640212,
"grad_norm": 0.07884380221366882,
"learning_rate": 4.616089246822003e-05,
"loss": 0.291,
"step": 120
},
{
"epoch": 5.423280423280423,
"grad_norm": 0.07263286411762238,
"learning_rate": 4.564069046941049e-05,
"loss": 0.256,
"step": 125
},
{
"epoch": 5.634920634920634,
"grad_norm": 0.08540436625480652,
"learning_rate": 4.509077788637446e-05,
"loss": 0.2362,
"step": 130
},
{
"epoch": 5.8465608465608465,
"grad_norm": 0.07003481686115265,
"learning_rate": 4.4511946273257846e-05,
"loss": 0.1973,
"step": 135
},
{
"epoch": 6.084656084656085,
"grad_norm": 0.08097032457590103,
"learning_rate": 4.390502881078296e-05,
"loss": 0.3011,
"step": 140
},
{
"epoch": 6.296296296296296,
"grad_norm": 0.07847806811332703,
"learning_rate": 4.3270899106953105e-05,
"loss": 0.2066,
"step": 145
},
{
"epoch": 6.507936507936508,
"grad_norm": 0.10350590199232101,
"learning_rate": 4.261046993956531e-05,
"loss": 0.2236,
"step": 150
},
{
"epoch": 6.71957671957672,
"grad_norm": 0.11323926597833633,
"learning_rate": 4.192469194234148e-05,
"loss": 0.2215,
"step": 155
},
{
"epoch": 6.931216931216931,
"grad_norm": 0.09281644225120544,
"learning_rate": 4.12145522365689e-05,
"loss": 0.2251,
"step": 160
},
{
"epoch": 7.169312169312169,
"grad_norm": 0.09772001951932907,
"learning_rate": 4.048107301022005e-05,
"loss": 0.2554,
"step": 165
},
{
"epoch": 7.380952380952381,
"grad_norm": 0.10654748976230621,
"learning_rate": 3.9725310046596595e-05,
"loss": 0.2058,
"step": 170
},
{
"epoch": 7.592592592592593,
"grad_norm": 0.08744902163743973,
"learning_rate": 3.894835120461584e-05,
"loss": 0.1731,
"step": 175
},
{
"epoch": 7.804232804232804,
"grad_norm": 0.11190956830978394,
"learning_rate": 3.815131485292678e-05,
"loss": 0.1955,
"step": 180
},
{
"epoch": 8.042328042328043,
"grad_norm": 0.3187606632709503,
"learning_rate": 3.733534826011008e-05,
"loss": 0.1913,
"step": 185
},
{
"epoch": 8.253968253968253,
"grad_norm": 0.12188898772001266,
"learning_rate": 3.6501625943278805e-05,
"loss": 0.1777,
"step": 190
},
{
"epoch": 8.465608465608465,
"grad_norm": 0.09240734577178955,
"learning_rate": 3.5651347977457214e-05,
"loss": 0.1776,
"step": 195
},
{
"epoch": 8.677248677248677,
"grad_norm": 0.12097247689962387,
"learning_rate": 3.478573826817099e-05,
"loss": 0.1812,
"step": 200
},
{
"epoch": 8.88888888888889,
"grad_norm": 0.14317023754119873,
"learning_rate": 3.390604278973543e-05,
"loss": 0.1636,
"step": 205
},
{
"epoch": 9.126984126984127,
"grad_norm": 0.1255073994398117,
"learning_rate": 3.301352779177743e-05,
"loss": 0.2084,
"step": 210
},
{
"epoch": 9.338624338624339,
"grad_norm": 0.15191367268562317,
"learning_rate": 3.21094779765728e-05,
"loss": 0.159,
"step": 215
},
{
"epoch": 9.55026455026455,
"grad_norm": 0.14779330790042877,
"learning_rate": 3.11951946498225e-05,
"loss": 0.1521,
"step": 220
},
{
"epoch": 9.761904761904763,
"grad_norm": 0.14298541843891144,
"learning_rate": 3.027199384752962e-05,
"loss": 0.1485,
"step": 225
},
{
"epoch": 9.973544973544973,
"grad_norm": 0.14848679304122925,
"learning_rate": 2.9341204441673266e-05,
"loss": 0.1488,
"step": 230
},
{
"epoch": 10.211640211640212,
"grad_norm": 0.15250274538993835,
"learning_rate": 2.840416622740617e-05,
"loss": 0.1604,
"step": 235
},
{
"epoch": 10.423280423280424,
"grad_norm": 0.2765931487083435,
"learning_rate": 2.7462227994529217e-05,
"loss": 0.1455,
"step": 240
},
{
"epoch": 10.634920634920634,
"grad_norm": 0.16372708976268768,
"learning_rate": 2.6516745586018965e-05,
"loss": 0.1236,
"step": 245
},
{
"epoch": 10.846560846560847,
"grad_norm": 0.3377860486507416,
"learning_rate": 2.556907994640264e-05,
"loss": 0.1393,
"step": 250
},
{
"epoch": 11.084656084656086,
"grad_norm": 0.1836676001548767,
"learning_rate": 2.4620595162789936e-05,
"loss": 0.1473,
"step": 255
},
{
"epoch": 11.296296296296296,
"grad_norm": 0.15243783593177795,
"learning_rate": 2.3672656501381272e-05,
"loss": 0.1366,
"step": 260
},
{
"epoch": 11.507936507936508,
"grad_norm": 0.1481999307870865,
"learning_rate": 2.2726628442278826e-05,
"loss": 0.1039,
"step": 265
},
{
"epoch": 11.71957671957672,
"grad_norm": 0.17082872986793518,
"learning_rate": 2.1783872715429228e-05,
"loss": 0.1196,
"step": 270
},
{
"epoch": 11.93121693121693,
"grad_norm": 0.16833926737308502,
"learning_rate": 2.084574634052465e-05,
"loss": 0.1245,
"step": 275
},
{
"epoch": 12.16931216931217,
"grad_norm": 0.18557614088058472,
"learning_rate": 1.991359967368416e-05,
"loss": 0.1174,
"step": 280
},
{
"epoch": 12.380952380952381,
"grad_norm": 0.13334429264068604,
"learning_rate": 1.8988774463726543e-05,
"loss": 0.0977,
"step": 285
},
{
"epoch": 12.592592592592592,
"grad_norm": 0.18030914664268494,
"learning_rate": 1.8072601920832786e-05,
"loss": 0.1147,
"step": 290
},
{
"epoch": 12.804232804232804,
"grad_norm": 0.19089557230472565,
"learning_rate": 1.7166400800377948e-05,
"loss": 0.1132,
"step": 295
}
],
"logging_steps": 5,
"max_steps": 460,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 23,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.8159025439689933e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}