{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 62,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03225806451612903,
"grad_norm": 31.199046559439136,
"learning_rate": 9.993582535855265e-06,
"loss": 1.6497,
"step": 1
},
{
"epoch": 0.06451612903225806,
"grad_norm": 17.46234723437062,
"learning_rate": 9.974346616959476e-06,
"loss": 1.1688,
"step": 2
},
{
"epoch": 0.0967741935483871,
"grad_norm": 17.2132760171181,
"learning_rate": 9.942341621640558e-06,
"loss": 1.2213,
"step": 3
},
{
"epoch": 0.12903225806451613,
"grad_norm": 8.814975880215384,
"learning_rate": 9.897649706262474e-06,
"loss": 0.852,
"step": 4
},
{
"epoch": 0.16129032258064516,
"grad_norm": 9.032603485694555,
"learning_rate": 9.840385594331022e-06,
"loss": 0.8313,
"step": 5
},
{
"epoch": 0.1935483870967742,
"grad_norm": 5.078284135758567,
"learning_rate": 9.770696282000245e-06,
"loss": 0.6852,
"step": 6
},
{
"epoch": 0.22580645161290322,
"grad_norm": 3.427300813881487,
"learning_rate": 9.688760660735403e-06,
"loss": 0.5022,
"step": 7
},
{
"epoch": 0.25806451612903225,
"grad_norm": 4.2152309516284845,
"learning_rate": 9.594789058101154e-06,
"loss": 0.5572,
"step": 8
},
{
"epoch": 0.2903225806451613,
"grad_norm": 3.085786333052321,
"learning_rate": 9.48902269785371e-06,
"loss": 0.4309,
"step": 9
},
{
"epoch": 0.3225806451612903,
"grad_norm": 3.125642693243869,
"learning_rate": 9.371733080722911e-06,
"loss": 0.5173,
"step": 10
},
{
"epoch": 0.3548387096774194,
"grad_norm": 3.277088815175521,
"learning_rate": 9.243221287473755e-06,
"loss": 0.4889,
"step": 11
},
{
"epoch": 0.3870967741935484,
"grad_norm": 2.357479595943573,
"learning_rate": 9.103817206036383e-06,
"loss": 0.3478,
"step": 12
},
{
"epoch": 0.41935483870967744,
"grad_norm": 2.0539004083223156,
"learning_rate": 8.953878684688492e-06,
"loss": 0.3496,
"step": 13
},
{
"epoch": 0.45161290322580644,
"grad_norm": 2.52448299705888,
"learning_rate": 8.793790613463956e-06,
"loss": 0.398,
"step": 14
},
{
"epoch": 0.4838709677419355,
"grad_norm": 2.355140249988772,
"learning_rate": 8.6239639361456e-06,
"loss": 0.3568,
"step": 15
},
{
"epoch": 0.5161290322580645,
"grad_norm": 2.4469993078971326,
"learning_rate": 8.444834595378434e-06,
"loss": 0.3884,
"step": 16
},
{
"epoch": 0.5483870967741935,
"grad_norm": 2.52296370250433,
"learning_rate": 8.256862413611113e-06,
"loss": 0.3657,
"step": 17
},
{
"epoch": 0.5806451612903226,
"grad_norm": 3.3355903081537135,
"learning_rate": 8.060529912738316e-06,
"loss": 0.5187,
"step": 18
},
{
"epoch": 0.6129032258064516,
"grad_norm": 2.484683322570902,
"learning_rate": 7.856341075473963e-06,
"loss": 0.4143,
"step": 19
},
{
"epoch": 0.6451612903225806,
"grad_norm": 1.7487341996348138,
"learning_rate": 7.644820051634813e-06,
"loss": 0.2972,
"step": 20
},
{
"epoch": 0.6774193548387096,
"grad_norm": 2.0583109478127204,
"learning_rate": 7.4265098126554065e-06,
"loss": 0.3197,
"step": 21
},
{
"epoch": 0.7096774193548387,
"grad_norm": 2.0344563885911575,
"learning_rate": 7.201970757788172e-06,
"loss": 0.3395,
"step": 22
},
{
"epoch": 0.7419354838709677,
"grad_norm": 2.058990191861192,
"learning_rate": 6.971779275566593e-06,
"loss": 0.3394,
"step": 23
},
{
"epoch": 0.7741935483870968,
"grad_norm": 1.9074433438418341,
"learning_rate": 6.736526264224101e-06,
"loss": 0.3442,
"step": 24
},
{
"epoch": 0.8064516129032258,
"grad_norm": 2.0162577599888323,
"learning_rate": 6.496815614866792e-06,
"loss": 0.2795,
"step": 25
},
{
"epoch": 0.8387096774193549,
"grad_norm": 2.4742505970882163,
"learning_rate": 6.2532626612936035e-06,
"loss": 0.3968,
"step": 26
},
{
"epoch": 0.8709677419354839,
"grad_norm": 2.207808258328297,
"learning_rate": 6.006492600443301e-06,
"loss": 0.3594,
"step": 27
},
{
"epoch": 0.9032258064516129,
"grad_norm": 1.7696328864440694,
"learning_rate": 5.757138887522884e-06,
"loss": 0.3114,
"step": 28
},
{
"epoch": 0.9354838709677419,
"grad_norm": 2.0275212616065645,
"learning_rate": 5.505841609937162e-06,
"loss": 0.3478,
"step": 29
},
{
"epoch": 0.967741935483871,
"grad_norm": 1.6298425948210777,
"learning_rate": 5.253245844193564e-06,
"loss": 0.2626,
"step": 30
},
{
"epoch": 1.0,
"grad_norm": 1.5467460791719727,
"learning_rate": 5e-06,
"loss": 0.2845,
"step": 31
},
{
"epoch": 1.032258064516129,
"grad_norm": 1.5586457967216936,
"learning_rate": 4.746754155806437e-06,
"loss": 0.2419,
"step": 32
},
{
"epoch": 1.064516129032258,
"grad_norm": 1.888001852915147,
"learning_rate": 4.49415839006284e-06,
"loss": 0.2208,
"step": 33
},
{
"epoch": 1.096774193548387,
"grad_norm": 1.4011698191222255,
"learning_rate": 4.2428611124771184e-06,
"loss": 0.2144,
"step": 34
},
{
"epoch": 1.129032258064516,
"grad_norm": 1.7487392529140704,
"learning_rate": 3.993507399556699e-06,
"loss": 0.2356,
"step": 35
},
{
"epoch": 1.1612903225806452,
"grad_norm": 2.111614480418175,
"learning_rate": 3.7467373387063973e-06,
"loss": 0.2874,
"step": 36
},
{
"epoch": 1.1935483870967742,
"grad_norm": 1.317740002801753,
"learning_rate": 3.5031843851332105e-06,
"loss": 0.1728,
"step": 37
},
{
"epoch": 1.2258064516129032,
"grad_norm": 1.7399051773302943,
"learning_rate": 3.2634737357758994e-06,
"loss": 0.2604,
"step": 38
},
{
"epoch": 1.2580645161290323,
"grad_norm": 1.8124616181419573,
"learning_rate": 3.0282207244334084e-06,
"loss": 0.2461,
"step": 39
},
{
"epoch": 1.2903225806451613,
"grad_norm": 2.2245741657359934,
"learning_rate": 2.7980292422118282e-06,
"loss": 0.2703,
"step": 40
},
{
"epoch": 1.3225806451612903,
"grad_norm": 1.6789960223565612,
"learning_rate": 2.573490187344596e-06,
"loss": 0.2038,
"step": 41
},
{
"epoch": 1.3548387096774195,
"grad_norm": 1.79239256078375,
"learning_rate": 2.3551799483651894e-06,
"loss": 0.2239,
"step": 42
},
{
"epoch": 1.3870967741935485,
"grad_norm": 3.076290808429015,
"learning_rate": 2.1436589245260375e-06,
"loss": 0.2957,
"step": 43
},
{
"epoch": 1.4193548387096775,
"grad_norm": 1.5602198674820165,
"learning_rate": 1.9394700872616856e-06,
"loss": 0.2107,
"step": 44
},
{
"epoch": 1.4516129032258065,
"grad_norm": 1.577639906284918,
"learning_rate": 1.74313758638889e-06,
"loss": 0.2238,
"step": 45
},
{
"epoch": 1.4838709677419355,
"grad_norm": 1.5054440694141868,
"learning_rate": 1.555165404621567e-06,
"loss": 0.2035,
"step": 46
},
{
"epoch": 1.5161290322580645,
"grad_norm": 1.92724369253792,
"learning_rate": 1.3760360638544012e-06,
"loss": 0.256,
"step": 47
},
{
"epoch": 1.5483870967741935,
"grad_norm": 1.52533329195425,
"learning_rate": 1.2062093865360458e-06,
"loss": 0.2078,
"step": 48
},
{
"epoch": 1.5806451612903225,
"grad_norm": 2.3222181174482044,
"learning_rate": 1.046121315311508e-06,
"loss": 0.2611,
"step": 49
},
{
"epoch": 1.6129032258064515,
"grad_norm": 1.4315191687571915,
"learning_rate": 8.961827939636198e-07,
"loss": 0.1965,
"step": 50
},
{
"epoch": 1.6451612903225805,
"grad_norm": 1.5414636545885392,
"learning_rate": 7.567787125262449e-07,
"loss": 0.2,
"step": 51
},
{
"epoch": 1.6774193548387095,
"grad_norm": 1.4835478438790177,
"learning_rate": 6.282669192770896e-07,
"loss": 0.2509,
"step": 52
},
{
"epoch": 1.7096774193548387,
"grad_norm": 1.4946263792553225,
"learning_rate": 5.109773021462921e-07,
"loss": 0.2085,
"step": 53
},
{
"epoch": 1.7419354838709677,
"grad_norm": 1.7678787704558188,
"learning_rate": 4.05210941898847e-07,
"loss": 0.2467,
"step": 54
},
{
"epoch": 1.7741935483870968,
"grad_norm": 1.4867440873899518,
"learning_rate": 3.112393392645985e-07,
"loss": 0.2363,
"step": 55
},
{
"epoch": 1.8064516129032258,
"grad_norm": 1.4803775636293333,
"learning_rate": 2.2930371799975593e-07,
"loss": 0.1855,
"step": 56
},
{
"epoch": 1.838709677419355,
"grad_norm": 1.4490266945389694,
"learning_rate": 1.5961440566897913e-07,
"loss": 0.1821,
"step": 57
},
{
"epoch": 1.870967741935484,
"grad_norm": 1.7145630167827153,
"learning_rate": 1.0235029373752758e-07,
"loss": 0.2157,
"step": 58
},
{
"epoch": 1.903225806451613,
"grad_norm": 1.5925676389651224,
"learning_rate": 5.7658378359443104e-08,
"loss": 0.2158,
"step": 59
},
{
"epoch": 1.935483870967742,
"grad_norm": 1.4959223286590795,
"learning_rate": 2.5653383040524228e-08,
"loss": 0.2202,
"step": 60
},
{
"epoch": 1.967741935483871,
"grad_norm": 2.013681318495715,
"learning_rate": 6.417464144736208e-09,
"loss": 0.2533,
"step": 61
},
{
"epoch": 2.0,
"grad_norm": 1.4127405994063447,
"learning_rate": 0.0,
"loss": 0.2159,
"step": 62
},
{
"epoch": 2.0,
"step": 62,
"total_flos": 2138633404416.0,
"train_loss": 0.370798627936071,
"train_runtime": 218.5623,
"train_samples_per_second": 2.251,
"train_steps_per_second": 0.284
}
],
"logging_steps": 1,
"max_steps": 62,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 70000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2138633404416.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}