|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.48828125, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00048828125, |
|
"grad_norm": 11.68676471710205, |
|
"learning_rate": 1.6129032258064518e-07, |
|
"loss": 0.6894, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0009765625, |
|
"grad_norm": 16.37053871154785, |
|
"learning_rate": 3.2258064516129035e-07, |
|
"loss": 0.7171, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00146484375, |
|
"grad_norm": 23.564491271972656, |
|
"learning_rate": 4.838709677419355e-07, |
|
"loss": 0.7123, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.001953125, |
|
"grad_norm": 16.051462173461914, |
|
"learning_rate": 6.451612903225807e-07, |
|
"loss": 0.7445, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00244140625, |
|
"grad_norm": 13.484965324401855, |
|
"learning_rate": 8.064516129032258e-07, |
|
"loss": 0.7697, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0029296875, |
|
"grad_norm": 12.733880043029785, |
|
"learning_rate": 9.67741935483871e-07, |
|
"loss": 0.6796, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00341796875, |
|
"grad_norm": 11.081924438476562, |
|
"learning_rate": 1.1290322580645162e-06, |
|
"loss": 0.6711, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00390625, |
|
"grad_norm": 11.96164321899414, |
|
"learning_rate": 1.2903225806451614e-06, |
|
"loss": 0.6916, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00439453125, |
|
"grad_norm": 8.69968318939209, |
|
"learning_rate": 1.4516129032258066e-06, |
|
"loss": 0.6125, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0048828125, |
|
"grad_norm": 8.749759674072266, |
|
"learning_rate": 1.6129032258064516e-06, |
|
"loss": 0.5684, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00537109375, |
|
"grad_norm": 9.206546783447266, |
|
"learning_rate": 1.774193548387097e-06, |
|
"loss": 0.5901, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.005859375, |
|
"grad_norm": 6.172158718109131, |
|
"learning_rate": 1.935483870967742e-06, |
|
"loss": 0.5147, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00634765625, |
|
"grad_norm": 5.583189010620117, |
|
"learning_rate": 2.096774193548387e-06, |
|
"loss": 0.5078, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0068359375, |
|
"grad_norm": 8.174113273620605, |
|
"learning_rate": 2.2580645161290324e-06, |
|
"loss": 0.5151, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.00732421875, |
|
"grad_norm": 11.44507122039795, |
|
"learning_rate": 2.4193548387096776e-06, |
|
"loss": 0.5215, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0078125, |
|
"grad_norm": 4.763265132904053, |
|
"learning_rate": 2.580645161290323e-06, |
|
"loss": 0.5062, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.00830078125, |
|
"grad_norm": 7.144759178161621, |
|
"learning_rate": 2.7419354838709676e-06, |
|
"loss": 0.5313, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0087890625, |
|
"grad_norm": 4.595753192901611, |
|
"learning_rate": 2.903225806451613e-06, |
|
"loss": 0.4514, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.00927734375, |
|
"grad_norm": 5.988632678985596, |
|
"learning_rate": 3.0645161290322584e-06, |
|
"loss": 0.468, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.009765625, |
|
"grad_norm": 5.993471145629883, |
|
"learning_rate": 3.225806451612903e-06, |
|
"loss": 0.4231, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01025390625, |
|
"grad_norm": 5.629610538482666, |
|
"learning_rate": 3.3870967741935484e-06, |
|
"loss": 0.4748, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0107421875, |
|
"grad_norm": 5.070748329162598, |
|
"learning_rate": 3.548387096774194e-06, |
|
"loss": 0.4851, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01123046875, |
|
"grad_norm": 5.008419990539551, |
|
"learning_rate": 3.7096774193548392e-06, |
|
"loss": 0.4251, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01171875, |
|
"grad_norm": 5.048961162567139, |
|
"learning_rate": 3.870967741935484e-06, |
|
"loss": 0.4423, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01220703125, |
|
"grad_norm": 3.505443811416626, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 0.4165, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0126953125, |
|
"grad_norm": 4.471498966217041, |
|
"learning_rate": 4.193548387096774e-06, |
|
"loss": 0.4132, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01318359375, |
|
"grad_norm": 3.593733310699463, |
|
"learning_rate": 4.35483870967742e-06, |
|
"loss": 0.38, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.013671875, |
|
"grad_norm": 7.17294979095459, |
|
"learning_rate": 4.516129032258065e-06, |
|
"loss": 0.3956, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.01416015625, |
|
"grad_norm": 15.088685989379883, |
|
"learning_rate": 4.67741935483871e-06, |
|
"loss": 0.4425, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0146484375, |
|
"grad_norm": 4.4346113204956055, |
|
"learning_rate": 4.838709677419355e-06, |
|
"loss": 0.3911, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01513671875, |
|
"grad_norm": 4.740771293640137, |
|
"learning_rate": 5e-06, |
|
"loss": 0.423, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.015625, |
|
"grad_norm": 3.4211642742156982, |
|
"learning_rate": 5.161290322580646e-06, |
|
"loss": 0.4183, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.01611328125, |
|
"grad_norm": 5.500433444976807, |
|
"learning_rate": 5.322580645161291e-06, |
|
"loss": 0.3956, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0166015625, |
|
"grad_norm": 4.092607021331787, |
|
"learning_rate": 5.483870967741935e-06, |
|
"loss": 0.4028, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.01708984375, |
|
"grad_norm": 12.963457107543945, |
|
"learning_rate": 5.645161290322582e-06, |
|
"loss": 0.3862, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.017578125, |
|
"grad_norm": 4.550689697265625, |
|
"learning_rate": 5.806451612903226e-06, |
|
"loss": 0.4078, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.01806640625, |
|
"grad_norm": 3.3017280101776123, |
|
"learning_rate": 5.967741935483872e-06, |
|
"loss": 0.4334, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0185546875, |
|
"grad_norm": 4.2097954750061035, |
|
"learning_rate": 6.129032258064517e-06, |
|
"loss": 0.342, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.01904296875, |
|
"grad_norm": 2.9576752185821533, |
|
"learning_rate": 6.290322580645162e-06, |
|
"loss": 0.3824, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.01953125, |
|
"grad_norm": 6.747947692871094, |
|
"learning_rate": 6.451612903225806e-06, |
|
"loss": 0.3952, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02001953125, |
|
"grad_norm": 2.851712942123413, |
|
"learning_rate": 6.612903225806452e-06, |
|
"loss": 0.4143, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0205078125, |
|
"grad_norm": 3.3788578510284424, |
|
"learning_rate": 6.774193548387097e-06, |
|
"loss": 0.3733, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02099609375, |
|
"grad_norm": 4.708284378051758, |
|
"learning_rate": 6.935483870967743e-06, |
|
"loss": 0.3955, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.021484375, |
|
"grad_norm": 3.0566701889038086, |
|
"learning_rate": 7.096774193548388e-06, |
|
"loss": 0.402, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02197265625, |
|
"grad_norm": 4.440851211547852, |
|
"learning_rate": 7.258064516129033e-06, |
|
"loss": 0.361, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0224609375, |
|
"grad_norm": 2.7747905254364014, |
|
"learning_rate": 7.4193548387096784e-06, |
|
"loss": 0.3896, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02294921875, |
|
"grad_norm": 3.510695695877075, |
|
"learning_rate": 7.580645161290323e-06, |
|
"loss": 0.364, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0234375, |
|
"grad_norm": 20.806020736694336, |
|
"learning_rate": 7.741935483870968e-06, |
|
"loss": 0.3849, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.02392578125, |
|
"grad_norm": 3.569124698638916, |
|
"learning_rate": 7.903225806451613e-06, |
|
"loss": 0.3569, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0244140625, |
|
"grad_norm": 2.8412413597106934, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 0.362, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02490234375, |
|
"grad_norm": 3.287231683731079, |
|
"learning_rate": 8.225806451612904e-06, |
|
"loss": 0.3941, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.025390625, |
|
"grad_norm": 2.849888563156128, |
|
"learning_rate": 8.387096774193549e-06, |
|
"loss": 0.3906, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.02587890625, |
|
"grad_norm": 6.925948619842529, |
|
"learning_rate": 8.548387096774194e-06, |
|
"loss": 0.3783, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0263671875, |
|
"grad_norm": 2.9347381591796875, |
|
"learning_rate": 8.70967741935484e-06, |
|
"loss": 0.4156, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.02685546875, |
|
"grad_norm": 3.695150375366211, |
|
"learning_rate": 8.870967741935484e-06, |
|
"loss": 0.3586, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.02734375, |
|
"grad_norm": 7.241847038269043, |
|
"learning_rate": 9.03225806451613e-06, |
|
"loss": 0.3693, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.02783203125, |
|
"grad_norm": 2.603956699371338, |
|
"learning_rate": 9.193548387096775e-06, |
|
"loss": 0.4109, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0283203125, |
|
"grad_norm": 3.0118958950042725, |
|
"learning_rate": 9.35483870967742e-06, |
|
"loss": 0.4096, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.02880859375, |
|
"grad_norm": 5.108702182769775, |
|
"learning_rate": 9.516129032258065e-06, |
|
"loss": 0.3786, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.029296875, |
|
"grad_norm": 3.0591766834259033, |
|
"learning_rate": 9.67741935483871e-06, |
|
"loss": 0.3979, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02978515625, |
|
"grad_norm": 3.5517218112945557, |
|
"learning_rate": 9.838709677419356e-06, |
|
"loss": 0.3847, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0302734375, |
|
"grad_norm": 3.091423988342285, |
|
"learning_rate": 1e-05, |
|
"loss": 0.35, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03076171875, |
|
"grad_norm": 2.7133779525756836, |
|
"learning_rate": 9.999993744224208e-06, |
|
"loss": 0.3592, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 2.4324684143066406, |
|
"learning_rate": 9.999974976912485e-06, |
|
"loss": 0.3616, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03173828125, |
|
"grad_norm": 2.872821807861328, |
|
"learning_rate": 9.999943698111792e-06, |
|
"loss": 0.3741, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0322265625, |
|
"grad_norm": 2.9383156299591064, |
|
"learning_rate": 9.999899907900399e-06, |
|
"loss": 0.3732, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.03271484375, |
|
"grad_norm": 3.5359489917755127, |
|
"learning_rate": 9.999843606387883e-06, |
|
"loss": 0.4053, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.033203125, |
|
"grad_norm": 3.5608558654785156, |
|
"learning_rate": 9.999774793715126e-06, |
|
"loss": 0.4197, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.03369140625, |
|
"grad_norm": 2.5407004356384277, |
|
"learning_rate": 9.999693470054321e-06, |
|
"loss": 0.354, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0341796875, |
|
"grad_norm": 3.4264254570007324, |
|
"learning_rate": 9.999599635608964e-06, |
|
"loss": 0.3936, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03466796875, |
|
"grad_norm": 3.0363235473632812, |
|
"learning_rate": 9.999493290613859e-06, |
|
"loss": 0.3753, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.03515625, |
|
"grad_norm": 2.2824833393096924, |
|
"learning_rate": 9.999374435335113e-06, |
|
"loss": 0.3813, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.03564453125, |
|
"grad_norm": 2.445328712463379, |
|
"learning_rate": 9.999243070070137e-06, |
|
"loss": 0.4237, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0361328125, |
|
"grad_norm": 5.150700092315674, |
|
"learning_rate": 9.99909919514765e-06, |
|
"loss": 0.3892, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.03662109375, |
|
"grad_norm": 4.1412272453308105, |
|
"learning_rate": 9.998942810927673e-06, |
|
"loss": 0.3675, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.037109375, |
|
"grad_norm": 5.456881999969482, |
|
"learning_rate": 9.998773917801526e-06, |
|
"loss": 0.3955, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.03759765625, |
|
"grad_norm": 2.2837321758270264, |
|
"learning_rate": 9.998592516191832e-06, |
|
"loss": 0.3477, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0380859375, |
|
"grad_norm": 2.237900972366333, |
|
"learning_rate": 9.998398606552513e-06, |
|
"loss": 0.3771, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.03857421875, |
|
"grad_norm": 2.6276211738586426, |
|
"learning_rate": 9.998192189368795e-06, |
|
"loss": 0.3989, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0390625, |
|
"grad_norm": 3.5280210971832275, |
|
"learning_rate": 9.997973265157192e-06, |
|
"loss": 0.3726, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03955078125, |
|
"grad_norm": 8.555140495300293, |
|
"learning_rate": 9.997741834465526e-06, |
|
"loss": 0.397, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0400390625, |
|
"grad_norm": 2.1885085105895996, |
|
"learning_rate": 9.997497897872904e-06, |
|
"loss": 0.4058, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.04052734375, |
|
"grad_norm": 3.0636098384857178, |
|
"learning_rate": 9.997241455989735e-06, |
|
"loss": 0.3866, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.041015625, |
|
"grad_norm": 3.7982375621795654, |
|
"learning_rate": 9.996972509457711e-06, |
|
"loss": 0.3877, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04150390625, |
|
"grad_norm": 2.4791505336761475, |
|
"learning_rate": 9.996691058949826e-06, |
|
"loss": 0.3789, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0419921875, |
|
"grad_norm": 3.917693614959717, |
|
"learning_rate": 9.996397105170353e-06, |
|
"loss": 0.3737, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.04248046875, |
|
"grad_norm": 2.3083252906799316, |
|
"learning_rate": 9.996090648854856e-06, |
|
"loss": 0.3658, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.04296875, |
|
"grad_norm": 1.9872547388076782, |
|
"learning_rate": 9.995771690770184e-06, |
|
"loss": 0.3819, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.04345703125, |
|
"grad_norm": 1.8703923225402832, |
|
"learning_rate": 9.995440231714469e-06, |
|
"loss": 0.37, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.0439453125, |
|
"grad_norm": 2.7573578357696533, |
|
"learning_rate": 9.995096272517122e-06, |
|
"loss": 0.3876, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04443359375, |
|
"grad_norm": 2.177542209625244, |
|
"learning_rate": 9.99473981403884e-06, |
|
"loss": 0.434, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.044921875, |
|
"grad_norm": 1.9339114427566528, |
|
"learning_rate": 9.99437085717159e-06, |
|
"loss": 0.333, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.04541015625, |
|
"grad_norm": 2.9820590019226074, |
|
"learning_rate": 9.993989402838618e-06, |
|
"loss": 0.3321, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0458984375, |
|
"grad_norm": 2.0244717597961426, |
|
"learning_rate": 9.99359545199444e-06, |
|
"loss": 0.3153, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.04638671875, |
|
"grad_norm": 2.0268101692199707, |
|
"learning_rate": 9.993189005624842e-06, |
|
"loss": 0.3663, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.046875, |
|
"grad_norm": 1.920785903930664, |
|
"learning_rate": 9.992770064746882e-06, |
|
"loss": 0.3419, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.04736328125, |
|
"grad_norm": 3.2875781059265137, |
|
"learning_rate": 9.992338630408877e-06, |
|
"loss": 0.3406, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0478515625, |
|
"grad_norm": 3.7749016284942627, |
|
"learning_rate": 9.991894703690414e-06, |
|
"loss": 0.3555, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.04833984375, |
|
"grad_norm": 4.618077754974365, |
|
"learning_rate": 9.991438285702332e-06, |
|
"loss": 0.4001, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.048828125, |
|
"grad_norm": 2.468576192855835, |
|
"learning_rate": 9.99096937758673e-06, |
|
"loss": 0.4258, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04931640625, |
|
"grad_norm": 5.204842567443848, |
|
"learning_rate": 9.990487980516962e-06, |
|
"loss": 0.4107, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0498046875, |
|
"grad_norm": 3.3488011360168457, |
|
"learning_rate": 9.989994095697636e-06, |
|
"loss": 0.3658, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05029296875, |
|
"grad_norm": 4.41386079788208, |
|
"learning_rate": 9.989487724364602e-06, |
|
"loss": 0.3705, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.05078125, |
|
"grad_norm": 2.9542033672332764, |
|
"learning_rate": 9.988968867784958e-06, |
|
"loss": 0.3955, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05126953125, |
|
"grad_norm": 2.3820998668670654, |
|
"learning_rate": 9.988437527257044e-06, |
|
"loss": 0.3652, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0517578125, |
|
"grad_norm": 2.352477550506592, |
|
"learning_rate": 9.987893704110441e-06, |
|
"loss": 0.3545, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.05224609375, |
|
"grad_norm": 7.221553802490234, |
|
"learning_rate": 9.987337399705964e-06, |
|
"loss": 0.3616, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.052734375, |
|
"grad_norm": 2.3267176151275635, |
|
"learning_rate": 9.986768615435655e-06, |
|
"loss": 0.3868, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.05322265625, |
|
"grad_norm": 1.9337338209152222, |
|
"learning_rate": 9.986187352722792e-06, |
|
"loss": 0.3664, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.0537109375, |
|
"grad_norm": 2.2121877670288086, |
|
"learning_rate": 9.985593613021873e-06, |
|
"loss": 0.3731, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05419921875, |
|
"grad_norm": 1.9584633111953735, |
|
"learning_rate": 9.98498739781862e-06, |
|
"loss": 0.3805, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.0546875, |
|
"grad_norm": 2.3601884841918945, |
|
"learning_rate": 9.984368708629972e-06, |
|
"loss": 0.3328, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.05517578125, |
|
"grad_norm": 2.705298662185669, |
|
"learning_rate": 9.98373754700408e-06, |
|
"loss": 0.3573, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0556640625, |
|
"grad_norm": 4.535929203033447, |
|
"learning_rate": 9.98309391452031e-06, |
|
"loss": 0.3853, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.05615234375, |
|
"grad_norm": 2.4388949871063232, |
|
"learning_rate": 9.982437812789224e-06, |
|
"loss": 0.3389, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.056640625, |
|
"grad_norm": 3.7873549461364746, |
|
"learning_rate": 9.981769243452595e-06, |
|
"loss": 0.3745, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.05712890625, |
|
"grad_norm": 2.1249921321868896, |
|
"learning_rate": 9.981088208183392e-06, |
|
"loss": 0.3854, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.0576171875, |
|
"grad_norm": 3.2426087856292725, |
|
"learning_rate": 9.980394708685777e-06, |
|
"loss": 0.3743, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.05810546875, |
|
"grad_norm": 2.349886178970337, |
|
"learning_rate": 9.979688746695099e-06, |
|
"loss": 0.3477, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.05859375, |
|
"grad_norm": 2.6616315841674805, |
|
"learning_rate": 9.978970323977895e-06, |
|
"loss": 0.3497, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05908203125, |
|
"grad_norm": 2.284364938735962, |
|
"learning_rate": 9.978239442331881e-06, |
|
"loss": 0.3987, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.0595703125, |
|
"grad_norm": 2.347794532775879, |
|
"learning_rate": 9.977496103585949e-06, |
|
"loss": 0.3375, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.06005859375, |
|
"grad_norm": 2.8935320377349854, |
|
"learning_rate": 9.976740309600166e-06, |
|
"loss": 0.3943, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.060546875, |
|
"grad_norm": 2.3763160705566406, |
|
"learning_rate": 9.97597206226576e-06, |
|
"loss": 0.3703, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.06103515625, |
|
"grad_norm": 2.1485118865966797, |
|
"learning_rate": 9.975191363505127e-06, |
|
"loss": 0.3604, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0615234375, |
|
"grad_norm": 4.019608020782471, |
|
"learning_rate": 9.974398215271814e-06, |
|
"loss": 0.3345, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.06201171875, |
|
"grad_norm": 4.793520450592041, |
|
"learning_rate": 9.973592619550528e-06, |
|
"loss": 0.3583, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 2.3743088245391846, |
|
"learning_rate": 9.972774578357118e-06, |
|
"loss": 0.3612, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.06298828125, |
|
"grad_norm": 2.3221397399902344, |
|
"learning_rate": 9.971944093738575e-06, |
|
"loss": 0.3759, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.0634765625, |
|
"grad_norm": 2.639760971069336, |
|
"learning_rate": 9.971101167773032e-06, |
|
"loss": 0.3749, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.06396484375, |
|
"grad_norm": 2.3176326751708984, |
|
"learning_rate": 9.97024580256975e-06, |
|
"loss": 0.3324, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.064453125, |
|
"grad_norm": 2.5662341117858887, |
|
"learning_rate": 9.969378000269117e-06, |
|
"loss": 0.3956, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.06494140625, |
|
"grad_norm": 3.271336793899536, |
|
"learning_rate": 9.968497763042644e-06, |
|
"loss": 0.3702, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0654296875, |
|
"grad_norm": 2.0121848583221436, |
|
"learning_rate": 9.96760509309296e-06, |
|
"loss": 0.3644, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.06591796875, |
|
"grad_norm": 2.1467254161834717, |
|
"learning_rate": 9.9666999926538e-06, |
|
"loss": 0.3444, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.06640625, |
|
"grad_norm": 2.985793113708496, |
|
"learning_rate": 9.96578246399001e-06, |
|
"loss": 0.4015, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.06689453125, |
|
"grad_norm": 2.158658504486084, |
|
"learning_rate": 9.964852509397527e-06, |
|
"loss": 0.3809, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.0673828125, |
|
"grad_norm": 4.1197919845581055, |
|
"learning_rate": 9.963910131203386e-06, |
|
"loss": 0.3874, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.06787109375, |
|
"grad_norm": 2.2979846000671387, |
|
"learning_rate": 9.962955331765712e-06, |
|
"loss": 0.342, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.068359375, |
|
"grad_norm": 2.2568418979644775, |
|
"learning_rate": 9.961988113473708e-06, |
|
"loss": 0.3223, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06884765625, |
|
"grad_norm": 2.358520030975342, |
|
"learning_rate": 9.961008478747655e-06, |
|
"loss": 0.374, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.0693359375, |
|
"grad_norm": 2.6409096717834473, |
|
"learning_rate": 9.960016430038903e-06, |
|
"loss": 0.3705, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.06982421875, |
|
"grad_norm": 2.167280673980713, |
|
"learning_rate": 9.959011969829867e-06, |
|
"loss": 0.3302, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.0703125, |
|
"grad_norm": 2.3867969512939453, |
|
"learning_rate": 9.957995100634016e-06, |
|
"loss": 0.3251, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.07080078125, |
|
"grad_norm": 2.305117130279541, |
|
"learning_rate": 9.956965824995873e-06, |
|
"loss": 0.3593, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0712890625, |
|
"grad_norm": 2.1817824840545654, |
|
"learning_rate": 9.955924145491005e-06, |
|
"loss": 0.3371, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.07177734375, |
|
"grad_norm": 4.12109375, |
|
"learning_rate": 9.954870064726017e-06, |
|
"loss": 0.3771, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.072265625, |
|
"grad_norm": 3.0079329013824463, |
|
"learning_rate": 9.953803585338548e-06, |
|
"loss": 0.3636, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.07275390625, |
|
"grad_norm": 2.473532199859619, |
|
"learning_rate": 9.95272470999726e-06, |
|
"loss": 0.3692, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.0732421875, |
|
"grad_norm": 3.1922385692596436, |
|
"learning_rate": 9.95163344140183e-06, |
|
"loss": 0.3773, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.07373046875, |
|
"grad_norm": 6.991460800170898, |
|
"learning_rate": 9.950529782282955e-06, |
|
"loss": 0.2813, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.07421875, |
|
"grad_norm": 2.9967305660247803, |
|
"learning_rate": 9.949413735402332e-06, |
|
"loss": 0.3565, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.07470703125, |
|
"grad_norm": 1.8642289638519287, |
|
"learning_rate": 9.948285303552654e-06, |
|
"loss": 0.3715, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.0751953125, |
|
"grad_norm": 2.169416904449463, |
|
"learning_rate": 9.947144489557612e-06, |
|
"loss": 0.3507, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.07568359375, |
|
"grad_norm": 2.5897326469421387, |
|
"learning_rate": 9.945991296271874e-06, |
|
"loss": 0.3508, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.076171875, |
|
"grad_norm": 1.8967130184173584, |
|
"learning_rate": 9.944825726581085e-06, |
|
"loss": 0.318, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.07666015625, |
|
"grad_norm": 1.998544454574585, |
|
"learning_rate": 9.943647783401867e-06, |
|
"loss": 0.3757, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.0771484375, |
|
"grad_norm": 2.5188403129577637, |
|
"learning_rate": 9.942457469681794e-06, |
|
"loss": 0.3551, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.07763671875, |
|
"grad_norm": 2.2102835178375244, |
|
"learning_rate": 9.941254788399406e-06, |
|
"loss": 0.3499, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.078125, |
|
"grad_norm": 3.3190438747406006, |
|
"learning_rate": 9.940039742564182e-06, |
|
"loss": 0.3586, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.07861328125, |
|
"grad_norm": 6.675033092498779, |
|
"learning_rate": 9.938812335216543e-06, |
|
"loss": 0.3892, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.0791015625, |
|
"grad_norm": 3.091517925262451, |
|
"learning_rate": 9.937572569427844e-06, |
|
"loss": 0.3434, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.07958984375, |
|
"grad_norm": 2.7739408016204834, |
|
"learning_rate": 9.936320448300364e-06, |
|
"loss": 0.3366, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.080078125, |
|
"grad_norm": 4.218409538269043, |
|
"learning_rate": 9.935055974967299e-06, |
|
"loss": 0.3129, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.08056640625, |
|
"grad_norm": 2.2632052898406982, |
|
"learning_rate": 9.933779152592752e-06, |
|
"loss": 0.3507, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.0810546875, |
|
"grad_norm": 2.3607664108276367, |
|
"learning_rate": 9.93248998437173e-06, |
|
"loss": 0.3598, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.08154296875, |
|
"grad_norm": 2.2539124488830566, |
|
"learning_rate": 9.931188473530132e-06, |
|
"loss": 0.404, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.08203125, |
|
"grad_norm": 2.049994945526123, |
|
"learning_rate": 9.929874623324741e-06, |
|
"loss": 0.3534, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.08251953125, |
|
"grad_norm": 4.720448017120361, |
|
"learning_rate": 9.92854843704322e-06, |
|
"loss": 0.3492, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.0830078125, |
|
"grad_norm": 2.1875171661376953, |
|
"learning_rate": 9.927209918004095e-06, |
|
"loss": 0.3765, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08349609375, |
|
"grad_norm": 6.087578773498535, |
|
"learning_rate": 9.92585906955676e-06, |
|
"loss": 0.3519, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.083984375, |
|
"grad_norm": 6.033719539642334, |
|
"learning_rate": 9.924495895081455e-06, |
|
"loss": 0.3493, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.08447265625, |
|
"grad_norm": 4.239842414855957, |
|
"learning_rate": 9.923120397989265e-06, |
|
"loss": 0.3566, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.0849609375, |
|
"grad_norm": 3.4344899654388428, |
|
"learning_rate": 9.92173258172211e-06, |
|
"loss": 0.3291, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.08544921875, |
|
"grad_norm": 2.5044116973876953, |
|
"learning_rate": 9.920332449752741e-06, |
|
"loss": 0.368, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.0859375, |
|
"grad_norm": 2.5513086318969727, |
|
"learning_rate": 9.91892000558472e-06, |
|
"loss": 0.3715, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.08642578125, |
|
"grad_norm": 3.1087024211883545, |
|
"learning_rate": 9.917495252752418e-06, |
|
"loss": 0.3421, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.0869140625, |
|
"grad_norm": 4.5129194259643555, |
|
"learning_rate": 9.916058194821013e-06, |
|
"loss": 0.3348, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.08740234375, |
|
"grad_norm": 2.54546856880188, |
|
"learning_rate": 9.914608835386468e-06, |
|
"loss": 0.3741, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.087890625, |
|
"grad_norm": 3.379059314727783, |
|
"learning_rate": 9.913147178075531e-06, |
|
"loss": 0.3633, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08837890625, |
|
"grad_norm": 2.6582908630371094, |
|
"learning_rate": 9.911673226545721e-06, |
|
"loss": 0.3626, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.0888671875, |
|
"grad_norm": 2.116603374481201, |
|
"learning_rate": 9.910186984485321e-06, |
|
"loss": 0.3627, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.08935546875, |
|
"grad_norm": 3.2947633266448975, |
|
"learning_rate": 9.908688455613374e-06, |
|
"loss": 0.3264, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.08984375, |
|
"grad_norm": 2.313702344894409, |
|
"learning_rate": 9.90717764367966e-06, |
|
"loss": 0.3285, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.09033203125, |
|
"grad_norm": 2.2801687717437744, |
|
"learning_rate": 9.9056545524647e-06, |
|
"loss": 0.3573, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0908203125, |
|
"grad_norm": 3.657966375350952, |
|
"learning_rate": 9.904119185779744e-06, |
|
"loss": 0.3711, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.09130859375, |
|
"grad_norm": 22.30857276916504, |
|
"learning_rate": 9.902571547466753e-06, |
|
"loss": 0.3995, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.091796875, |
|
"grad_norm": 2.184039831161499, |
|
"learning_rate": 9.901011641398398e-06, |
|
"loss": 0.3654, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.09228515625, |
|
"grad_norm": 4.786393165588379, |
|
"learning_rate": 9.89943947147805e-06, |
|
"loss": 0.3859, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.0927734375, |
|
"grad_norm": 2.666750431060791, |
|
"learning_rate": 9.897855041639764e-06, |
|
"loss": 0.3888, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09326171875, |
|
"grad_norm": 2.0390570163726807, |
|
"learning_rate": 9.896258355848277e-06, |
|
"loss": 0.3488, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 2.618748188018799, |
|
"learning_rate": 9.894649418098992e-06, |
|
"loss": 0.3513, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.09423828125, |
|
"grad_norm": 2.525346040725708, |
|
"learning_rate": 9.89302823241797e-06, |
|
"loss": 0.3689, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.0947265625, |
|
"grad_norm": 2.0813663005828857, |
|
"learning_rate": 9.89139480286192e-06, |
|
"loss": 0.3718, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.09521484375, |
|
"grad_norm": 3.025359630584717, |
|
"learning_rate": 9.88974913351819e-06, |
|
"loss": 0.3786, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.095703125, |
|
"grad_norm": 2.8500590324401855, |
|
"learning_rate": 9.888091228504757e-06, |
|
"loss": 0.3481, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.09619140625, |
|
"grad_norm": 2.450500249862671, |
|
"learning_rate": 9.88642109197021e-06, |
|
"loss": 0.383, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.0966796875, |
|
"grad_norm": 1.9162877798080444, |
|
"learning_rate": 9.884738728093754e-06, |
|
"loss": 0.3698, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.09716796875, |
|
"grad_norm": 14.184158325195312, |
|
"learning_rate": 9.883044141085183e-06, |
|
"loss": 0.3327, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.09765625, |
|
"grad_norm": 3.0886130332946777, |
|
"learning_rate": 9.881337335184879e-06, |
|
"loss": 0.3767, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09814453125, |
|
"grad_norm": 2.5864577293395996, |
|
"learning_rate": 9.879618314663799e-06, |
|
"loss": 0.3498, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.0986328125, |
|
"grad_norm": 3.3661086559295654, |
|
"learning_rate": 9.87788708382347e-06, |
|
"loss": 0.3487, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.09912109375, |
|
"grad_norm": 2.543836832046509, |
|
"learning_rate": 9.876143646995964e-06, |
|
"loss": 0.3611, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.099609375, |
|
"grad_norm": 2.209348201751709, |
|
"learning_rate": 9.874388008543903e-06, |
|
"loss": 0.3303, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.10009765625, |
|
"grad_norm": 8.464391708374023, |
|
"learning_rate": 9.87262017286044e-06, |
|
"loss": 0.3915, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1005859375, |
|
"grad_norm": 2.339383125305176, |
|
"learning_rate": 9.870840144369247e-06, |
|
"loss": 0.3386, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.10107421875, |
|
"grad_norm": 4.952784538269043, |
|
"learning_rate": 9.869047927524508e-06, |
|
"loss": 0.3189, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.1015625, |
|
"grad_norm": 2.147639036178589, |
|
"learning_rate": 9.867243526810909e-06, |
|
"loss": 0.325, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.10205078125, |
|
"grad_norm": 2.364194393157959, |
|
"learning_rate": 9.865426946743614e-06, |
|
"loss": 0.3728, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.1025390625, |
|
"grad_norm": 2.0875487327575684, |
|
"learning_rate": 9.863598191868275e-06, |
|
"loss": 0.3493, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.10302734375, |
|
"grad_norm": 3.100674629211426, |
|
"learning_rate": 9.861757266761002e-06, |
|
"loss": 0.3503, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.103515625, |
|
"grad_norm": 3.1530754566192627, |
|
"learning_rate": 9.859904176028364e-06, |
|
"loss": 0.3635, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.10400390625, |
|
"grad_norm": 2.373269557952881, |
|
"learning_rate": 9.858038924307363e-06, |
|
"loss": 0.316, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.1044921875, |
|
"grad_norm": 2.517578125, |
|
"learning_rate": 9.856161516265445e-06, |
|
"loss": 0.3729, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.10498046875, |
|
"grad_norm": 3.9366421699523926, |
|
"learning_rate": 9.854271956600463e-06, |
|
"loss": 0.3119, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.10546875, |
|
"grad_norm": 3.0418357849121094, |
|
"learning_rate": 9.852370250040682e-06, |
|
"loss": 0.3799, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.10595703125, |
|
"grad_norm": 2.486046314239502, |
|
"learning_rate": 9.85045640134476e-06, |
|
"loss": 0.3761, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.1064453125, |
|
"grad_norm": 3.757772207260132, |
|
"learning_rate": 9.848530415301748e-06, |
|
"loss": 0.3281, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.10693359375, |
|
"grad_norm": 5.470198631286621, |
|
"learning_rate": 9.846592296731052e-06, |
|
"loss": 0.3626, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.107421875, |
|
"grad_norm": 2.6514899730682373, |
|
"learning_rate": 9.84464205048245e-06, |
|
"loss": 0.3312, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.10791015625, |
|
"grad_norm": 2.359720230102539, |
|
"learning_rate": 9.842679681436062e-06, |
|
"loss": 0.3332, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.1083984375, |
|
"grad_norm": 2.7306034564971924, |
|
"learning_rate": 9.840705194502349e-06, |
|
"loss": 0.3623, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.10888671875, |
|
"grad_norm": 2.2408559322357178, |
|
"learning_rate": 9.838718594622083e-06, |
|
"loss": 0.3579, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.109375, |
|
"grad_norm": 1.9728875160217285, |
|
"learning_rate": 9.836719886766357e-06, |
|
"loss": 0.3411, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.10986328125, |
|
"grad_norm": 2.826547861099243, |
|
"learning_rate": 9.83470907593656e-06, |
|
"loss": 0.2803, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1103515625, |
|
"grad_norm": 2.5550942420959473, |
|
"learning_rate": 9.832686167164361e-06, |
|
"loss": 0.3537, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.11083984375, |
|
"grad_norm": 2.6079165935516357, |
|
"learning_rate": 9.830651165511707e-06, |
|
"loss": 0.3527, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.111328125, |
|
"grad_norm": 2.2585561275482178, |
|
"learning_rate": 9.828604076070805e-06, |
|
"loss": 0.3741, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.11181640625, |
|
"grad_norm": 2.335930585861206, |
|
"learning_rate": 9.826544903964105e-06, |
|
"loss": 0.34, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.1123046875, |
|
"grad_norm": 2.3235063552856445, |
|
"learning_rate": 9.824473654344297e-06, |
|
"loss": 0.3691, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11279296875, |
|
"grad_norm": 3.584376811981201, |
|
"learning_rate": 9.82239033239429e-06, |
|
"loss": 0.3548, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.11328125, |
|
"grad_norm": 3.483834743499756, |
|
"learning_rate": 9.820294943327202e-06, |
|
"loss": 0.3905, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.11376953125, |
|
"grad_norm": 2.4160964488983154, |
|
"learning_rate": 9.818187492386346e-06, |
|
"loss": 0.3723, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.1142578125, |
|
"grad_norm": 2.206505298614502, |
|
"learning_rate": 9.816067984845218e-06, |
|
"loss": 0.3572, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.11474609375, |
|
"grad_norm": 2.8877620697021484, |
|
"learning_rate": 9.813936426007487e-06, |
|
"loss": 0.3486, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.115234375, |
|
"grad_norm": 2.2150516510009766, |
|
"learning_rate": 9.81179282120697e-06, |
|
"loss": 0.3431, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.11572265625, |
|
"grad_norm": 4.500147819519043, |
|
"learning_rate": 9.809637175807634e-06, |
|
"loss": 0.3465, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1162109375, |
|
"grad_norm": 2.428119659423828, |
|
"learning_rate": 9.80746949520357e-06, |
|
"loss": 0.3193, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.11669921875, |
|
"grad_norm": 4.387357711791992, |
|
"learning_rate": 9.805289784818991e-06, |
|
"loss": 0.3789, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.1171875, |
|
"grad_norm": 2.6022865772247314, |
|
"learning_rate": 9.803098050108206e-06, |
|
"loss": 0.3744, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.11767578125, |
|
"grad_norm": 2.3189945220947266, |
|
"learning_rate": 9.800894296555618e-06, |
|
"loss": 0.3542, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.1181640625, |
|
"grad_norm": 2.428673505783081, |
|
"learning_rate": 9.798678529675702e-06, |
|
"loss": 0.354, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.11865234375, |
|
"grad_norm": 2.112927198410034, |
|
"learning_rate": 9.796450755012992e-06, |
|
"loss": 0.3541, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.119140625, |
|
"grad_norm": 3.9023051261901855, |
|
"learning_rate": 9.794210978142073e-06, |
|
"loss": 0.3902, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.11962890625, |
|
"grad_norm": 2.621843099594116, |
|
"learning_rate": 9.79195920466756e-06, |
|
"loss": 0.35, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1201171875, |
|
"grad_norm": 2.8156723976135254, |
|
"learning_rate": 9.789695440224094e-06, |
|
"loss": 0.3562, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.12060546875, |
|
"grad_norm": 4.237185001373291, |
|
"learning_rate": 9.78741969047631e-06, |
|
"loss": 0.3596, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.12109375, |
|
"grad_norm": 2.050010919570923, |
|
"learning_rate": 9.785131961118843e-06, |
|
"loss": 0.3562, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.12158203125, |
|
"grad_norm": 2.1943752765655518, |
|
"learning_rate": 9.782832257876302e-06, |
|
"loss": 0.3147, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.1220703125, |
|
"grad_norm": 3.3409993648529053, |
|
"learning_rate": 9.780520586503258e-06, |
|
"loss": 0.4023, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12255859375, |
|
"grad_norm": 2.073791027069092, |
|
"learning_rate": 9.77819695278423e-06, |
|
"loss": 0.3323, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.123046875, |
|
"grad_norm": 2.773463010787964, |
|
"learning_rate": 9.77586136253367e-06, |
|
"loss": 0.3461, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.12353515625, |
|
"grad_norm": 2.2921154499053955, |
|
"learning_rate": 9.773513821595951e-06, |
|
"loss": 0.3344, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.1240234375, |
|
"grad_norm": 2.6613571643829346, |
|
"learning_rate": 9.771154335845345e-06, |
|
"loss": 0.348, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.12451171875, |
|
"grad_norm": 8.336869239807129, |
|
"learning_rate": 9.768782911186023e-06, |
|
"loss": 0.3726, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 2.428882360458374, |
|
"learning_rate": 9.766399553552022e-06, |
|
"loss": 0.3765, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.12548828125, |
|
"grad_norm": 1.8940154314041138, |
|
"learning_rate": 9.764004268907244e-06, |
|
"loss": 0.3407, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.1259765625, |
|
"grad_norm": 2.5715792179107666, |
|
"learning_rate": 9.761597063245434e-06, |
|
"loss": 0.3679, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.12646484375, |
|
"grad_norm": 2.1206367015838623, |
|
"learning_rate": 9.759177942590166e-06, |
|
"loss": 0.3409, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.126953125, |
|
"grad_norm": 2.5495412349700928, |
|
"learning_rate": 9.756746912994832e-06, |
|
"loss": 0.3499, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.12744140625, |
|
"grad_norm": 2.9602348804473877, |
|
"learning_rate": 9.754303980542623e-06, |
|
"loss": 0.3706, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.1279296875, |
|
"grad_norm": 2.7507028579711914, |
|
"learning_rate": 9.751849151346513e-06, |
|
"loss": 0.3767, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.12841796875, |
|
"grad_norm": 2.539034843444824, |
|
"learning_rate": 9.749382431549247e-06, |
|
"loss": 0.3406, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.12890625, |
|
"grad_norm": 2.833279848098755, |
|
"learning_rate": 9.746903827323324e-06, |
|
"loss": 0.3522, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.12939453125, |
|
"grad_norm": 2.5430469512939453, |
|
"learning_rate": 9.74441334487098e-06, |
|
"loss": 0.3406, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.1298828125, |
|
"grad_norm": 2.858895778656006, |
|
"learning_rate": 9.741910990424173e-06, |
|
"loss": 0.3396, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.13037109375, |
|
"grad_norm": 3.113898515701294, |
|
"learning_rate": 9.739396770244575e-06, |
|
"loss": 0.3779, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.130859375, |
|
"grad_norm": 2.812479257583618, |
|
"learning_rate": 9.736870690623541e-06, |
|
"loss": 0.3581, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.13134765625, |
|
"grad_norm": 4.137664318084717, |
|
"learning_rate": 9.734332757882108e-06, |
|
"loss": 0.3731, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.1318359375, |
|
"grad_norm": 2.346695899963379, |
|
"learning_rate": 9.73178297837097e-06, |
|
"loss": 0.3499, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13232421875, |
|
"grad_norm": 3.5724024772644043, |
|
"learning_rate": 9.729221358470468e-06, |
|
"loss": 0.346, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.1328125, |
|
"grad_norm": 2.5001883506774902, |
|
"learning_rate": 9.726647904590572e-06, |
|
"loss": 0.3371, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.13330078125, |
|
"grad_norm": 1.8020128011703491, |
|
"learning_rate": 9.724062623170855e-06, |
|
"loss": 0.3632, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.1337890625, |
|
"grad_norm": 2.486666679382324, |
|
"learning_rate": 9.721465520680501e-06, |
|
"loss": 0.3505, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.13427734375, |
|
"grad_norm": 2.269751787185669, |
|
"learning_rate": 9.718856603618263e-06, |
|
"loss": 0.3718, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.134765625, |
|
"grad_norm": 2.7286322116851807, |
|
"learning_rate": 9.716235878512462e-06, |
|
"loss": 0.3462, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.13525390625, |
|
"grad_norm": 2.535698175430298, |
|
"learning_rate": 9.713603351920964e-06, |
|
"loss": 0.3451, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.1357421875, |
|
"grad_norm": 1.9008198976516724, |
|
"learning_rate": 9.710959030431167e-06, |
|
"loss": 0.3924, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.13623046875, |
|
"grad_norm": 2.339395046234131, |
|
"learning_rate": 9.708302920659987e-06, |
|
"loss": 0.3331, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.13671875, |
|
"grad_norm": 2.376002550125122, |
|
"learning_rate": 9.705635029253833e-06, |
|
"loss": 0.3815, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.13720703125, |
|
"grad_norm": 2.245027780532837, |
|
"learning_rate": 9.702955362888595e-06, |
|
"loss": 0.3548, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.1376953125, |
|
"grad_norm": 2.206878900527954, |
|
"learning_rate": 9.700263928269636e-06, |
|
"loss": 0.3204, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.13818359375, |
|
"grad_norm": 2.0215516090393066, |
|
"learning_rate": 9.697560732131753e-06, |
|
"loss": 0.3387, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.138671875, |
|
"grad_norm": 2.9142580032348633, |
|
"learning_rate": 9.694845781239188e-06, |
|
"loss": 0.3336, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.13916015625, |
|
"grad_norm": 2.0387048721313477, |
|
"learning_rate": 9.692119082385588e-06, |
|
"loss": 0.3342, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.1396484375, |
|
"grad_norm": 2.3236615657806396, |
|
"learning_rate": 9.689380642393998e-06, |
|
"loss": 0.3773, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.14013671875, |
|
"grad_norm": 3.4590189456939697, |
|
"learning_rate": 9.686630468116846e-06, |
|
"loss": 0.3358, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.140625, |
|
"grad_norm": 1.6319761276245117, |
|
"learning_rate": 9.683868566435922e-06, |
|
"loss": 0.2913, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.14111328125, |
|
"grad_norm": 6.874841690063477, |
|
"learning_rate": 9.681094944262361e-06, |
|
"loss": 0.3259, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.1416015625, |
|
"grad_norm": 4.962515830993652, |
|
"learning_rate": 9.678309608536626e-06, |
|
"loss": 0.3455, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14208984375, |
|
"grad_norm": 3.334455966949463, |
|
"learning_rate": 9.675512566228493e-06, |
|
"loss": 0.3561, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.142578125, |
|
"grad_norm": 3.891530990600586, |
|
"learning_rate": 9.672703824337026e-06, |
|
"loss": 0.3627, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.14306640625, |
|
"grad_norm": 2.2160141468048096, |
|
"learning_rate": 9.669883389890572e-06, |
|
"loss": 0.312, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.1435546875, |
|
"grad_norm": 3.7108445167541504, |
|
"learning_rate": 9.667051269946734e-06, |
|
"loss": 0.338, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.14404296875, |
|
"grad_norm": 2.138221025466919, |
|
"learning_rate": 9.664207471592353e-06, |
|
"loss": 0.3767, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.14453125, |
|
"grad_norm": 11.57601547241211, |
|
"learning_rate": 9.661352001943494e-06, |
|
"loss": 0.3481, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.14501953125, |
|
"grad_norm": 2.1737406253814697, |
|
"learning_rate": 9.658484868145428e-06, |
|
"loss": 0.3319, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.1455078125, |
|
"grad_norm": 4.048387050628662, |
|
"learning_rate": 9.655606077372619e-06, |
|
"loss": 0.3061, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.14599609375, |
|
"grad_norm": 2.4968268871307373, |
|
"learning_rate": 9.652715636828687e-06, |
|
"loss": 0.333, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.146484375, |
|
"grad_norm": 2.2704763412475586, |
|
"learning_rate": 9.649813553746416e-06, |
|
"loss": 0.3307, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14697265625, |
|
"grad_norm": 1.9303852319717407, |
|
"learning_rate": 9.646899835387718e-06, |
|
"loss": 0.3342, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.1474609375, |
|
"grad_norm": 2.8917553424835205, |
|
"learning_rate": 9.64397448904362e-06, |
|
"loss": 0.3595, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.14794921875, |
|
"grad_norm": 2.193105697631836, |
|
"learning_rate": 9.641037522034246e-06, |
|
"loss": 0.3675, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.1484375, |
|
"grad_norm": 1.9201539754867554, |
|
"learning_rate": 9.638088941708799e-06, |
|
"loss": 0.353, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.14892578125, |
|
"grad_norm": 2.513864517211914, |
|
"learning_rate": 9.635128755445542e-06, |
|
"loss": 0.3669, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.1494140625, |
|
"grad_norm": 2.397608518600464, |
|
"learning_rate": 9.63215697065178e-06, |
|
"loss": 0.3439, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.14990234375, |
|
"grad_norm": 2.335594654083252, |
|
"learning_rate": 9.62917359476384e-06, |
|
"loss": 0.3558, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.150390625, |
|
"grad_norm": 2.5134353637695312, |
|
"learning_rate": 9.626178635247054e-06, |
|
"loss": 0.3923, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.15087890625, |
|
"grad_norm": 2.9013524055480957, |
|
"learning_rate": 9.623172099595743e-06, |
|
"loss": 0.3748, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.1513671875, |
|
"grad_norm": 3.2646868228912354, |
|
"learning_rate": 9.620153995333188e-06, |
|
"loss": 0.3268, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.15185546875, |
|
"grad_norm": 2.843632459640503, |
|
"learning_rate": 9.617124330011624e-06, |
|
"loss": 0.3392, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.15234375, |
|
"grad_norm": 2.5182275772094727, |
|
"learning_rate": 9.614083111212216e-06, |
|
"loss": 0.3849, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.15283203125, |
|
"grad_norm": 2.9543368816375732, |
|
"learning_rate": 9.611030346545035e-06, |
|
"loss": 0.3784, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.1533203125, |
|
"grad_norm": 3.7902252674102783, |
|
"learning_rate": 9.607966043649047e-06, |
|
"loss": 0.3466, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.15380859375, |
|
"grad_norm": 2.4927687644958496, |
|
"learning_rate": 9.604890210192084e-06, |
|
"loss": 0.3638, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.154296875, |
|
"grad_norm": 4.722542762756348, |
|
"learning_rate": 9.601802853870843e-06, |
|
"loss": 0.3439, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.15478515625, |
|
"grad_norm": 2.0797646045684814, |
|
"learning_rate": 9.598703982410842e-06, |
|
"loss": 0.373, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.1552734375, |
|
"grad_norm": 2.1771399974823, |
|
"learning_rate": 9.595593603566423e-06, |
|
"loss": 0.3112, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.15576171875, |
|
"grad_norm": 2.621591091156006, |
|
"learning_rate": 9.592471725120714e-06, |
|
"loss": 0.3384, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 4.34113883972168, |
|
"learning_rate": 9.58933835488563e-06, |
|
"loss": 0.3488, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.15673828125, |
|
"grad_norm": 3.58477783203125, |
|
"learning_rate": 9.58619350070183e-06, |
|
"loss": 0.3329, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.1572265625, |
|
"grad_norm": 2.657738208770752, |
|
"learning_rate": 9.583037170438719e-06, |
|
"loss": 0.3371, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.15771484375, |
|
"grad_norm": 2.3004322052001953, |
|
"learning_rate": 9.579869371994412e-06, |
|
"loss": 0.3658, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.158203125, |
|
"grad_norm": 3.4922330379486084, |
|
"learning_rate": 9.576690113295726e-06, |
|
"loss": 0.3713, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.15869140625, |
|
"grad_norm": 4.173436641693115, |
|
"learning_rate": 9.573499402298152e-06, |
|
"loss": 0.3349, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1591796875, |
|
"grad_norm": 12.521305084228516, |
|
"learning_rate": 9.570297246985838e-06, |
|
"loss": 0.3411, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.15966796875, |
|
"grad_norm": 3.122694253921509, |
|
"learning_rate": 9.567083655371572e-06, |
|
"loss": 0.3644, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.16015625, |
|
"grad_norm": 1.6851651668548584, |
|
"learning_rate": 9.563858635496755e-06, |
|
"loss": 0.3567, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.16064453125, |
|
"grad_norm": 2.407923698425293, |
|
"learning_rate": 9.56062219543139e-06, |
|
"loss": 0.3298, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.1611328125, |
|
"grad_norm": 1.9536917209625244, |
|
"learning_rate": 9.557374343274056e-06, |
|
"loss": 0.352, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16162109375, |
|
"grad_norm": 2.042382001876831, |
|
"learning_rate": 9.55411508715188e-06, |
|
"loss": 0.3249, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.162109375, |
|
"grad_norm": 1.9811147451400757, |
|
"learning_rate": 9.55084443522054e-06, |
|
"loss": 0.3341, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.16259765625, |
|
"grad_norm": 2.6401963233947754, |
|
"learning_rate": 9.547562395664219e-06, |
|
"loss": 0.3296, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.1630859375, |
|
"grad_norm": 2.3292157649993896, |
|
"learning_rate": 9.544268976695596e-06, |
|
"loss": 0.3446, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.16357421875, |
|
"grad_norm": 3.5120034217834473, |
|
"learning_rate": 9.54096418655583e-06, |
|
"loss": 0.3796, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.1640625, |
|
"grad_norm": 2.3993301391601562, |
|
"learning_rate": 9.53764803351453e-06, |
|
"loss": 0.3544, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.16455078125, |
|
"grad_norm": 2.403285264968872, |
|
"learning_rate": 9.534320525869742e-06, |
|
"loss": 0.3734, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.1650390625, |
|
"grad_norm": 1.878564476966858, |
|
"learning_rate": 9.530981671947924e-06, |
|
"loss": 0.3334, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.16552734375, |
|
"grad_norm": 3.3280200958251953, |
|
"learning_rate": 9.527631480103919e-06, |
|
"loss": 0.3282, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.166015625, |
|
"grad_norm": 2.304945230484009, |
|
"learning_rate": 9.524269958720951e-06, |
|
"loss": 0.3422, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.16650390625, |
|
"grad_norm": 2.0590991973876953, |
|
"learning_rate": 9.520897116210588e-06, |
|
"loss": 0.355, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.1669921875, |
|
"grad_norm": 1.660049557685852, |
|
"learning_rate": 9.517512961012729e-06, |
|
"loss": 0.3499, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.16748046875, |
|
"grad_norm": 1.8652247190475464, |
|
"learning_rate": 9.514117501595582e-06, |
|
"loss": 0.3594, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.16796875, |
|
"grad_norm": 1.7373839616775513, |
|
"learning_rate": 9.510710746455636e-06, |
|
"loss": 0.3447, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.16845703125, |
|
"grad_norm": 2.8204782009124756, |
|
"learning_rate": 9.507292704117655e-06, |
|
"loss": 0.362, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.1689453125, |
|
"grad_norm": 1.6446189880371094, |
|
"learning_rate": 9.503863383134636e-06, |
|
"loss": 0.3752, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.16943359375, |
|
"grad_norm": 3.4714109897613525, |
|
"learning_rate": 9.500422792087809e-06, |
|
"loss": 0.3358, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.169921875, |
|
"grad_norm": 2.125108003616333, |
|
"learning_rate": 9.496970939586598e-06, |
|
"loss": 0.3822, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.17041015625, |
|
"grad_norm": 2.7372467517852783, |
|
"learning_rate": 9.493507834268609e-06, |
|
"loss": 0.3513, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.1708984375, |
|
"grad_norm": 2.562140941619873, |
|
"learning_rate": 9.490033484799608e-06, |
|
"loss": 0.3727, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17138671875, |
|
"grad_norm": 2.868966817855835, |
|
"learning_rate": 9.486547899873495e-06, |
|
"loss": 0.3309, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.171875, |
|
"grad_norm": 2.5418648719787598, |
|
"learning_rate": 9.483051088212283e-06, |
|
"loss": 0.3826, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.17236328125, |
|
"grad_norm": 1.7842854261398315, |
|
"learning_rate": 9.479543058566081e-06, |
|
"loss": 0.3404, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.1728515625, |
|
"grad_norm": 1.8991374969482422, |
|
"learning_rate": 9.47602381971307e-06, |
|
"loss": 0.3946, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.17333984375, |
|
"grad_norm": 1.9261831045150757, |
|
"learning_rate": 9.472493380459474e-06, |
|
"loss": 0.3579, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.173828125, |
|
"grad_norm": 1.6657100915908813, |
|
"learning_rate": 9.468951749639552e-06, |
|
"loss": 0.3405, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.17431640625, |
|
"grad_norm": 2.1538491249084473, |
|
"learning_rate": 9.465398936115557e-06, |
|
"loss": 0.3657, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.1748046875, |
|
"grad_norm": 1.8424322605133057, |
|
"learning_rate": 9.461834948777738e-06, |
|
"loss": 0.3685, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.17529296875, |
|
"grad_norm": 3.16018009185791, |
|
"learning_rate": 9.458259796544293e-06, |
|
"loss": 0.3225, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.17578125, |
|
"grad_norm": 1.7529760599136353, |
|
"learning_rate": 9.454673488361363e-06, |
|
"loss": 0.3428, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.17626953125, |
|
"grad_norm": 1.6713848114013672, |
|
"learning_rate": 9.451076033203003e-06, |
|
"loss": 0.3383, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.1767578125, |
|
"grad_norm": 2.688614845275879, |
|
"learning_rate": 9.447467440071165e-06, |
|
"loss": 0.3553, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.17724609375, |
|
"grad_norm": 2.0093319416046143, |
|
"learning_rate": 9.443847717995666e-06, |
|
"loss": 0.3689, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.177734375, |
|
"grad_norm": 5.026141166687012, |
|
"learning_rate": 9.440216876034177e-06, |
|
"loss": 0.3072, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.17822265625, |
|
"grad_norm": 2.687075138092041, |
|
"learning_rate": 9.436574923272188e-06, |
|
"loss": 0.3624, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.1787109375, |
|
"grad_norm": 1.9798976182937622, |
|
"learning_rate": 9.432921868822997e-06, |
|
"loss": 0.3355, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.17919921875, |
|
"grad_norm": 2.060910701751709, |
|
"learning_rate": 9.42925772182768e-06, |
|
"loss": 0.3435, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.1796875, |
|
"grad_norm": 1.7003917694091797, |
|
"learning_rate": 9.425582491455068e-06, |
|
"loss": 0.3659, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.18017578125, |
|
"grad_norm": 2.026036262512207, |
|
"learning_rate": 9.421896186901729e-06, |
|
"loss": 0.3523, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.1806640625, |
|
"grad_norm": 1.9931825399398804, |
|
"learning_rate": 9.418198817391941e-06, |
|
"loss": 0.3654, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18115234375, |
|
"grad_norm": 2.7290432453155518, |
|
"learning_rate": 9.41449039217767e-06, |
|
"loss": 0.3599, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.181640625, |
|
"grad_norm": 1.5444127321243286, |
|
"learning_rate": 9.410770920538545e-06, |
|
"loss": 0.2991, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.18212890625, |
|
"grad_norm": 2.319566011428833, |
|
"learning_rate": 9.407040411781843e-06, |
|
"loss": 0.3724, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.1826171875, |
|
"grad_norm": 1.9856535196304321, |
|
"learning_rate": 9.403298875242448e-06, |
|
"loss": 0.348, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.18310546875, |
|
"grad_norm": 1.9270925521850586, |
|
"learning_rate": 9.39954632028285e-06, |
|
"loss": 0.3766, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.18359375, |
|
"grad_norm": 2.2769391536712646, |
|
"learning_rate": 9.395782756293104e-06, |
|
"loss": 0.3563, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.18408203125, |
|
"grad_norm": 2.2026526927948, |
|
"learning_rate": 9.392008192690816e-06, |
|
"loss": 0.3213, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.1845703125, |
|
"grad_norm": 2.3757741451263428, |
|
"learning_rate": 9.388222638921116e-06, |
|
"loss": 0.3595, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.18505859375, |
|
"grad_norm": 1.9485424757003784, |
|
"learning_rate": 9.384426104456632e-06, |
|
"loss": 0.3561, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.185546875, |
|
"grad_norm": 2.7337324619293213, |
|
"learning_rate": 9.380618598797473e-06, |
|
"loss": 0.38, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.18603515625, |
|
"grad_norm": 2.1130242347717285, |
|
"learning_rate": 9.3768001314712e-06, |
|
"loss": 0.3533, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.1865234375, |
|
"grad_norm": 1.831874966621399, |
|
"learning_rate": 9.372970712032803e-06, |
|
"loss": 0.332, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.18701171875, |
|
"grad_norm": 2.3811991214752197, |
|
"learning_rate": 9.369130350064677e-06, |
|
"loss": 0.3798, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 1.8242988586425781, |
|
"learning_rate": 9.3652790551766e-06, |
|
"loss": 0.3634, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.18798828125, |
|
"grad_norm": 3.14345645904541, |
|
"learning_rate": 9.361416837005705e-06, |
|
"loss": 0.3513, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.1884765625, |
|
"grad_norm": 1.9473716020584106, |
|
"learning_rate": 9.357543705216465e-06, |
|
"loss": 0.3687, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.18896484375, |
|
"grad_norm": 1.982612133026123, |
|
"learning_rate": 9.353659669500652e-06, |
|
"loss": 0.3803, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.189453125, |
|
"grad_norm": 1.774999976158142, |
|
"learning_rate": 9.349764739577334e-06, |
|
"loss": 0.3331, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.18994140625, |
|
"grad_norm": 1.5273141860961914, |
|
"learning_rate": 9.34585892519283e-06, |
|
"loss": 0.3599, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.1904296875, |
|
"grad_norm": 1.8035123348236084, |
|
"learning_rate": 9.3419422361207e-06, |
|
"loss": 0.3771, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19091796875, |
|
"grad_norm": 1.789610505104065, |
|
"learning_rate": 9.338014682161719e-06, |
|
"loss": 0.3236, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.19140625, |
|
"grad_norm": 1.9845644235610962, |
|
"learning_rate": 9.334076273143843e-06, |
|
"loss": 0.3274, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.19189453125, |
|
"grad_norm": 2.072159767150879, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 0.3416, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.1923828125, |
|
"grad_norm": 1.8441466093063354, |
|
"learning_rate": 9.326166929379032e-06, |
|
"loss": 0.3352, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.19287109375, |
|
"grad_norm": 2.479971170425415, |
|
"learning_rate": 9.322196014423729e-06, |
|
"loss": 0.3472, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.193359375, |
|
"grad_norm": 2.514597177505493, |
|
"learning_rate": 9.318214283992747e-06, |
|
"loss": 0.3544, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.19384765625, |
|
"grad_norm": 2.048144578933716, |
|
"learning_rate": 9.314221748049613e-06, |
|
"loss": 0.3869, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.1943359375, |
|
"grad_norm": 2.8453140258789062, |
|
"learning_rate": 9.310218416584887e-06, |
|
"loss": 0.3734, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.19482421875, |
|
"grad_norm": 1.6406381130218506, |
|
"learning_rate": 9.306204299616148e-06, |
|
"loss": 0.3507, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"grad_norm": 2.275040626525879, |
|
"learning_rate": 9.302179407187965e-06, |
|
"loss": 0.3787, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19580078125, |
|
"grad_norm": 1.522905945777893, |
|
"learning_rate": 9.298143749371865e-06, |
|
"loss": 0.341, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.1962890625, |
|
"grad_norm": 2.3068466186523438, |
|
"learning_rate": 9.294097336266317e-06, |
|
"loss": 0.3686, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.19677734375, |
|
"grad_norm": 2.8621833324432373, |
|
"learning_rate": 9.290040177996703e-06, |
|
"loss": 0.3331, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.197265625, |
|
"grad_norm": 2.339892864227295, |
|
"learning_rate": 9.285972284715291e-06, |
|
"loss": 0.3889, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.19775390625, |
|
"grad_norm": 1.7295536994934082, |
|
"learning_rate": 9.281893666601214e-06, |
|
"loss": 0.3692, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.1982421875, |
|
"grad_norm": 4.145984649658203, |
|
"learning_rate": 9.277804333860435e-06, |
|
"loss": 0.3387, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.19873046875, |
|
"grad_norm": 1.866166114807129, |
|
"learning_rate": 9.273704296725741e-06, |
|
"loss": 0.3503, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.19921875, |
|
"grad_norm": 1.8600391149520874, |
|
"learning_rate": 9.269593565456691e-06, |
|
"loss": 0.347, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.19970703125, |
|
"grad_norm": 1.990860104560852, |
|
"learning_rate": 9.265472150339615e-06, |
|
"loss": 0.3642, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.2001953125, |
|
"grad_norm": 1.4612618684768677, |
|
"learning_rate": 9.26134006168757e-06, |
|
"loss": 0.3624, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.20068359375, |
|
"grad_norm": 1.4518144130706787, |
|
"learning_rate": 9.257197309840322e-06, |
|
"loss": 0.3374, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.201171875, |
|
"grad_norm": 1.5550000667572021, |
|
"learning_rate": 9.253043905164327e-06, |
|
"loss": 0.3651, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.20166015625, |
|
"grad_norm": 1.9353028535842896, |
|
"learning_rate": 9.248879858052688e-06, |
|
"loss": 0.3111, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.2021484375, |
|
"grad_norm": 1.5865511894226074, |
|
"learning_rate": 9.244705178925146e-06, |
|
"loss": 0.3734, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.20263671875, |
|
"grad_norm": 1.9505976438522339, |
|
"learning_rate": 9.24051987822804e-06, |
|
"loss": 0.3294, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.203125, |
|
"grad_norm": 1.7402981519699097, |
|
"learning_rate": 9.236323966434296e-06, |
|
"loss": 0.3664, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.20361328125, |
|
"grad_norm": 2.2276546955108643, |
|
"learning_rate": 9.232117454043383e-06, |
|
"loss": 0.3943, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.2041015625, |
|
"grad_norm": 2.5883917808532715, |
|
"learning_rate": 9.227900351581303e-06, |
|
"loss": 0.3759, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.20458984375, |
|
"grad_norm": 2.116527795791626, |
|
"learning_rate": 9.223672669600552e-06, |
|
"loss": 0.371, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.205078125, |
|
"grad_norm": 1.890336036682129, |
|
"learning_rate": 9.219434418680107e-06, |
|
"loss": 0.3208, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.20556640625, |
|
"grad_norm": 2.831151247024536, |
|
"learning_rate": 9.215185609425383e-06, |
|
"loss": 0.3283, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.2060546875, |
|
"grad_norm": 1.890857458114624, |
|
"learning_rate": 9.21092625246822e-06, |
|
"loss": 0.3634, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.20654296875, |
|
"grad_norm": 1.4543401002883911, |
|
"learning_rate": 9.206656358466851e-06, |
|
"loss": 0.3615, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.20703125, |
|
"grad_norm": 1.9577465057373047, |
|
"learning_rate": 9.202375938105876e-06, |
|
"loss": 0.364, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.20751953125, |
|
"grad_norm": 1.5794016122817993, |
|
"learning_rate": 9.198085002096237e-06, |
|
"loss": 0.34, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2080078125, |
|
"grad_norm": 1.8114027976989746, |
|
"learning_rate": 9.193783561175184e-06, |
|
"loss": 0.3413, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.20849609375, |
|
"grad_norm": 1.5112391710281372, |
|
"learning_rate": 9.189471626106261e-06, |
|
"loss": 0.3558, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.208984375, |
|
"grad_norm": 1.5750012397766113, |
|
"learning_rate": 9.185149207679263e-06, |
|
"loss": 0.3211, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.20947265625, |
|
"grad_norm": 1.5355925559997559, |
|
"learning_rate": 9.180816316710226e-06, |
|
"loss": 0.316, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.2099609375, |
|
"grad_norm": 1.7540535926818848, |
|
"learning_rate": 9.176472964041385e-06, |
|
"loss": 0.3446, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.21044921875, |
|
"grad_norm": 1.94683837890625, |
|
"learning_rate": 9.172119160541158e-06, |
|
"loss": 0.3894, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.2109375, |
|
"grad_norm": 2.1505014896392822, |
|
"learning_rate": 9.167754917104112e-06, |
|
"loss": 0.3516, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.21142578125, |
|
"grad_norm": 3.6382253170013428, |
|
"learning_rate": 9.163380244650938e-06, |
|
"loss": 0.3766, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.2119140625, |
|
"grad_norm": 1.4218906164169312, |
|
"learning_rate": 9.158995154128425e-06, |
|
"loss": 0.3377, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.21240234375, |
|
"grad_norm": 1.6487233638763428, |
|
"learning_rate": 9.15459965650943e-06, |
|
"loss": 0.3198, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.212890625, |
|
"grad_norm": 6.333557605743408, |
|
"learning_rate": 9.15019376279285e-06, |
|
"loss": 0.3336, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.21337890625, |
|
"grad_norm": 1.746251106262207, |
|
"learning_rate": 9.1457774840036e-06, |
|
"loss": 0.3434, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.2138671875, |
|
"grad_norm": 2.1596200466156006, |
|
"learning_rate": 9.14135083119258e-06, |
|
"loss": 0.3496, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.21435546875, |
|
"grad_norm": 1.7951174974441528, |
|
"learning_rate": 9.13691381543665e-06, |
|
"loss": 0.3589, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.21484375, |
|
"grad_norm": 1.7067686319351196, |
|
"learning_rate": 9.132466447838598e-06, |
|
"loss": 0.3367, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21533203125, |
|
"grad_norm": 2.095935344696045, |
|
"learning_rate": 9.128008739527119e-06, |
|
"loss": 0.3305, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.2158203125, |
|
"grad_norm": 2.011528968811035, |
|
"learning_rate": 9.123540701656782e-06, |
|
"loss": 0.368, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.21630859375, |
|
"grad_norm": 1.4319236278533936, |
|
"learning_rate": 9.119062345408005e-06, |
|
"loss": 0.3288, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.216796875, |
|
"grad_norm": 1.8978536128997803, |
|
"learning_rate": 9.114573681987024e-06, |
|
"loss": 0.3222, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.21728515625, |
|
"grad_norm": 1.8402870893478394, |
|
"learning_rate": 9.11007472262587e-06, |
|
"loss": 0.3286, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.2177734375, |
|
"grad_norm": 1.8938474655151367, |
|
"learning_rate": 9.105565478582335e-06, |
|
"loss": 0.3725, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.21826171875, |
|
"grad_norm": 1.723388433456421, |
|
"learning_rate": 9.101045961139945e-06, |
|
"loss": 0.3634, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 1.8326998949050903, |
|
"learning_rate": 9.096516181607935e-06, |
|
"loss": 0.3276, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.21923828125, |
|
"grad_norm": 1.6433813571929932, |
|
"learning_rate": 9.09197615132122e-06, |
|
"loss": 0.3637, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.2197265625, |
|
"grad_norm": 1.482116460800171, |
|
"learning_rate": 9.087425881640366e-06, |
|
"loss": 0.3413, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22021484375, |
|
"grad_norm": 5.252507209777832, |
|
"learning_rate": 9.082865383951558e-06, |
|
"loss": 0.35, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.220703125, |
|
"grad_norm": 1.4982550144195557, |
|
"learning_rate": 9.078294669666577e-06, |
|
"loss": 0.3354, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.22119140625, |
|
"grad_norm": 2.408413887023926, |
|
"learning_rate": 9.073713750222766e-06, |
|
"loss": 0.3376, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.2216796875, |
|
"grad_norm": 1.682771921157837, |
|
"learning_rate": 9.069122637083012e-06, |
|
"loss": 0.3131, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.22216796875, |
|
"grad_norm": 1.6665334701538086, |
|
"learning_rate": 9.064521341735702e-06, |
|
"loss": 0.3348, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.22265625, |
|
"grad_norm": 1.3198261260986328, |
|
"learning_rate": 9.059909875694703e-06, |
|
"loss": 0.3087, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.22314453125, |
|
"grad_norm": 2.0489742755889893, |
|
"learning_rate": 9.055288250499339e-06, |
|
"loss": 0.3549, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.2236328125, |
|
"grad_norm": 1.4335616827011108, |
|
"learning_rate": 9.050656477714345e-06, |
|
"loss": 0.3859, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.22412109375, |
|
"grad_norm": 1.9734736680984497, |
|
"learning_rate": 9.046014568929856e-06, |
|
"loss": 0.358, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.224609375, |
|
"grad_norm": 1.8493421077728271, |
|
"learning_rate": 9.04136253576137e-06, |
|
"loss": 0.3306, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22509765625, |
|
"grad_norm": 2.6172261238098145, |
|
"learning_rate": 9.036700389849717e-06, |
|
"loss": 0.3481, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.2255859375, |
|
"grad_norm": 1.538042664527893, |
|
"learning_rate": 9.03202814286103e-06, |
|
"loss": 0.3154, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.22607421875, |
|
"grad_norm": 2.418534278869629, |
|
"learning_rate": 9.027345806486722e-06, |
|
"loss": 0.3247, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.2265625, |
|
"grad_norm": 1.7823346853256226, |
|
"learning_rate": 9.022653392443455e-06, |
|
"loss": 0.338, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.22705078125, |
|
"grad_norm": 1.9469126462936401, |
|
"learning_rate": 9.0179509124731e-06, |
|
"loss": 0.3377, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2275390625, |
|
"grad_norm": 1.985723614692688, |
|
"learning_rate": 9.013238378342725e-06, |
|
"loss": 0.3438, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.22802734375, |
|
"grad_norm": 1.5227419137954712, |
|
"learning_rate": 9.008515801844552e-06, |
|
"loss": 0.3392, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.228515625, |
|
"grad_norm": 2.764451026916504, |
|
"learning_rate": 9.003783194795931e-06, |
|
"loss": 0.3439, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.22900390625, |
|
"grad_norm": 1.489700198173523, |
|
"learning_rate": 8.999040569039315e-06, |
|
"loss": 0.3654, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.2294921875, |
|
"grad_norm": 2.0311126708984375, |
|
"learning_rate": 8.994287936442226e-06, |
|
"loss": 0.3312, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.22998046875, |
|
"grad_norm": 1.7580716609954834, |
|
"learning_rate": 8.989525308897223e-06, |
|
"loss": 0.3573, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.23046875, |
|
"grad_norm": 1.7429345846176147, |
|
"learning_rate": 8.98475269832188e-06, |
|
"loss": 0.3757, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.23095703125, |
|
"grad_norm": 1.544498085975647, |
|
"learning_rate": 8.97997011665875e-06, |
|
"loss": 0.2787, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.2314453125, |
|
"grad_norm": 1.6220890283584595, |
|
"learning_rate": 8.975177575875335e-06, |
|
"loss": 0.3597, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.23193359375, |
|
"grad_norm": 1.598620057106018, |
|
"learning_rate": 8.97037508796406e-06, |
|
"loss": 0.3615, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.232421875, |
|
"grad_norm": 1.567460298538208, |
|
"learning_rate": 8.96556266494224e-06, |
|
"loss": 0.3613, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.23291015625, |
|
"grad_norm": 1.5737589597702026, |
|
"learning_rate": 8.960740318852051e-06, |
|
"loss": 0.3699, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.2333984375, |
|
"grad_norm": 1.9563899040222168, |
|
"learning_rate": 8.9559080617605e-06, |
|
"loss": 0.3578, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.23388671875, |
|
"grad_norm": 2.225196599960327, |
|
"learning_rate": 8.951065905759392e-06, |
|
"loss": 0.3346, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.234375, |
|
"grad_norm": 1.5860683917999268, |
|
"learning_rate": 8.946213862965306e-06, |
|
"loss": 0.3741, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.23486328125, |
|
"grad_norm": 1.289207935333252, |
|
"learning_rate": 8.941351945519557e-06, |
|
"loss": 0.3434, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.2353515625, |
|
"grad_norm": 1.576648235321045, |
|
"learning_rate": 8.936480165588174e-06, |
|
"loss": 0.3513, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.23583984375, |
|
"grad_norm": 1.5328677892684937, |
|
"learning_rate": 8.931598535361855e-06, |
|
"loss": 0.3299, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.236328125, |
|
"grad_norm": 1.439266562461853, |
|
"learning_rate": 8.926707067055963e-06, |
|
"loss": 0.3077, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.23681640625, |
|
"grad_norm": 1.6571671962738037, |
|
"learning_rate": 8.921805772910463e-06, |
|
"loss": 0.3666, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.2373046875, |
|
"grad_norm": 2.0075385570526123, |
|
"learning_rate": 8.916894665189918e-06, |
|
"loss": 0.3695, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.23779296875, |
|
"grad_norm": 1.3680145740509033, |
|
"learning_rate": 8.91197375618344e-06, |
|
"loss": 0.3393, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.23828125, |
|
"grad_norm": 1.9149501323699951, |
|
"learning_rate": 8.907043058204674e-06, |
|
"loss": 0.3374, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.23876953125, |
|
"grad_norm": 1.5481083393096924, |
|
"learning_rate": 8.902102583591755e-06, |
|
"loss": 0.3263, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.2392578125, |
|
"grad_norm": 1.8688881397247314, |
|
"learning_rate": 8.89715234470728e-06, |
|
"loss": 0.3207, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.23974609375, |
|
"grad_norm": 1.846941351890564, |
|
"learning_rate": 8.892192353938288e-06, |
|
"loss": 0.3677, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.240234375, |
|
"grad_norm": 1.4003583192825317, |
|
"learning_rate": 8.887222623696213e-06, |
|
"loss": 0.3281, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.24072265625, |
|
"grad_norm": 1.9100502729415894, |
|
"learning_rate": 8.882243166416862e-06, |
|
"loss": 0.3685, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.2412109375, |
|
"grad_norm": 1.6730045080184937, |
|
"learning_rate": 8.877253994560381e-06, |
|
"loss": 0.3482, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.24169921875, |
|
"grad_norm": 1.4065086841583252, |
|
"learning_rate": 8.87225512061123e-06, |
|
"loss": 0.3404, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2421875, |
|
"grad_norm": 1.5349781513214111, |
|
"learning_rate": 8.867246557078141e-06, |
|
"loss": 0.3279, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.24267578125, |
|
"grad_norm": 1.376725196838379, |
|
"learning_rate": 8.862228316494094e-06, |
|
"loss": 0.3384, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.2431640625, |
|
"grad_norm": 1.5585695505142212, |
|
"learning_rate": 8.857200411416283e-06, |
|
"loss": 0.3638, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.24365234375, |
|
"grad_norm": 3.5493311882019043, |
|
"learning_rate": 8.852162854426087e-06, |
|
"loss": 0.3561, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.244140625, |
|
"grad_norm": 2.1406612396240234, |
|
"learning_rate": 8.84711565812904e-06, |
|
"loss": 0.3097, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24462890625, |
|
"grad_norm": 1.5322456359863281, |
|
"learning_rate": 8.842058835154789e-06, |
|
"loss": 0.36, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.2451171875, |
|
"grad_norm": 2.3245677947998047, |
|
"learning_rate": 8.836992398157076e-06, |
|
"loss": 0.3479, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.24560546875, |
|
"grad_norm": 1.8092581033706665, |
|
"learning_rate": 8.831916359813702e-06, |
|
"loss": 0.3292, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.24609375, |
|
"grad_norm": 1.6669384241104126, |
|
"learning_rate": 8.826830732826484e-06, |
|
"loss": 0.357, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.24658203125, |
|
"grad_norm": 1.3617286682128906, |
|
"learning_rate": 8.821735529921243e-06, |
|
"loss": 0.3434, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.2470703125, |
|
"grad_norm": 5.754039287567139, |
|
"learning_rate": 8.816630763847756e-06, |
|
"loss": 0.3677, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.24755859375, |
|
"grad_norm": 1.2652654647827148, |
|
"learning_rate": 8.811516447379734e-06, |
|
"loss": 0.3573, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.248046875, |
|
"grad_norm": 1.6732009649276733, |
|
"learning_rate": 8.806392593314781e-06, |
|
"loss": 0.3398, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.24853515625, |
|
"grad_norm": 1.280765175819397, |
|
"learning_rate": 8.801259214474371e-06, |
|
"loss": 0.3371, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.2490234375, |
|
"grad_norm": 1.2774041891098022, |
|
"learning_rate": 8.796116323703811e-06, |
|
"loss": 0.3466, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.24951171875, |
|
"grad_norm": 1.4741958379745483, |
|
"learning_rate": 8.790963933872212e-06, |
|
"loss": 0.3506, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.4504543542861938, |
|
"learning_rate": 8.785802057872447e-06, |
|
"loss": 0.4083, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.25048828125, |
|
"grad_norm": 1.4813644886016846, |
|
"learning_rate": 8.780630708621135e-06, |
|
"loss": 0.382, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.2509765625, |
|
"grad_norm": 1.6617738008499146, |
|
"learning_rate": 8.775449899058597e-06, |
|
"loss": 0.3387, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.25146484375, |
|
"grad_norm": 1.8677629232406616, |
|
"learning_rate": 8.770259642148826e-06, |
|
"loss": 0.3422, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.251953125, |
|
"grad_norm": 1.4123599529266357, |
|
"learning_rate": 8.765059950879454e-06, |
|
"loss": 0.3621, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.25244140625, |
|
"grad_norm": 1.966430902481079, |
|
"learning_rate": 8.759850838261723e-06, |
|
"loss": 0.3475, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.2529296875, |
|
"grad_norm": 1.3296693563461304, |
|
"learning_rate": 8.754632317330448e-06, |
|
"loss": 0.3938, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.25341796875, |
|
"grad_norm": 1.4010918140411377, |
|
"learning_rate": 8.749404401143991e-06, |
|
"loss": 0.3474, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.25390625, |
|
"grad_norm": 1.5129917860031128, |
|
"learning_rate": 8.744167102784216e-06, |
|
"loss": 0.3783, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.25439453125, |
|
"grad_norm": 1.7624212503433228, |
|
"learning_rate": 8.738920435356473e-06, |
|
"loss": 0.3272, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.2548828125, |
|
"grad_norm": 1.4559099674224854, |
|
"learning_rate": 8.733664411989548e-06, |
|
"loss": 0.3526, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.25537109375, |
|
"grad_norm": 1.8239963054656982, |
|
"learning_rate": 8.728399045835648e-06, |
|
"loss": 0.3385, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.255859375, |
|
"grad_norm": 1.4369486570358276, |
|
"learning_rate": 8.723124350070347e-06, |
|
"loss": 0.3193, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.25634765625, |
|
"grad_norm": 4.341763496398926, |
|
"learning_rate": 8.717840337892575e-06, |
|
"loss": 0.3256, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2568359375, |
|
"grad_norm": 2.0711512565612793, |
|
"learning_rate": 8.712547022524566e-06, |
|
"loss": 0.3639, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.25732421875, |
|
"grad_norm": 1.4793862104415894, |
|
"learning_rate": 8.707244417211844e-06, |
|
"loss": 0.3166, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.2578125, |
|
"grad_norm": 1.742661476135254, |
|
"learning_rate": 8.701932535223168e-06, |
|
"loss": 0.3533, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.25830078125, |
|
"grad_norm": 1.4166213274002075, |
|
"learning_rate": 8.696611389850516e-06, |
|
"loss": 0.3436, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.2587890625, |
|
"grad_norm": 1.362882137298584, |
|
"learning_rate": 8.691280994409044e-06, |
|
"loss": 0.3165, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.25927734375, |
|
"grad_norm": 2.5286190509796143, |
|
"learning_rate": 8.685941362237058e-06, |
|
"loss": 0.3438, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.259765625, |
|
"grad_norm": 2.232900381088257, |
|
"learning_rate": 8.680592506695972e-06, |
|
"loss": 0.3389, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.26025390625, |
|
"grad_norm": 1.2126928567886353, |
|
"learning_rate": 8.675234441170286e-06, |
|
"loss": 0.306, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.2607421875, |
|
"grad_norm": 1.480934977531433, |
|
"learning_rate": 8.669867179067538e-06, |
|
"loss": 0.3696, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.26123046875, |
|
"grad_norm": 2.439810037612915, |
|
"learning_rate": 8.664490733818289e-06, |
|
"loss": 0.3628, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.26171875, |
|
"grad_norm": 1.3664276599884033, |
|
"learning_rate": 8.659105118876068e-06, |
|
"loss": 0.3534, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.26220703125, |
|
"grad_norm": 1.8439381122589111, |
|
"learning_rate": 8.65371034771736e-06, |
|
"loss": 0.3539, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.2626953125, |
|
"grad_norm": 2.1068308353424072, |
|
"learning_rate": 8.64830643384155e-06, |
|
"loss": 0.4281, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.26318359375, |
|
"grad_norm": 1.847388505935669, |
|
"learning_rate": 8.642893390770912e-06, |
|
"loss": 0.3624, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.263671875, |
|
"grad_norm": 2.783621311187744, |
|
"learning_rate": 8.63747123205056e-06, |
|
"loss": 0.3501, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.26416015625, |
|
"grad_norm": 5.078010559082031, |
|
"learning_rate": 8.632039971248416e-06, |
|
"loss": 0.3423, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.2646484375, |
|
"grad_norm": 1.461103916168213, |
|
"learning_rate": 8.626599621955179e-06, |
|
"loss": 0.3505, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.26513671875, |
|
"grad_norm": 1.512221336364746, |
|
"learning_rate": 8.621150197784293e-06, |
|
"loss": 0.344, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.265625, |
|
"grad_norm": 2.6210267543792725, |
|
"learning_rate": 8.615691712371907e-06, |
|
"loss": 0.3192, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.26611328125, |
|
"grad_norm": 1.5492252111434937, |
|
"learning_rate": 8.610224179376847e-06, |
|
"loss": 0.3217, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.2666015625, |
|
"grad_norm": 1.4719685316085815, |
|
"learning_rate": 8.604747612480577e-06, |
|
"loss": 0.3251, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.26708984375, |
|
"grad_norm": 1.9413729906082153, |
|
"learning_rate": 8.599262025387165e-06, |
|
"loss": 0.3658, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.267578125, |
|
"grad_norm": 1.8121291399002075, |
|
"learning_rate": 8.593767431823255e-06, |
|
"loss": 0.3274, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.26806640625, |
|
"grad_norm": 1.7863436937332153, |
|
"learning_rate": 8.588263845538021e-06, |
|
"loss": 0.3586, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.2685546875, |
|
"grad_norm": 2.253500461578369, |
|
"learning_rate": 8.582751280303148e-06, |
|
"loss": 0.383, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.26904296875, |
|
"grad_norm": 1.9108343124389648, |
|
"learning_rate": 8.577229749912782e-06, |
|
"loss": 0.3188, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.26953125, |
|
"grad_norm": 1.4474389553070068, |
|
"learning_rate": 8.571699268183506e-06, |
|
"loss": 0.3239, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.27001953125, |
|
"grad_norm": 1.6433511972427368, |
|
"learning_rate": 8.566159848954305e-06, |
|
"loss": 0.3565, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.2705078125, |
|
"grad_norm": 2.9185471534729004, |
|
"learning_rate": 8.560611506086518e-06, |
|
"loss": 0.3916, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.27099609375, |
|
"grad_norm": 1.6128103733062744, |
|
"learning_rate": 8.555054253463828e-06, |
|
"loss": 0.3518, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.271484375, |
|
"grad_norm": 1.3888630867004395, |
|
"learning_rate": 8.549488104992201e-06, |
|
"loss": 0.3772, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.27197265625, |
|
"grad_norm": 1.7909587621688843, |
|
"learning_rate": 8.543913074599867e-06, |
|
"loss": 0.3313, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.2724609375, |
|
"grad_norm": 1.6241544485092163, |
|
"learning_rate": 8.538329176237287e-06, |
|
"loss": 0.3535, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.27294921875, |
|
"grad_norm": 1.4434620141983032, |
|
"learning_rate": 8.532736423877102e-06, |
|
"loss": 0.3329, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.2734375, |
|
"grad_norm": 1.8953794240951538, |
|
"learning_rate": 8.527134831514116e-06, |
|
"loss": 0.3318, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.27392578125, |
|
"grad_norm": 1.287680983543396, |
|
"learning_rate": 8.521524413165254e-06, |
|
"loss": 0.3187, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.2744140625, |
|
"grad_norm": 1.6521981954574585, |
|
"learning_rate": 8.51590518286952e-06, |
|
"loss": 0.3509, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.27490234375, |
|
"grad_norm": 1.4679384231567383, |
|
"learning_rate": 8.510277154687973e-06, |
|
"loss": 0.3598, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.275390625, |
|
"grad_norm": 2.19455885887146, |
|
"learning_rate": 8.504640342703687e-06, |
|
"loss": 0.3371, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.27587890625, |
|
"grad_norm": 1.4917466640472412, |
|
"learning_rate": 8.498994761021715e-06, |
|
"loss": 0.3086, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.2763671875, |
|
"grad_norm": 2.3828556537628174, |
|
"learning_rate": 8.493340423769054e-06, |
|
"loss": 0.328, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.27685546875, |
|
"grad_norm": 2.0100631713867188, |
|
"learning_rate": 8.487677345094606e-06, |
|
"loss": 0.3497, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.27734375, |
|
"grad_norm": 2.037872552871704, |
|
"learning_rate": 8.482005539169158e-06, |
|
"loss": 0.3649, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.27783203125, |
|
"grad_norm": 1.3535383939743042, |
|
"learning_rate": 8.476325020185326e-06, |
|
"loss": 0.3321, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.2783203125, |
|
"grad_norm": 1.4872392416000366, |
|
"learning_rate": 8.47063580235753e-06, |
|
"loss": 0.3775, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.27880859375, |
|
"grad_norm": 2.482274293899536, |
|
"learning_rate": 8.46493789992196e-06, |
|
"loss": 0.3518, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.279296875, |
|
"grad_norm": 1.4444823265075684, |
|
"learning_rate": 8.459231327136532e-06, |
|
"loss": 0.3503, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.27978515625, |
|
"grad_norm": 1.3315978050231934, |
|
"learning_rate": 8.453516098280869e-06, |
|
"loss": 0.3408, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.2802734375, |
|
"grad_norm": 2.0306880474090576, |
|
"learning_rate": 8.447792227656241e-06, |
|
"loss": 0.3751, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.28076171875, |
|
"grad_norm": 1.3674098253250122, |
|
"learning_rate": 8.442059729585552e-06, |
|
"loss": 0.3307, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 2.2325830459594727, |
|
"learning_rate": 8.43631861841329e-06, |
|
"loss": 0.3168, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.28173828125, |
|
"grad_norm": 1.956121802330017, |
|
"learning_rate": 8.430568908505497e-06, |
|
"loss": 0.3317, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.2822265625, |
|
"grad_norm": 2.0539493560791016, |
|
"learning_rate": 8.42481061424973e-06, |
|
"loss": 0.3172, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.28271484375, |
|
"grad_norm": 1.3269410133361816, |
|
"learning_rate": 8.41904375005503e-06, |
|
"loss": 0.3726, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.283203125, |
|
"grad_norm": 2.887756586074829, |
|
"learning_rate": 8.413268330351881e-06, |
|
"loss": 0.342, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.28369140625, |
|
"grad_norm": 1.640519618988037, |
|
"learning_rate": 8.40748436959217e-06, |
|
"loss": 0.3418, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.2841796875, |
|
"grad_norm": 2.179222583770752, |
|
"learning_rate": 8.40169188224917e-06, |
|
"loss": 0.368, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.28466796875, |
|
"grad_norm": 2.25158429145813, |
|
"learning_rate": 8.395890882817478e-06, |
|
"loss": 0.3555, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.28515625, |
|
"grad_norm": 1.5757050514221191, |
|
"learning_rate": 8.390081385812993e-06, |
|
"loss": 0.3453, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.28564453125, |
|
"grad_norm": 1.5802643299102783, |
|
"learning_rate": 8.38426340577288e-06, |
|
"loss": 0.3635, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.2861328125, |
|
"grad_norm": 1.5654072761535645, |
|
"learning_rate": 8.378436957255535e-06, |
|
"loss": 0.3304, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.28662109375, |
|
"grad_norm": 1.2622393369674683, |
|
"learning_rate": 8.372602054840532e-06, |
|
"loss": 0.3468, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.287109375, |
|
"grad_norm": 2.9419167041778564, |
|
"learning_rate": 8.366758713128617e-06, |
|
"loss": 0.3286, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.28759765625, |
|
"grad_norm": 1.6033565998077393, |
|
"learning_rate": 8.360906946741635e-06, |
|
"loss": 0.3375, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.2880859375, |
|
"grad_norm": 1.5381578207015991, |
|
"learning_rate": 8.355046770322528e-06, |
|
"loss": 0.3531, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.28857421875, |
|
"grad_norm": 1.7467304468154907, |
|
"learning_rate": 8.349178198535273e-06, |
|
"loss": 0.305, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.2890625, |
|
"grad_norm": 1.3759098052978516, |
|
"learning_rate": 8.343301246064858e-06, |
|
"loss": 0.3643, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.28955078125, |
|
"grad_norm": 1.3180525302886963, |
|
"learning_rate": 8.337415927617243e-06, |
|
"loss": 0.3468, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.2900390625, |
|
"grad_norm": 1.3249021768569946, |
|
"learning_rate": 8.33152225791932e-06, |
|
"loss": 0.3502, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.29052734375, |
|
"grad_norm": 1.9022133350372314, |
|
"learning_rate": 8.32562025171888e-06, |
|
"loss": 0.3842, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.291015625, |
|
"grad_norm": 1.4465323686599731, |
|
"learning_rate": 8.319709923784573e-06, |
|
"loss": 0.3247, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.29150390625, |
|
"grad_norm": 2.4993956089019775, |
|
"learning_rate": 8.313791288905874e-06, |
|
"loss": 0.3826, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.2919921875, |
|
"grad_norm": 1.842347264289856, |
|
"learning_rate": 8.307864361893045e-06, |
|
"loss": 0.329, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.29248046875, |
|
"grad_norm": 1.5460954904556274, |
|
"learning_rate": 8.301929157577097e-06, |
|
"loss": 0.3453, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.29296875, |
|
"grad_norm": 3.255307912826538, |
|
"learning_rate": 8.295985690809752e-06, |
|
"loss": 0.3358, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29345703125, |
|
"grad_norm": 1.4224542379379272, |
|
"learning_rate": 8.290033976463407e-06, |
|
"loss": 0.3683, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.2939453125, |
|
"grad_norm": 1.4209293127059937, |
|
"learning_rate": 8.2840740294311e-06, |
|
"loss": 0.315, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.29443359375, |
|
"grad_norm": 2.0559093952178955, |
|
"learning_rate": 8.278105864626467e-06, |
|
"loss": 0.3801, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.294921875, |
|
"grad_norm": 1.880486249923706, |
|
"learning_rate": 8.27212949698371e-06, |
|
"loss": 0.3713, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.29541015625, |
|
"grad_norm": 3.0988686084747314, |
|
"learning_rate": 8.266144941457552e-06, |
|
"loss": 0.3917, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.2958984375, |
|
"grad_norm": 1.6043518781661987, |
|
"learning_rate": 8.26015221302321e-06, |
|
"loss": 0.3678, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.29638671875, |
|
"grad_norm": 1.520564079284668, |
|
"learning_rate": 8.254151326676354e-06, |
|
"loss": 0.3259, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.296875, |
|
"grad_norm": 1.9146232604980469, |
|
"learning_rate": 8.248142297433058e-06, |
|
"loss": 0.3291, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.29736328125, |
|
"grad_norm": 2.2928895950317383, |
|
"learning_rate": 8.24212514032978e-06, |
|
"loss": 0.3828, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.2978515625, |
|
"grad_norm": 1.9419975280761719, |
|
"learning_rate": 8.236099870423314e-06, |
|
"loss": 0.3287, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.29833984375, |
|
"grad_norm": 1.7183066606521606, |
|
"learning_rate": 8.230066502790756e-06, |
|
"loss": 0.3121, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.298828125, |
|
"grad_norm": 1.5658105611801147, |
|
"learning_rate": 8.224025052529463e-06, |
|
"loss": 0.3501, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.29931640625, |
|
"grad_norm": 1.9759196043014526, |
|
"learning_rate": 8.21797553475702e-06, |
|
"loss": 0.3345, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.2998046875, |
|
"grad_norm": 2.0763461589813232, |
|
"learning_rate": 8.211917964611197e-06, |
|
"loss": 0.3187, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.30029296875, |
|
"grad_norm": 1.4480257034301758, |
|
"learning_rate": 8.205852357249912e-06, |
|
"loss": 0.2866, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.30078125, |
|
"grad_norm": 1.9418996572494507, |
|
"learning_rate": 8.1997787278512e-06, |
|
"loss": 0.3125, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.30126953125, |
|
"grad_norm": 1.726302146911621, |
|
"learning_rate": 8.193697091613163e-06, |
|
"loss": 0.3663, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.3017578125, |
|
"grad_norm": 1.622819423675537, |
|
"learning_rate": 8.187607463753946e-06, |
|
"loss": 0.3385, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.30224609375, |
|
"grad_norm": 2.375453472137451, |
|
"learning_rate": 8.181509859511686e-06, |
|
"loss": 0.3314, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.302734375, |
|
"grad_norm": 1.6941611766815186, |
|
"learning_rate": 8.175404294144482e-06, |
|
"loss": 0.3152, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.30322265625, |
|
"grad_norm": 1.6905850172042847, |
|
"learning_rate": 8.16929078293035e-06, |
|
"loss": 0.3352, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.3037109375, |
|
"grad_norm": 1.9776393175125122, |
|
"learning_rate": 8.163169341167196e-06, |
|
"loss": 0.39, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.30419921875, |
|
"grad_norm": 1.4409841299057007, |
|
"learning_rate": 8.157039984172764e-06, |
|
"loss": 0.3445, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.3046875, |
|
"grad_norm": 1.7097798585891724, |
|
"learning_rate": 8.150902727284609e-06, |
|
"loss": 0.3583, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.30517578125, |
|
"grad_norm": 1.5705921649932861, |
|
"learning_rate": 8.144757585860053e-06, |
|
"loss": 0.355, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3056640625, |
|
"grad_norm": 1.5804706811904907, |
|
"learning_rate": 8.138604575276143e-06, |
|
"loss": 0.3615, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.30615234375, |
|
"grad_norm": 1.7296881675720215, |
|
"learning_rate": 8.132443710929624e-06, |
|
"loss": 0.381, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.306640625, |
|
"grad_norm": 1.3139718770980835, |
|
"learning_rate": 8.126275008236891e-06, |
|
"loss": 0.3296, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.30712890625, |
|
"grad_norm": 1.339277744293213, |
|
"learning_rate": 8.12009848263395e-06, |
|
"loss": 0.3262, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.3076171875, |
|
"grad_norm": 5.439074516296387, |
|
"learning_rate": 8.113914149576388e-06, |
|
"loss": 0.361, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.30810546875, |
|
"grad_norm": 1.8875752687454224, |
|
"learning_rate": 8.107722024539321e-06, |
|
"loss": 0.3419, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.30859375, |
|
"grad_norm": 1.3780957460403442, |
|
"learning_rate": 8.10152212301737e-06, |
|
"loss": 0.3398, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.30908203125, |
|
"grad_norm": 2.1425485610961914, |
|
"learning_rate": 8.095314460524612e-06, |
|
"loss": 0.3473, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.3095703125, |
|
"grad_norm": 2.3225300312042236, |
|
"learning_rate": 8.089099052594545e-06, |
|
"loss": 0.3757, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.31005859375, |
|
"grad_norm": 1.4518051147460938, |
|
"learning_rate": 8.08287591478005e-06, |
|
"loss": 0.3112, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.310546875, |
|
"grad_norm": 2.2762012481689453, |
|
"learning_rate": 8.076645062653346e-06, |
|
"loss": 0.3642, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.31103515625, |
|
"grad_norm": 1.6947425603866577, |
|
"learning_rate": 8.070406511805961e-06, |
|
"loss": 0.35, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.3115234375, |
|
"grad_norm": 1.5694466829299927, |
|
"learning_rate": 8.064160277848683e-06, |
|
"loss": 0.3458, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.31201171875, |
|
"grad_norm": 1.9441496133804321, |
|
"learning_rate": 8.05790637641153e-06, |
|
"loss": 0.3698, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 1.6394853591918945, |
|
"learning_rate": 8.051644823143702e-06, |
|
"loss": 0.3515, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.31298828125, |
|
"grad_norm": 1.8157254457473755, |
|
"learning_rate": 8.04537563371355e-06, |
|
"loss": 0.3278, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.3134765625, |
|
"grad_norm": 1.6162160634994507, |
|
"learning_rate": 8.03909882380853e-06, |
|
"loss": 0.3586, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.31396484375, |
|
"grad_norm": 1.7346367835998535, |
|
"learning_rate": 8.03281440913517e-06, |
|
"loss": 0.3194, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.314453125, |
|
"grad_norm": 1.593997836112976, |
|
"learning_rate": 8.026522405419024e-06, |
|
"loss": 0.3205, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.31494140625, |
|
"grad_norm": 1.3535056114196777, |
|
"learning_rate": 8.020222828404638e-06, |
|
"loss": 0.3382, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.3154296875, |
|
"grad_norm": 2.354459524154663, |
|
"learning_rate": 8.01391569385551e-06, |
|
"loss": 0.3041, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.31591796875, |
|
"grad_norm": 1.6168910264968872, |
|
"learning_rate": 8.007601017554045e-06, |
|
"loss": 0.392, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.31640625, |
|
"grad_norm": 1.7411466836929321, |
|
"learning_rate": 8.001278815301525e-06, |
|
"loss": 0.319, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.31689453125, |
|
"grad_norm": 2.3402931690216064, |
|
"learning_rate": 7.994949102918062e-06, |
|
"loss": 0.3657, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.3173828125, |
|
"grad_norm": 1.2933272123336792, |
|
"learning_rate": 7.98861189624256e-06, |
|
"loss": 0.3049, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.31787109375, |
|
"grad_norm": 1.6581286191940308, |
|
"learning_rate": 7.982267211132675e-06, |
|
"loss": 0.354, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.318359375, |
|
"grad_norm": 2.0283968448638916, |
|
"learning_rate": 7.97591506346478e-06, |
|
"loss": 0.3521, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.31884765625, |
|
"grad_norm": 1.6676313877105713, |
|
"learning_rate": 7.96955546913392e-06, |
|
"loss": 0.3237, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.3193359375, |
|
"grad_norm": 1.548922061920166, |
|
"learning_rate": 7.963188444053772e-06, |
|
"loss": 0.3145, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.31982421875, |
|
"grad_norm": 2.61688232421875, |
|
"learning_rate": 7.95681400415661e-06, |
|
"loss": 0.3159, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.3203125, |
|
"grad_norm": 2.0864787101745605, |
|
"learning_rate": 7.95043216539326e-06, |
|
"loss": 0.3394, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.32080078125, |
|
"grad_norm": 1.82245934009552, |
|
"learning_rate": 7.944042943733061e-06, |
|
"loss": 0.355, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.3212890625, |
|
"grad_norm": 1.6342824697494507, |
|
"learning_rate": 7.937646355163833e-06, |
|
"loss": 0.3407, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.32177734375, |
|
"grad_norm": 1.7688589096069336, |
|
"learning_rate": 7.931242415691822e-06, |
|
"loss": 0.3936, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.322265625, |
|
"grad_norm": 1.5749949216842651, |
|
"learning_rate": 7.924831141341671e-06, |
|
"loss": 0.3226, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.32275390625, |
|
"grad_norm": 4.079642295837402, |
|
"learning_rate": 7.918412548156382e-06, |
|
"loss": 0.3478, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.3232421875, |
|
"grad_norm": 1.564584732055664, |
|
"learning_rate": 7.911986652197263e-06, |
|
"loss": 0.345, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.32373046875, |
|
"grad_norm": 1.9359629154205322, |
|
"learning_rate": 7.905553469543903e-06, |
|
"loss": 0.3478, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.32421875, |
|
"grad_norm": 1.3265938758850098, |
|
"learning_rate": 7.899113016294118e-06, |
|
"loss": 0.3789, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.32470703125, |
|
"grad_norm": 1.617301106452942, |
|
"learning_rate": 7.892665308563922e-06, |
|
"loss": 0.3182, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3251953125, |
|
"grad_norm": 2.50874924659729, |
|
"learning_rate": 7.88621036248748e-06, |
|
"loss": 0.3269, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.32568359375, |
|
"grad_norm": 2.0309231281280518, |
|
"learning_rate": 7.879748194217074e-06, |
|
"loss": 0.3294, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.326171875, |
|
"grad_norm": 1.6182068586349487, |
|
"learning_rate": 7.873278819923047e-06, |
|
"loss": 0.3269, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.32666015625, |
|
"grad_norm": 2.3924951553344727, |
|
"learning_rate": 7.866802255793788e-06, |
|
"loss": 0.3498, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.3271484375, |
|
"grad_norm": 2.816044330596924, |
|
"learning_rate": 7.860318518035668e-06, |
|
"loss": 0.3231, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.32763671875, |
|
"grad_norm": 1.9277939796447754, |
|
"learning_rate": 7.853827622873011e-06, |
|
"loss": 0.3236, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.328125, |
|
"grad_norm": 1.364225149154663, |
|
"learning_rate": 7.847329586548049e-06, |
|
"loss": 0.3807, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.32861328125, |
|
"grad_norm": 1.443907380104065, |
|
"learning_rate": 7.840824425320888e-06, |
|
"loss": 0.4092, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.3291015625, |
|
"grad_norm": 1.670778512954712, |
|
"learning_rate": 7.834312155469457e-06, |
|
"loss": 0.3653, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.32958984375, |
|
"grad_norm": 1.510043740272522, |
|
"learning_rate": 7.827792793289477e-06, |
|
"loss": 0.3463, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.330078125, |
|
"grad_norm": 2.1872780323028564, |
|
"learning_rate": 7.821266355094419e-06, |
|
"loss": 0.3479, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.33056640625, |
|
"grad_norm": 1.6790423393249512, |
|
"learning_rate": 7.814732857215453e-06, |
|
"loss": 0.3476, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.3310546875, |
|
"grad_norm": 1.3476860523223877, |
|
"learning_rate": 7.808192316001417e-06, |
|
"loss": 0.3333, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.33154296875, |
|
"grad_norm": 1.752164602279663, |
|
"learning_rate": 7.801644747818777e-06, |
|
"loss": 0.3341, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.33203125, |
|
"grad_norm": 2.4022326469421387, |
|
"learning_rate": 7.79509016905158e-06, |
|
"loss": 0.357, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.33251953125, |
|
"grad_norm": 1.3659697771072388, |
|
"learning_rate": 7.788528596101419e-06, |
|
"loss": 0.3073, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.3330078125, |
|
"grad_norm": 1.4519615173339844, |
|
"learning_rate": 7.78196004538738e-06, |
|
"loss": 0.3052, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.33349609375, |
|
"grad_norm": 2.08927583694458, |
|
"learning_rate": 7.775384533346018e-06, |
|
"loss": 0.3242, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.333984375, |
|
"grad_norm": 1.4538501501083374, |
|
"learning_rate": 7.768802076431304e-06, |
|
"loss": 0.3495, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.33447265625, |
|
"grad_norm": 2.239643096923828, |
|
"learning_rate": 7.76221269111459e-06, |
|
"loss": 0.3554, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3349609375, |
|
"grad_norm": 1.8009265661239624, |
|
"learning_rate": 7.755616393884562e-06, |
|
"loss": 0.3652, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.33544921875, |
|
"grad_norm": 1.5794439315795898, |
|
"learning_rate": 7.7490132012472e-06, |
|
"loss": 0.3321, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.3359375, |
|
"grad_norm": 1.737437129020691, |
|
"learning_rate": 7.742403129725742e-06, |
|
"loss": 0.3138, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.33642578125, |
|
"grad_norm": 1.7152299880981445, |
|
"learning_rate": 7.735786195860641e-06, |
|
"loss": 0.3582, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.3369140625, |
|
"grad_norm": 1.3847858905792236, |
|
"learning_rate": 7.729162416209518e-06, |
|
"loss": 0.3396, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.33740234375, |
|
"grad_norm": 1.6747031211853027, |
|
"learning_rate": 7.722531807347122e-06, |
|
"loss": 0.3474, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.337890625, |
|
"grad_norm": 1.3016866445541382, |
|
"learning_rate": 7.715894385865299e-06, |
|
"loss": 0.3391, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.33837890625, |
|
"grad_norm": 1.3648223876953125, |
|
"learning_rate": 7.709250168372932e-06, |
|
"loss": 0.3298, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.3388671875, |
|
"grad_norm": 1.5124351978302002, |
|
"learning_rate": 7.702599171495919e-06, |
|
"loss": 0.3334, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.33935546875, |
|
"grad_norm": 37.46984100341797, |
|
"learning_rate": 7.695941411877115e-06, |
|
"loss": 0.3342, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.33984375, |
|
"grad_norm": 1.4970625638961792, |
|
"learning_rate": 7.689276906176302e-06, |
|
"loss": 0.3436, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.34033203125, |
|
"grad_norm": 3.098925828933716, |
|
"learning_rate": 7.682605671070142e-06, |
|
"loss": 0.3437, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.3408203125, |
|
"grad_norm": 1.7555867433547974, |
|
"learning_rate": 7.675927723252134e-06, |
|
"loss": 0.322, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.34130859375, |
|
"grad_norm": 1.5935651063919067, |
|
"learning_rate": 7.669243079432578e-06, |
|
"loss": 0.2998, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.341796875, |
|
"grad_norm": 1.506208896636963, |
|
"learning_rate": 7.662551756338525e-06, |
|
"loss": 0.3612, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.34228515625, |
|
"grad_norm": 1.923596978187561, |
|
"learning_rate": 7.655853770713744e-06, |
|
"loss": 0.3593, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.3427734375, |
|
"grad_norm": 1.9344090223312378, |
|
"learning_rate": 7.64914913931867e-06, |
|
"loss": 0.3156, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.34326171875, |
|
"grad_norm": 1.7808047533035278, |
|
"learning_rate": 7.642437878930376e-06, |
|
"loss": 0.3419, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 1.5053675174713135, |
|
"learning_rate": 7.635720006342513e-06, |
|
"loss": 0.3539, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.34423828125, |
|
"grad_norm": 1.5963175296783447, |
|
"learning_rate": 7.628995538365287e-06, |
|
"loss": 0.3562, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.3447265625, |
|
"grad_norm": 1.4388726949691772, |
|
"learning_rate": 7.6222644918254005e-06, |
|
"loss": 0.3413, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.34521484375, |
|
"grad_norm": 3.6217451095581055, |
|
"learning_rate": 7.615526883566023e-06, |
|
"loss": 0.3584, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.345703125, |
|
"grad_norm": 1.6617943048477173, |
|
"learning_rate": 7.608782730446741e-06, |
|
"loss": 0.3675, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.34619140625, |
|
"grad_norm": 3.6505870819091797, |
|
"learning_rate": 7.6020320493435175e-06, |
|
"loss": 0.3028, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.3466796875, |
|
"grad_norm": 1.5057923793792725, |
|
"learning_rate": 7.595274857148651e-06, |
|
"loss": 0.3601, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.34716796875, |
|
"grad_norm": 1.775791049003601, |
|
"learning_rate": 7.588511170770736e-06, |
|
"loss": 0.3561, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.34765625, |
|
"grad_norm": 2.0912845134735107, |
|
"learning_rate": 7.581741007134611e-06, |
|
"loss": 0.3211, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.34814453125, |
|
"grad_norm": 1.4719021320343018, |
|
"learning_rate": 7.574964383181329e-06, |
|
"loss": 0.3571, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.3486328125, |
|
"grad_norm": 1.5099034309387207, |
|
"learning_rate": 7.568181315868104e-06, |
|
"loss": 0.3773, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.34912109375, |
|
"grad_norm": 1.797803282737732, |
|
"learning_rate": 7.561391822168277e-06, |
|
"loss": 0.3305, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.349609375, |
|
"grad_norm": 1.5316636562347412, |
|
"learning_rate": 7.554595919071268e-06, |
|
"loss": 0.3692, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.35009765625, |
|
"grad_norm": 1.332055926322937, |
|
"learning_rate": 7.5477936235825344e-06, |
|
"loss": 0.2998, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.3505859375, |
|
"grad_norm": 1.538785457611084, |
|
"learning_rate": 7.540984952723531e-06, |
|
"loss": 0.3325, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.35107421875, |
|
"grad_norm": 2.884404420852661, |
|
"learning_rate": 7.534169923531665e-06, |
|
"loss": 0.3036, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.3515625, |
|
"grad_norm": 1.7468745708465576, |
|
"learning_rate": 7.527348553060254e-06, |
|
"loss": 0.3199, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.35205078125, |
|
"grad_norm": 2.015227794647217, |
|
"learning_rate": 7.520520858378486e-06, |
|
"loss": 0.3884, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.3525390625, |
|
"grad_norm": 1.3880223035812378, |
|
"learning_rate": 7.513686856571367e-06, |
|
"loss": 0.336, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.35302734375, |
|
"grad_norm": 1.297411561012268, |
|
"learning_rate": 7.506846564739694e-06, |
|
"loss": 0.3306, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.353515625, |
|
"grad_norm": 1.55870521068573, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.3056, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.35400390625, |
|
"grad_norm": 2.036909818649292, |
|
"learning_rate": 7.493147179484514e-06, |
|
"loss": 0.3273, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3544921875, |
|
"grad_norm": 1.3678783178329468, |
|
"learning_rate": 7.486288120341118e-06, |
|
"loss": 0.345, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.35498046875, |
|
"grad_norm": 2.0894579887390137, |
|
"learning_rate": 7.479422839733307e-06, |
|
"loss": 0.359, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.35546875, |
|
"grad_norm": 1.6823246479034424, |
|
"learning_rate": 7.4725513548401455e-06, |
|
"loss": 0.3563, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.35595703125, |
|
"grad_norm": 1.351969838142395, |
|
"learning_rate": 7.4656736828562186e-06, |
|
"loss": 0.3017, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.3564453125, |
|
"grad_norm": 1.6686972379684448, |
|
"learning_rate": 7.458789840991596e-06, |
|
"loss": 0.3478, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.35693359375, |
|
"grad_norm": 1.3534908294677734, |
|
"learning_rate": 7.4518998464717874e-06, |
|
"loss": 0.3244, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.357421875, |
|
"grad_norm": 1.4082777500152588, |
|
"learning_rate": 7.445003716537698e-06, |
|
"loss": 0.3251, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.35791015625, |
|
"grad_norm": 2.0288498401641846, |
|
"learning_rate": 7.438101468445582e-06, |
|
"loss": 0.3379, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.3583984375, |
|
"grad_norm": 1.6891510486602783, |
|
"learning_rate": 7.4311931194670085e-06, |
|
"loss": 0.3576, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.35888671875, |
|
"grad_norm": 1.3616983890533447, |
|
"learning_rate": 7.42427868688881e-06, |
|
"loss": 0.3439, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.359375, |
|
"grad_norm": 1.5869650840759277, |
|
"learning_rate": 7.417358188013042e-06, |
|
"loss": 0.3389, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.35986328125, |
|
"grad_norm": 1.3705356121063232, |
|
"learning_rate": 7.410431640156937e-06, |
|
"loss": 0.346, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.3603515625, |
|
"grad_norm": 2.2622792720794678, |
|
"learning_rate": 7.403499060652874e-06, |
|
"loss": 0.3535, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.36083984375, |
|
"grad_norm": 1.719897747039795, |
|
"learning_rate": 7.3965604668483145e-06, |
|
"loss": 0.382, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.361328125, |
|
"grad_norm": 1.3844950199127197, |
|
"learning_rate": 7.389615876105773e-06, |
|
"loss": 0.3481, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.36181640625, |
|
"grad_norm": 1.6294703483581543, |
|
"learning_rate": 7.38266530580277e-06, |
|
"loss": 0.3656, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.3623046875, |
|
"grad_norm": 2.908967971801758, |
|
"learning_rate": 7.375708773331791e-06, |
|
"loss": 0.3457, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.36279296875, |
|
"grad_norm": 1.473132848739624, |
|
"learning_rate": 7.36874629610024e-06, |
|
"loss": 0.3385, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.36328125, |
|
"grad_norm": 2.919328451156616, |
|
"learning_rate": 7.361777891530392e-06, |
|
"loss": 0.3336, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.36376953125, |
|
"grad_norm": 2.563336133956909, |
|
"learning_rate": 7.354803577059359e-06, |
|
"loss": 0.3357, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.3642578125, |
|
"grad_norm": 1.4097625017166138, |
|
"learning_rate": 7.347823370139042e-06, |
|
"loss": 0.3559, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.36474609375, |
|
"grad_norm": 1.3321950435638428, |
|
"learning_rate": 7.340837288236085e-06, |
|
"loss": 0.3626, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.365234375, |
|
"grad_norm": 1.6507295370101929, |
|
"learning_rate": 7.3338453488318284e-06, |
|
"loss": 0.3095, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.36572265625, |
|
"grad_norm": 1.8008859157562256, |
|
"learning_rate": 7.326847569422278e-06, |
|
"loss": 0.3193, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.3662109375, |
|
"grad_norm": 1.4755789041519165, |
|
"learning_rate": 7.3198439675180484e-06, |
|
"loss": 0.2986, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.36669921875, |
|
"grad_norm": 1.7474323511123657, |
|
"learning_rate": 7.312834560644327e-06, |
|
"loss": 0.3936, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.3671875, |
|
"grad_norm": 1.6639896631240845, |
|
"learning_rate": 7.30581936634082e-06, |
|
"loss": 0.3673, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.36767578125, |
|
"grad_norm": 1.3790712356567383, |
|
"learning_rate": 7.298798402161725e-06, |
|
"loss": 0.3639, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.3681640625, |
|
"grad_norm": 1.9777040481567383, |
|
"learning_rate": 7.291771685675673e-06, |
|
"loss": 0.3299, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.36865234375, |
|
"grad_norm": 1.7995957136154175, |
|
"learning_rate": 7.284739234465686e-06, |
|
"loss": 0.3605, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.369140625, |
|
"grad_norm": 1.9671039581298828, |
|
"learning_rate": 7.277701066129141e-06, |
|
"loss": 0.3792, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.36962890625, |
|
"grad_norm": 2.719590187072754, |
|
"learning_rate": 7.27065719827772e-06, |
|
"loss": 0.3318, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.3701171875, |
|
"grad_norm": 1.9835278987884521, |
|
"learning_rate": 7.2636076485373645e-06, |
|
"loss": 0.3286, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.37060546875, |
|
"grad_norm": 1.2610225677490234, |
|
"learning_rate": 7.256552434548236e-06, |
|
"loss": 0.3274, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.37109375, |
|
"grad_norm": 1.2788983583450317, |
|
"learning_rate": 7.249491573964671e-06, |
|
"loss": 0.3622, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.37158203125, |
|
"grad_norm": 1.2974728345870972, |
|
"learning_rate": 7.242425084455132e-06, |
|
"loss": 0.3253, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.3720703125, |
|
"grad_norm": 1.8051031827926636, |
|
"learning_rate": 7.23535298370217e-06, |
|
"loss": 0.3486, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.37255859375, |
|
"grad_norm": 1.7785935401916504, |
|
"learning_rate": 7.228275289402373e-06, |
|
"loss": 0.3195, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.373046875, |
|
"grad_norm": 1.2360249757766724, |
|
"learning_rate": 7.221192019266332e-06, |
|
"loss": 0.3005, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.37353515625, |
|
"grad_norm": 1.5772784948349, |
|
"learning_rate": 7.214103191018584e-06, |
|
"loss": 0.3319, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.3740234375, |
|
"grad_norm": 1.5777393579483032, |
|
"learning_rate": 7.2070088223975784e-06, |
|
"loss": 0.3412, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.37451171875, |
|
"grad_norm": 1.2442673444747925, |
|
"learning_rate": 7.199908931155628e-06, |
|
"loss": 0.3236, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 1.1323033571243286, |
|
"learning_rate": 7.192803535058861e-06, |
|
"loss": 0.3236, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.37548828125, |
|
"grad_norm": 1.316483974456787, |
|
"learning_rate": 7.185692651887186e-06, |
|
"loss": 0.3295, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.3759765625, |
|
"grad_norm": 1.5371990203857422, |
|
"learning_rate": 7.178576299434239e-06, |
|
"loss": 0.3711, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.37646484375, |
|
"grad_norm": 1.7177865505218506, |
|
"learning_rate": 7.171454495507341e-06, |
|
"loss": 0.3294, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.376953125, |
|
"grad_norm": 1.4074996709823608, |
|
"learning_rate": 7.164327257927456e-06, |
|
"loss": 0.3472, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.37744140625, |
|
"grad_norm": 1.3459590673446655, |
|
"learning_rate": 7.157194604529143e-06, |
|
"loss": 0.3268, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.3779296875, |
|
"grad_norm": 1.3509142398834229, |
|
"learning_rate": 7.150056553160517e-06, |
|
"loss": 0.3258, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.37841796875, |
|
"grad_norm": 1.3562768697738647, |
|
"learning_rate": 7.142913121683195e-06, |
|
"loss": 0.3301, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.37890625, |
|
"grad_norm": 1.815333604812622, |
|
"learning_rate": 7.135764327972261e-06, |
|
"loss": 0.3653, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.37939453125, |
|
"grad_norm": 1.3162930011749268, |
|
"learning_rate": 7.128610189916213e-06, |
|
"loss": 0.376, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.3798828125, |
|
"grad_norm": 1.7800266742706299, |
|
"learning_rate": 7.121450725416928e-06, |
|
"loss": 0.3662, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.38037109375, |
|
"grad_norm": 1.5096458196640015, |
|
"learning_rate": 7.114285952389604e-06, |
|
"loss": 0.3588, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.380859375, |
|
"grad_norm": 2.538273334503174, |
|
"learning_rate": 7.1071158887627304e-06, |
|
"loss": 0.3312, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.38134765625, |
|
"grad_norm": 1.3077067136764526, |
|
"learning_rate": 7.0999405524780266e-06, |
|
"loss": 0.3344, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.3818359375, |
|
"grad_norm": 1.3059022426605225, |
|
"learning_rate": 7.092759961490415e-06, |
|
"loss": 0.3259, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.38232421875, |
|
"grad_norm": 2.276553153991699, |
|
"learning_rate": 7.08557413376796e-06, |
|
"loss": 0.3331, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.3828125, |
|
"grad_norm": 1.3777782917022705, |
|
"learning_rate": 7.078383087291833e-06, |
|
"loss": 0.3211, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.38330078125, |
|
"grad_norm": 1.3232738971710205, |
|
"learning_rate": 7.071186840056264e-06, |
|
"loss": 0.2928, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.3837890625, |
|
"grad_norm": 1.1360565423965454, |
|
"learning_rate": 7.063985410068499e-06, |
|
"loss": 0.3291, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.38427734375, |
|
"grad_norm": 1.5104074478149414, |
|
"learning_rate": 7.056778815348746e-06, |
|
"loss": 0.3388, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.384765625, |
|
"grad_norm": 1.3837941884994507, |
|
"learning_rate": 7.0495670739301435e-06, |
|
"loss": 0.3802, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.38525390625, |
|
"grad_norm": 2.0784964561462402, |
|
"learning_rate": 7.042350203858706e-06, |
|
"loss": 0.3153, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.3857421875, |
|
"grad_norm": 1.4472565650939941, |
|
"learning_rate": 7.035128223193286e-06, |
|
"loss": 0.3145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.38623046875, |
|
"grad_norm": 1.729691505432129, |
|
"learning_rate": 7.0279011500055136e-06, |
|
"loss": 0.393, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.38671875, |
|
"grad_norm": 1.4967801570892334, |
|
"learning_rate": 7.020669002379772e-06, |
|
"loss": 0.3344, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.38720703125, |
|
"grad_norm": 1.322029948234558, |
|
"learning_rate": 7.0134317984131395e-06, |
|
"loss": 0.3319, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.3876953125, |
|
"grad_norm": 2.8917009830474854, |
|
"learning_rate": 7.006189556215346e-06, |
|
"loss": 0.3152, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.38818359375, |
|
"grad_norm": 1.581947922706604, |
|
"learning_rate": 6.998942293908725e-06, |
|
"loss": 0.3606, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.388671875, |
|
"grad_norm": 2.658916711807251, |
|
"learning_rate": 6.991690029628181e-06, |
|
"loss": 0.3451, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.38916015625, |
|
"grad_norm": 2.3201754093170166, |
|
"learning_rate": 6.9844327815211275e-06, |
|
"loss": 0.333, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.3896484375, |
|
"grad_norm": 1.4934650659561157, |
|
"learning_rate": 6.977170567747452e-06, |
|
"loss": 0.3336, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.39013671875, |
|
"grad_norm": 1.4863629341125488, |
|
"learning_rate": 6.969903406479465e-06, |
|
"loss": 0.3347, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 1.3552590608596802, |
|
"learning_rate": 6.962631315901861e-06, |
|
"loss": 0.3623, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.39111328125, |
|
"grad_norm": 2.2949376106262207, |
|
"learning_rate": 6.955354314211669e-06, |
|
"loss": 0.2987, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.3916015625, |
|
"grad_norm": 1.3013123273849487, |
|
"learning_rate": 6.948072419618201e-06, |
|
"loss": 0.3307, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.39208984375, |
|
"grad_norm": 1.4084373712539673, |
|
"learning_rate": 6.940785650343019e-06, |
|
"loss": 0.3119, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.392578125, |
|
"grad_norm": 2.596653461456299, |
|
"learning_rate": 6.93349402461988e-06, |
|
"loss": 0.3228, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.39306640625, |
|
"grad_norm": 1.5036858320236206, |
|
"learning_rate": 6.926197560694699e-06, |
|
"loss": 0.3463, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.3935546875, |
|
"grad_norm": 1.8642725944519043, |
|
"learning_rate": 6.918896276825485e-06, |
|
"loss": 0.368, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.39404296875, |
|
"grad_norm": 1.289711356163025, |
|
"learning_rate": 6.9115901912823226e-06, |
|
"loss": 0.3582, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.39453125, |
|
"grad_norm": 1.507915735244751, |
|
"learning_rate": 6.9042793223473024e-06, |
|
"loss": 0.3829, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.39501953125, |
|
"grad_norm": 1.7021656036376953, |
|
"learning_rate": 6.896963688314489e-06, |
|
"loss": 0.3668, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.3955078125, |
|
"grad_norm": 1.2955149412155151, |
|
"learning_rate": 6.889643307489865e-06, |
|
"loss": 0.3344, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.39599609375, |
|
"grad_norm": 1.183563232421875, |
|
"learning_rate": 6.882318198191298e-06, |
|
"loss": 0.3191, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.396484375, |
|
"grad_norm": 1.458882451057434, |
|
"learning_rate": 6.874988378748484e-06, |
|
"loss": 0.3531, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.39697265625, |
|
"grad_norm": 1.6540387868881226, |
|
"learning_rate": 6.8676538675029054e-06, |
|
"loss": 0.3399, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.3974609375, |
|
"grad_norm": 1.2130305767059326, |
|
"learning_rate": 6.860314682807786e-06, |
|
"loss": 0.3387, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.39794921875, |
|
"grad_norm": 1.3185558319091797, |
|
"learning_rate": 6.852970843028043e-06, |
|
"loss": 0.3389, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.3984375, |
|
"grad_norm": 1.6620187759399414, |
|
"learning_rate": 6.845622366540242e-06, |
|
"loss": 0.3041, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.39892578125, |
|
"grad_norm": 1.1920667886734009, |
|
"learning_rate": 6.8382692717325525e-06, |
|
"loss": 0.3047, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.3994140625, |
|
"grad_norm": 1.4352617263793945, |
|
"learning_rate": 6.8309115770046986e-06, |
|
"loss": 0.3276, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.39990234375, |
|
"grad_norm": 1.6452810764312744, |
|
"learning_rate": 6.8235493007679155e-06, |
|
"loss": 0.3243, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.400390625, |
|
"grad_norm": 1.6612956523895264, |
|
"learning_rate": 6.816182461444905e-06, |
|
"loss": 0.342, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.40087890625, |
|
"grad_norm": 1.2954360246658325, |
|
"learning_rate": 6.8088110774697825e-06, |
|
"loss": 0.3117, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.4013671875, |
|
"grad_norm": 2.189624786376953, |
|
"learning_rate": 6.8014351672880395e-06, |
|
"loss": 0.3069, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.40185546875, |
|
"grad_norm": 1.4809291362762451, |
|
"learning_rate": 6.794054749356492e-06, |
|
"loss": 0.3355, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.40234375, |
|
"grad_norm": 1.6851189136505127, |
|
"learning_rate": 6.786669842143236e-06, |
|
"loss": 0.3435, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.40283203125, |
|
"grad_norm": 1.401813268661499, |
|
"learning_rate": 6.779280464127601e-06, |
|
"loss": 0.326, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4033203125, |
|
"grad_norm": 1.7311843633651733, |
|
"learning_rate": 6.771886633800104e-06, |
|
"loss": 0.3281, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.40380859375, |
|
"grad_norm": 2.936901092529297, |
|
"learning_rate": 6.764488369662403e-06, |
|
"loss": 0.3727, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.404296875, |
|
"grad_norm": 1.319385051727295, |
|
"learning_rate": 6.75708569022725e-06, |
|
"loss": 0.344, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.40478515625, |
|
"grad_norm": 1.9358359575271606, |
|
"learning_rate": 6.749678614018446e-06, |
|
"loss": 0.3622, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.4052734375, |
|
"grad_norm": 1.1188249588012695, |
|
"learning_rate": 6.742267159570796e-06, |
|
"loss": 0.3299, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.40576171875, |
|
"grad_norm": 1.3562527894973755, |
|
"learning_rate": 6.734851345430057e-06, |
|
"loss": 0.319, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 1.2941495180130005, |
|
"learning_rate": 6.727431190152898e-06, |
|
"loss": 0.3323, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.40673828125, |
|
"grad_norm": 2.1621103286743164, |
|
"learning_rate": 6.720006712306849e-06, |
|
"loss": 0.3409, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.4072265625, |
|
"grad_norm": 1.3561265468597412, |
|
"learning_rate": 6.712577930470258e-06, |
|
"loss": 0.3549, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.40771484375, |
|
"grad_norm": 1.2518807649612427, |
|
"learning_rate": 6.705144863232246e-06, |
|
"loss": 0.3279, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.408203125, |
|
"grad_norm": 1.1951934099197388, |
|
"learning_rate": 6.697707529192648e-06, |
|
"loss": 0.3146, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.40869140625, |
|
"grad_norm": 1.2976142168045044, |
|
"learning_rate": 6.6902659469619855e-06, |
|
"loss": 0.3151, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.4091796875, |
|
"grad_norm": 1.554851770401001, |
|
"learning_rate": 6.682820135161405e-06, |
|
"loss": 0.2972, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.40966796875, |
|
"grad_norm": 1.467674732208252, |
|
"learning_rate": 6.675370112422639e-06, |
|
"loss": 0.3538, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.41015625, |
|
"grad_norm": 2.0394184589385986, |
|
"learning_rate": 6.667915897387957e-06, |
|
"loss": 0.3124, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.41064453125, |
|
"grad_norm": 1.458815097808838, |
|
"learning_rate": 6.6604575087101165e-06, |
|
"loss": 0.3073, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.4111328125, |
|
"grad_norm": 1.2343790531158447, |
|
"learning_rate": 6.6529949650523195e-06, |
|
"loss": 0.3224, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.41162109375, |
|
"grad_norm": 1.307780385017395, |
|
"learning_rate": 6.645528285088169e-06, |
|
"loss": 0.3139, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.412109375, |
|
"grad_norm": 1.187071681022644, |
|
"learning_rate": 6.638057487501613e-06, |
|
"loss": 0.3316, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.41259765625, |
|
"grad_norm": 1.9509886503219604, |
|
"learning_rate": 6.630582590986907e-06, |
|
"loss": 0.3381, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.4130859375, |
|
"grad_norm": 1.5562846660614014, |
|
"learning_rate": 6.623103614248561e-06, |
|
"loss": 0.3648, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.41357421875, |
|
"grad_norm": 1.423948049545288, |
|
"learning_rate": 6.615620576001293e-06, |
|
"loss": 0.3163, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.4140625, |
|
"grad_norm": 1.5273832082748413, |
|
"learning_rate": 6.608133494969993e-06, |
|
"loss": 0.3002, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.41455078125, |
|
"grad_norm": 1.2620773315429688, |
|
"learning_rate": 6.600642389889657e-06, |
|
"loss": 0.3599, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.4150390625, |
|
"grad_norm": 1.283124566078186, |
|
"learning_rate": 6.593147279505352e-06, |
|
"loss": 0.3348, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.41552734375, |
|
"grad_norm": 1.2876836061477661, |
|
"learning_rate": 6.585648182572176e-06, |
|
"loss": 0.347, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.416015625, |
|
"grad_norm": 2.6049535274505615, |
|
"learning_rate": 6.578145117855192e-06, |
|
"loss": 0.3305, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.41650390625, |
|
"grad_norm": 1.7834153175354004, |
|
"learning_rate": 6.570638104129399e-06, |
|
"loss": 0.323, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.4169921875, |
|
"grad_norm": 1.3892278671264648, |
|
"learning_rate": 6.563127160179672e-06, |
|
"loss": 0.3475, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.41748046875, |
|
"grad_norm": 1.4540331363677979, |
|
"learning_rate": 6.555612304800727e-06, |
|
"loss": 0.3442, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.41796875, |
|
"grad_norm": 1.058359146118164, |
|
"learning_rate": 6.548093556797063e-06, |
|
"loss": 0.3398, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.41845703125, |
|
"grad_norm": 1.587546706199646, |
|
"learning_rate": 6.540570934982917e-06, |
|
"loss": 0.3261, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.4189453125, |
|
"grad_norm": 2.1293222904205322, |
|
"learning_rate": 6.533044458182229e-06, |
|
"loss": 0.3755, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.41943359375, |
|
"grad_norm": 1.2648324966430664, |
|
"learning_rate": 6.5255141452285765e-06, |
|
"loss": 0.3001, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.419921875, |
|
"grad_norm": 1.4118512868881226, |
|
"learning_rate": 6.51798001496514e-06, |
|
"loss": 0.3376, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.42041015625, |
|
"grad_norm": 1.4707554578781128, |
|
"learning_rate": 6.510442086244649e-06, |
|
"loss": 0.3247, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.4208984375, |
|
"grad_norm": 1.3729053735733032, |
|
"learning_rate": 6.502900377929344e-06, |
|
"loss": 0.3039, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.42138671875, |
|
"grad_norm": 3.840740442276001, |
|
"learning_rate": 6.4953549088909194e-06, |
|
"loss": 0.3567, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.421875, |
|
"grad_norm": 1.3986668586730957, |
|
"learning_rate": 6.487805698010476e-06, |
|
"loss": 0.3313, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.42236328125, |
|
"grad_norm": 3.7465996742248535, |
|
"learning_rate": 6.4802527641784866e-06, |
|
"loss": 0.3357, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.4228515625, |
|
"grad_norm": 1.7644517421722412, |
|
"learning_rate": 6.472696126294733e-06, |
|
"loss": 0.3662, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.42333984375, |
|
"grad_norm": 1.2544833421707153, |
|
"learning_rate": 6.4651358032682694e-06, |
|
"loss": 0.3371, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.423828125, |
|
"grad_norm": 1.500871181488037, |
|
"learning_rate": 6.457571814017368e-06, |
|
"loss": 0.3224, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.42431640625, |
|
"grad_norm": 1.3260788917541504, |
|
"learning_rate": 6.45000417746948e-06, |
|
"loss": 0.3161, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.4248046875, |
|
"grad_norm": 1.334038257598877, |
|
"learning_rate": 6.442432912561178e-06, |
|
"loss": 0.3423, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.42529296875, |
|
"grad_norm": 1.378933310508728, |
|
"learning_rate": 6.434858038238118e-06, |
|
"loss": 0.3492, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.42578125, |
|
"grad_norm": 1.5512367486953735, |
|
"learning_rate": 6.427279573454985e-06, |
|
"loss": 0.3731, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.42626953125, |
|
"grad_norm": 1.4665623903274536, |
|
"learning_rate": 6.4196975371754514e-06, |
|
"loss": 0.3481, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.4267578125, |
|
"grad_norm": 1.5259501934051514, |
|
"learning_rate": 6.412111948372122e-06, |
|
"loss": 0.3439, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.42724609375, |
|
"grad_norm": 1.465909719467163, |
|
"learning_rate": 6.404522826026496e-06, |
|
"loss": 0.33, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.427734375, |
|
"grad_norm": 1.357045292854309, |
|
"learning_rate": 6.396930189128912e-06, |
|
"loss": 0.344, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.42822265625, |
|
"grad_norm": 1.352899193763733, |
|
"learning_rate": 6.3893340566785046e-06, |
|
"loss": 0.3021, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.4287109375, |
|
"grad_norm": 1.3821226358413696, |
|
"learning_rate": 6.381734447683152e-06, |
|
"loss": 0.3326, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.42919921875, |
|
"grad_norm": 1.675229787826538, |
|
"learning_rate": 6.374131381159436e-06, |
|
"loss": 0.4357, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.4296875, |
|
"grad_norm": 1.7067149877548218, |
|
"learning_rate": 6.366524876132589e-06, |
|
"loss": 0.3018, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.43017578125, |
|
"grad_norm": 1.4271488189697266, |
|
"learning_rate": 6.358914951636444e-06, |
|
"loss": 0.3468, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.4306640625, |
|
"grad_norm": 1.3299568891525269, |
|
"learning_rate": 6.351301626713398e-06, |
|
"loss": 0.3466, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.43115234375, |
|
"grad_norm": 1.6695646047592163, |
|
"learning_rate": 6.343684920414348e-06, |
|
"loss": 0.3214, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.431640625, |
|
"grad_norm": 1.3570027351379395, |
|
"learning_rate": 6.3360648517986605e-06, |
|
"loss": 0.3382, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.43212890625, |
|
"grad_norm": 1.385907769203186, |
|
"learning_rate": 6.32844143993411e-06, |
|
"loss": 0.3092, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.4326171875, |
|
"grad_norm": 1.5601329803466797, |
|
"learning_rate": 6.320814703896838e-06, |
|
"loss": 0.3587, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.43310546875, |
|
"grad_norm": 1.39394211769104, |
|
"learning_rate": 6.313184662771305e-06, |
|
"loss": 0.3404, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.43359375, |
|
"grad_norm": 1.2028573751449585, |
|
"learning_rate": 6.305551335650244e-06, |
|
"loss": 0.3548, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.43408203125, |
|
"grad_norm": 4.250852108001709, |
|
"learning_rate": 6.297914741634605e-06, |
|
"loss": 0.3454, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.4345703125, |
|
"grad_norm": 1.5344691276550293, |
|
"learning_rate": 6.290274899833517e-06, |
|
"loss": 0.3176, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.43505859375, |
|
"grad_norm": 1.7602498531341553, |
|
"learning_rate": 6.2826318293642385e-06, |
|
"loss": 0.339, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.435546875, |
|
"grad_norm": 1.1949964761734009, |
|
"learning_rate": 6.274985549352098e-06, |
|
"loss": 0.304, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.43603515625, |
|
"grad_norm": 1.1564438343048096, |
|
"learning_rate": 6.267336078930464e-06, |
|
"loss": 0.3145, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.4365234375, |
|
"grad_norm": 1.3757606744766235, |
|
"learning_rate": 6.259683437240683e-06, |
|
"loss": 0.3385, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.43701171875, |
|
"grad_norm": 1.8371174335479736, |
|
"learning_rate": 6.252027643432044e-06, |
|
"loss": 0.3355, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 1.334598422050476, |
|
"learning_rate": 6.244368716661714e-06, |
|
"loss": 0.3276, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.43798828125, |
|
"grad_norm": 1.5038282871246338, |
|
"learning_rate": 6.236706676094705e-06, |
|
"loss": 0.3522, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.4384765625, |
|
"grad_norm": 3.6733760833740234, |
|
"learning_rate": 6.229041540903823e-06, |
|
"loss": 0.3431, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.43896484375, |
|
"grad_norm": 1.5863288640975952, |
|
"learning_rate": 6.221373330269613e-06, |
|
"loss": 0.3324, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.439453125, |
|
"grad_norm": 1.4606237411499023, |
|
"learning_rate": 6.213702063380317e-06, |
|
"loss": 0.3226, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.43994140625, |
|
"grad_norm": 1.8370083570480347, |
|
"learning_rate": 6.206027759431825e-06, |
|
"loss": 0.3294, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.4404296875, |
|
"grad_norm": 1.6841802597045898, |
|
"learning_rate": 6.198350437627631e-06, |
|
"loss": 0.3238, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.44091796875, |
|
"grad_norm": 1.9791240692138672, |
|
"learning_rate": 6.190670117178772e-06, |
|
"loss": 0.3326, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.44140625, |
|
"grad_norm": 1.4503194093704224, |
|
"learning_rate": 6.182986817303794e-06, |
|
"loss": 0.3544, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.44189453125, |
|
"grad_norm": 1.9381232261657715, |
|
"learning_rate": 6.175300557228698e-06, |
|
"loss": 0.3278, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.4423828125, |
|
"grad_norm": 4.399080753326416, |
|
"learning_rate": 6.167611356186895e-06, |
|
"loss": 0.3367, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.44287109375, |
|
"grad_norm": 1.4784455299377441, |
|
"learning_rate": 6.159919233419147e-06, |
|
"loss": 0.3559, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.443359375, |
|
"grad_norm": 1.9754478931427002, |
|
"learning_rate": 6.152224208173533e-06, |
|
"loss": 0.3311, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.44384765625, |
|
"grad_norm": 1.5615670680999756, |
|
"learning_rate": 6.144526299705396e-06, |
|
"loss": 0.4023, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.4443359375, |
|
"grad_norm": 1.461332082748413, |
|
"learning_rate": 6.136825527277295e-06, |
|
"loss": 0.3026, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.44482421875, |
|
"grad_norm": 1.4366703033447266, |
|
"learning_rate": 6.129121910158945e-06, |
|
"loss": 0.336, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.4453125, |
|
"grad_norm": 2.06691575050354, |
|
"learning_rate": 6.12141546762719e-06, |
|
"loss": 0.342, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.44580078125, |
|
"grad_norm": 1.7794272899627686, |
|
"learning_rate": 6.11370621896594e-06, |
|
"loss": 0.3532, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.4462890625, |
|
"grad_norm": 1.4335381984710693, |
|
"learning_rate": 6.105994183466131e-06, |
|
"loss": 0.3471, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.44677734375, |
|
"grad_norm": 5.071071147918701, |
|
"learning_rate": 6.0982793804256636e-06, |
|
"loss": 0.336, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.447265625, |
|
"grad_norm": 1.2241181135177612, |
|
"learning_rate": 6.090561829149373e-06, |
|
"loss": 0.3232, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.44775390625, |
|
"grad_norm": 1.267858624458313, |
|
"learning_rate": 6.082841548948966e-06, |
|
"loss": 0.3556, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.4482421875, |
|
"grad_norm": 1.1905056238174438, |
|
"learning_rate": 6.07511855914298e-06, |
|
"loss": 0.2941, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.44873046875, |
|
"grad_norm": 1.2715431451797485, |
|
"learning_rate": 6.067392879056729e-06, |
|
"loss": 0.3159, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.44921875, |
|
"grad_norm": 1.2241966724395752, |
|
"learning_rate": 6.059664528022267e-06, |
|
"loss": 0.3141, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.44970703125, |
|
"grad_norm": 1.6341863870620728, |
|
"learning_rate": 6.051933525378323e-06, |
|
"loss": 0.3319, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.4501953125, |
|
"grad_norm": 3.6661813259124756, |
|
"learning_rate": 6.044199890470267e-06, |
|
"loss": 0.3482, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.45068359375, |
|
"grad_norm": 1.4551990032196045, |
|
"learning_rate": 6.036463642650049e-06, |
|
"loss": 0.3899, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.451171875, |
|
"grad_norm": 1.8738077878952026, |
|
"learning_rate": 6.028724801276167e-06, |
|
"loss": 0.3412, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.45166015625, |
|
"grad_norm": 1.3348729610443115, |
|
"learning_rate": 6.020983385713601e-06, |
|
"loss": 0.3194, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.4521484375, |
|
"grad_norm": 1.675868034362793, |
|
"learning_rate": 6.013239415333776e-06, |
|
"loss": 0.338, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.45263671875, |
|
"grad_norm": 1.5089606046676636, |
|
"learning_rate": 6.005492909514507e-06, |
|
"loss": 0.3502, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.453125, |
|
"grad_norm": 1.6367465257644653, |
|
"learning_rate": 5.997743887639959e-06, |
|
"loss": 0.3356, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.45361328125, |
|
"grad_norm": 1.5445111989974976, |
|
"learning_rate": 5.989992369100586e-06, |
|
"loss": 0.3192, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.4541015625, |
|
"grad_norm": 1.2671817541122437, |
|
"learning_rate": 5.982238373293093e-06, |
|
"loss": 0.3282, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.45458984375, |
|
"grad_norm": 1.2266660928726196, |
|
"learning_rate": 5.974481919620386e-06, |
|
"loss": 0.3202, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.455078125, |
|
"grad_norm": 1.5652544498443604, |
|
"learning_rate": 5.966723027491518e-06, |
|
"loss": 0.3502, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.45556640625, |
|
"grad_norm": 1.2947496175765991, |
|
"learning_rate": 5.958961716321644e-06, |
|
"loss": 0.317, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.4560546875, |
|
"grad_norm": 2.053834915161133, |
|
"learning_rate": 5.951198005531974e-06, |
|
"loss": 0.308, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.45654296875, |
|
"grad_norm": 2.342907428741455, |
|
"learning_rate": 5.943431914549721e-06, |
|
"loss": 0.3314, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.45703125, |
|
"grad_norm": 1.5535999536514282, |
|
"learning_rate": 5.9356634628080555e-06, |
|
"loss": 0.3362, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.45751953125, |
|
"grad_norm": 1.607968807220459, |
|
"learning_rate": 5.927892669746054e-06, |
|
"loss": 0.317, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.4580078125, |
|
"grad_norm": 1.268129825592041, |
|
"learning_rate": 5.920119554808651e-06, |
|
"loss": 0.3278, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.45849609375, |
|
"grad_norm": 4.848256587982178, |
|
"learning_rate": 5.912344137446593e-06, |
|
"loss": 0.3448, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.458984375, |
|
"grad_norm": 1.1670955419540405, |
|
"learning_rate": 5.904566437116388e-06, |
|
"loss": 0.2967, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.45947265625, |
|
"grad_norm": 2.250368595123291, |
|
"learning_rate": 5.896786473280255e-06, |
|
"loss": 0.32, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.4599609375, |
|
"grad_norm": 1.5156008005142212, |
|
"learning_rate": 5.889004265406077e-06, |
|
"loss": 0.2914, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.46044921875, |
|
"grad_norm": 1.0980958938598633, |
|
"learning_rate": 5.8812198329673545e-06, |
|
"loss": 0.304, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.4609375, |
|
"grad_norm": 1.7652188539505005, |
|
"learning_rate": 5.873433195443152e-06, |
|
"loss": 0.3497, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.46142578125, |
|
"grad_norm": 1.977793574333191, |
|
"learning_rate": 5.865644372318053e-06, |
|
"loss": 0.3598, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.4619140625, |
|
"grad_norm": 1.490369200706482, |
|
"learning_rate": 5.857853383082112e-06, |
|
"loss": 0.3433, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.46240234375, |
|
"grad_norm": 5.214506149291992, |
|
"learning_rate": 5.8500602472307974e-06, |
|
"loss": 0.3506, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.462890625, |
|
"grad_norm": 1.304093837738037, |
|
"learning_rate": 5.842264984264958e-06, |
|
"loss": 0.3035, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.46337890625, |
|
"grad_norm": 1.2441211938858032, |
|
"learning_rate": 5.834467613690759e-06, |
|
"loss": 0.3308, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.4638671875, |
|
"grad_norm": 1.0881738662719727, |
|
"learning_rate": 5.82666815501964e-06, |
|
"loss": 0.3163, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.46435546875, |
|
"grad_norm": 1.4398066997528076, |
|
"learning_rate": 5.8188666277682695e-06, |
|
"loss": 0.327, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.46484375, |
|
"grad_norm": 1.81572425365448, |
|
"learning_rate": 5.8110630514584854e-06, |
|
"loss": 0.3328, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.46533203125, |
|
"grad_norm": 1.5575212240219116, |
|
"learning_rate": 5.803257445617263e-06, |
|
"loss": 0.3495, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.4658203125, |
|
"grad_norm": 1.3975605964660645, |
|
"learning_rate": 5.795449829776645e-06, |
|
"loss": 0.3448, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.46630859375, |
|
"grad_norm": 1.2950125932693481, |
|
"learning_rate": 5.787640223473713e-06, |
|
"loss": 0.3617, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.466796875, |
|
"grad_norm": 1.3984689712524414, |
|
"learning_rate": 5.779828646250522e-06, |
|
"loss": 0.3608, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.46728515625, |
|
"grad_norm": 1.0765591859817505, |
|
"learning_rate": 5.772015117654065e-06, |
|
"loss": 0.3093, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.4677734375, |
|
"grad_norm": 1.5954604148864746, |
|
"learning_rate": 5.764199657236214e-06, |
|
"loss": 0.3504, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.46826171875, |
|
"grad_norm": 1.6604746580123901, |
|
"learning_rate": 5.756382284553675e-06, |
|
"loss": 0.3096, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 1.3618206977844238, |
|
"learning_rate": 5.7485630191679456e-06, |
|
"loss": 0.3057, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.46923828125, |
|
"grad_norm": 1.217523217201233, |
|
"learning_rate": 5.740741880645248e-06, |
|
"loss": 0.3708, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.4697265625, |
|
"grad_norm": 1.2130963802337646, |
|
"learning_rate": 5.7329188885565e-06, |
|
"loss": 0.321, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.47021484375, |
|
"grad_norm": 1.3064903020858765, |
|
"learning_rate": 5.725094062477256e-06, |
|
"loss": 0.3211, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.470703125, |
|
"grad_norm": 1.5063132047653198, |
|
"learning_rate": 5.717267421987659e-06, |
|
"loss": 0.3307, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.47119140625, |
|
"grad_norm": 1.3585816621780396, |
|
"learning_rate": 5.7094389866723905e-06, |
|
"loss": 0.3631, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.4716796875, |
|
"grad_norm": 1.5815399885177612, |
|
"learning_rate": 5.701608776120627e-06, |
|
"loss": 0.352, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.47216796875, |
|
"grad_norm": 1.4560235738754272, |
|
"learning_rate": 5.6937768099259845e-06, |
|
"loss": 0.3109, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.47265625, |
|
"grad_norm": 1.8057149648666382, |
|
"learning_rate": 5.685943107686476e-06, |
|
"loss": 0.3218, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.47314453125, |
|
"grad_norm": 1.4362132549285889, |
|
"learning_rate": 5.678107689004449e-06, |
|
"loss": 0.3293, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.4736328125, |
|
"grad_norm": 2.0112991333007812, |
|
"learning_rate": 5.670270573486555e-06, |
|
"loss": 0.356, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.47412109375, |
|
"grad_norm": 1.2395293712615967, |
|
"learning_rate": 5.662431780743691e-06, |
|
"loss": 0.3439, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.474609375, |
|
"grad_norm": 1.4867768287658691, |
|
"learning_rate": 5.6545913303909495e-06, |
|
"loss": 0.3767, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.47509765625, |
|
"grad_norm": 1.210928201675415, |
|
"learning_rate": 5.646749242047567e-06, |
|
"loss": 0.3259, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.4755859375, |
|
"grad_norm": 1.157676100730896, |
|
"learning_rate": 5.6389055353368826e-06, |
|
"loss": 0.336, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.47607421875, |
|
"grad_norm": 1.485719919204712, |
|
"learning_rate": 5.631060229886287e-06, |
|
"loss": 0.3121, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.4765625, |
|
"grad_norm": 1.1137949228286743, |
|
"learning_rate": 5.6232133453271676e-06, |
|
"loss": 0.3362, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.47705078125, |
|
"grad_norm": 1.213346004486084, |
|
"learning_rate": 5.615364901294863e-06, |
|
"loss": 0.3194, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.4775390625, |
|
"grad_norm": 1.3590606451034546, |
|
"learning_rate": 5.607514917428618e-06, |
|
"loss": 0.3484, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.47802734375, |
|
"grad_norm": 2.0311455726623535, |
|
"learning_rate": 5.599663413371527e-06, |
|
"loss": 0.3419, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.478515625, |
|
"grad_norm": 1.195672869682312, |
|
"learning_rate": 5.5918104087704925e-06, |
|
"loss": 0.339, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.47900390625, |
|
"grad_norm": 1.8912562131881714, |
|
"learning_rate": 5.583955923276163e-06, |
|
"loss": 0.3427, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.4794921875, |
|
"grad_norm": 2.002305030822754, |
|
"learning_rate": 5.576099976542904e-06, |
|
"loss": 0.3595, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.47998046875, |
|
"grad_norm": 1.4438331127166748, |
|
"learning_rate": 5.56824258822873e-06, |
|
"loss": 0.3632, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.48046875, |
|
"grad_norm": 1.366222620010376, |
|
"learning_rate": 5.560383777995264e-06, |
|
"loss": 0.3188, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.48095703125, |
|
"grad_norm": 1.3330532312393188, |
|
"learning_rate": 5.552523565507689e-06, |
|
"loss": 0.3262, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.4814453125, |
|
"grad_norm": 1.5084117650985718, |
|
"learning_rate": 5.544661970434696e-06, |
|
"loss": 0.325, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.48193359375, |
|
"grad_norm": 1.0425949096679688, |
|
"learning_rate": 5.536799012448435e-06, |
|
"loss": 0.315, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.482421875, |
|
"grad_norm": 2.695110559463501, |
|
"learning_rate": 5.528934711224467e-06, |
|
"loss": 0.3166, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.48291015625, |
|
"grad_norm": 1.3446696996688843, |
|
"learning_rate": 5.521069086441715e-06, |
|
"loss": 0.3437, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.4833984375, |
|
"grad_norm": 1.360203742980957, |
|
"learning_rate": 5.513202157782411e-06, |
|
"loss": 0.3472, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.48388671875, |
|
"grad_norm": 1.3492072820663452, |
|
"learning_rate": 5.505333944932053e-06, |
|
"loss": 0.3363, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.484375, |
|
"grad_norm": 1.1588752269744873, |
|
"learning_rate": 5.497464467579351e-06, |
|
"loss": 0.338, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.48486328125, |
|
"grad_norm": 1.4233770370483398, |
|
"learning_rate": 5.48959374541618e-06, |
|
"loss": 0.336, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.4853515625, |
|
"grad_norm": 1.3421063423156738, |
|
"learning_rate": 5.4817217981375286e-06, |
|
"loss": 0.324, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.48583984375, |
|
"grad_norm": 1.6678565740585327, |
|
"learning_rate": 5.473848645441452e-06, |
|
"loss": 0.3189, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.486328125, |
|
"grad_norm": 1.912955641746521, |
|
"learning_rate": 5.465974307029021e-06, |
|
"loss": 0.3643, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.48681640625, |
|
"grad_norm": 2.0670387744903564, |
|
"learning_rate": 5.458098802604273e-06, |
|
"loss": 0.332, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.4873046875, |
|
"grad_norm": 2.6159446239471436, |
|
"learning_rate": 5.450222151874166e-06, |
|
"loss": 0.3674, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.48779296875, |
|
"grad_norm": 1.3627862930297852, |
|
"learning_rate": 5.442344374548524e-06, |
|
"loss": 0.3496, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.48828125, |
|
"grad_norm": 1.4907851219177246, |
|
"learning_rate": 5.43446549033999e-06, |
|
"loss": 0.3475, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 2048, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.000488202654974e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|