|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 19710, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.76103500761035, |
|
"grad_norm": 1.1888039112091064, |
|
"learning_rate": 0.0009746321664129883, |
|
"loss": 5.3071, |
|
"max_memory_allocated (GB)": 5.75, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.08006198116451355, |
|
"eval_loss": 6.240572929382324, |
|
"eval_runtime": 1138.1454, |
|
"eval_samples_per_second": 590.273, |
|
"eval_steps_per_second": 0.577, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 657, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5220700152207, |
|
"grad_norm": 1.0501078367233276, |
|
"learning_rate": 0.0009492643328259766, |
|
"loss": 3.1366, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.10465945339281382, |
|
"eval_loss": 5.848066329956055, |
|
"eval_runtime": 1134.9424, |
|
"eval_samples_per_second": 591.939, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 1314, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2831050228310503, |
|
"grad_norm": 0.972637414932251, |
|
"learning_rate": 0.0009238964992389651, |
|
"loss": 2.6048, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 1500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.12382538697294054, |
|
"eval_loss": 5.552162170410156, |
|
"eval_runtime": 1132.6347, |
|
"eval_samples_per_second": 593.145, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 1971, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.0441400304414, |
|
"grad_norm": 0.8711762428283691, |
|
"learning_rate": 0.0008985286656519534, |
|
"loss": 2.3103, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 2000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.8051750380517504, |
|
"grad_norm": 0.859586775302887, |
|
"learning_rate": 0.0008731608320649417, |
|
"loss": 1.9918, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 2500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.1300875089496098, |
|
"eval_loss": 5.555095672607422, |
|
"eval_runtime": 1133.2064, |
|
"eval_samples_per_second": 592.846, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 2628, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.566210045662101, |
|
"grad_norm": 0.8556590676307678, |
|
"learning_rate": 0.00084779299847793, |
|
"loss": 1.8353, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 3000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.141504308464954, |
|
"eval_loss": 5.414204120635986, |
|
"eval_runtime": 1132.3487, |
|
"eval_samples_per_second": 593.295, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 3285, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.327245053272451, |
|
"grad_norm": 0.8324838280677795, |
|
"learning_rate": 0.0008224251648909183, |
|
"loss": 1.7262, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 3500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.14951988413511416, |
|
"eval_loss": 5.40610933303833, |
|
"eval_runtime": 1133.7065, |
|
"eval_samples_per_second": 592.585, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 3942, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.0882800608828, |
|
"grad_norm": 0.7686742544174194, |
|
"learning_rate": 0.0007970573313039067, |
|
"loss": 1.6381, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 4000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 6.8493150684931505, |
|
"grad_norm": 0.7999989986419678, |
|
"learning_rate": 0.000771689497716895, |
|
"loss": 1.5135, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 4500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.14675722704248328, |
|
"eval_loss": 5.426120758056641, |
|
"eval_runtime": 1131.9083, |
|
"eval_samples_per_second": 593.526, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 4599, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 7.610350076103501, |
|
"grad_norm": 0.8491269946098328, |
|
"learning_rate": 0.0007463216641298833, |
|
"loss": 1.4225, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 5000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.15733153522462218, |
|
"eval_loss": 5.333346843719482, |
|
"eval_runtime": 1130.7533, |
|
"eval_samples_per_second": 594.132, |
|
"eval_steps_per_second": 0.581, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 5256, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 8.37138508371385, |
|
"grad_norm": 0.7948514819145203, |
|
"learning_rate": 0.0007209538305428717, |
|
"loss": 1.354, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 5500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.16383479429666115, |
|
"eval_loss": 5.220494747161865, |
|
"eval_runtime": 1131.3928, |
|
"eval_samples_per_second": 593.796, |
|
"eval_steps_per_second": 0.581, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 5913, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 9.132420091324201, |
|
"grad_norm": 0.7878388166427612, |
|
"learning_rate": 0.00069558599695586, |
|
"loss": 1.3172, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 6000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 9.89345509893455, |
|
"grad_norm": 0.7506768703460693, |
|
"learning_rate": 0.0006702181633688484, |
|
"loss": 1.2511, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 6500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.17084414356885877, |
|
"eval_loss": 5.212928295135498, |
|
"eval_runtime": 1133.5622, |
|
"eval_samples_per_second": 592.66, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 6570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 10.654490106544902, |
|
"grad_norm": 0.7633622288703918, |
|
"learning_rate": 0.0006448503297818367, |
|
"loss": 1.1742, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 7000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.17239664968287494, |
|
"eval_loss": 5.200212001800537, |
|
"eval_runtime": 1129.7037, |
|
"eval_samples_per_second": 594.684, |
|
"eval_steps_per_second": 0.582, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 7227, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 11.415525114155251, |
|
"grad_norm": 0.7618717551231384, |
|
"learning_rate": 0.000619482496194825, |
|
"loss": 1.1342, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 7500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.17819584797645788, |
|
"eval_loss": 5.163547039031982, |
|
"eval_runtime": 1128.6949, |
|
"eval_samples_per_second": 595.216, |
|
"eval_steps_per_second": 0.582, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 7884, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 12.1765601217656, |
|
"grad_norm": 0.7090550661087036, |
|
"learning_rate": 0.0005941146626078133, |
|
"loss": 1.1111, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 8000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 12.937595129375952, |
|
"grad_norm": 0.7710525393486023, |
|
"learning_rate": 0.0005687468290208016, |
|
"loss": 1.0711, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 8500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.17787879735106435, |
|
"eval_loss": 5.143550872802734, |
|
"eval_runtime": 1133.3031, |
|
"eval_samples_per_second": 592.796, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 8541, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 13.698630136986301, |
|
"grad_norm": 0.7961007952690125, |
|
"learning_rate": 0.00054337899543379, |
|
"loss": 0.9971, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 9000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.18167893935402052, |
|
"eval_loss": 5.107571125030518, |
|
"eval_runtime": 1132.5925, |
|
"eval_samples_per_second": 593.167, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 9198, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 14.459665144596652, |
|
"grad_norm": 0.7081454992294312, |
|
"learning_rate": 0.0005180111618467784, |
|
"loss": 0.9774, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 9500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.19349168002595946, |
|
"eval_loss": 4.9076433181762695, |
|
"eval_runtime": 1135.4783, |
|
"eval_samples_per_second": 591.66, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 9855, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 15.220700152207002, |
|
"grad_norm": 0.706643283367157, |
|
"learning_rate": 0.0004926433282597666, |
|
"loss": 0.9457, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 10000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 15.981735159817351, |
|
"grad_norm": 0.7469919323921204, |
|
"learning_rate": 0.0004672754946727549, |
|
"loss": 0.9174, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 10500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.18904255176632923, |
|
"eval_loss": 5.03179407119751, |
|
"eval_runtime": 1127.5619, |
|
"eval_samples_per_second": 595.814, |
|
"eval_steps_per_second": 0.583, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 10512, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 16.7427701674277, |
|
"grad_norm": 0.7092038989067078, |
|
"learning_rate": 0.0004419076610857433, |
|
"loss": 0.8675, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 11000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.19512456517176552, |
|
"eval_loss": 5.039154052734375, |
|
"eval_runtime": 1129.1255, |
|
"eval_samples_per_second": 594.989, |
|
"eval_steps_per_second": 0.582, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 11169, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 17.503805175038053, |
|
"grad_norm": 0.722985029220581, |
|
"learning_rate": 0.0004165398274987316, |
|
"loss": 0.8499, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 11500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.19776069971435672, |
|
"eval_loss": 5.024279594421387, |
|
"eval_runtime": 1134.2545, |
|
"eval_samples_per_second": 592.298, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 11826, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 18.264840182648403, |
|
"grad_norm": 0.703146755695343, |
|
"learning_rate": 0.0003911719939117199, |
|
"loss": 0.8262, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 12000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.19716678797946466, |
|
"eval_loss": 5.084349632263184, |
|
"eval_runtime": 1134.1402, |
|
"eval_samples_per_second": 592.358, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 12483, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 19.025875190258752, |
|
"grad_norm": 0.6394225358963013, |
|
"learning_rate": 0.00036580416032470827, |
|
"loss": 0.8039, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 12500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 19.7869101978691, |
|
"grad_norm": 0.6850036978721619, |
|
"learning_rate": 0.0003404363267376966, |
|
"loss": 0.7623, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 13000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.20482214650715894, |
|
"eval_loss": 5.0004353523254395, |
|
"eval_runtime": 1132.7333, |
|
"eval_samples_per_second": 593.094, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 13140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 20.54794520547945, |
|
"grad_norm": 0.7249587178230286, |
|
"learning_rate": 0.00031506849315068495, |
|
"loss": 0.7481, |
|
"max_memory_allocated (GB)": 60.21, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 13500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.21318007731272057, |
|
"eval_loss": 4.842759609222412, |
|
"eval_runtime": 1132.229, |
|
"eval_samples_per_second": 593.358, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 13797, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 21.308980213089804, |
|
"grad_norm": 0.651644766330719, |
|
"learning_rate": 0.00028970065956367326, |
|
"loss": 0.7284, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 14000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.2148576174761877, |
|
"eval_loss": 4.846081733703613, |
|
"eval_runtime": 1134.6589, |
|
"eval_samples_per_second": 592.087, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 14454, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 22.070015220700153, |
|
"grad_norm": 0.6403504610061646, |
|
"learning_rate": 0.00026433282597666157, |
|
"loss": 0.706, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 14500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 22.831050228310502, |
|
"grad_norm": 0.6770262718200684, |
|
"learning_rate": 0.0002389649923896499, |
|
"loss": 0.6834, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 15000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2159159413947548, |
|
"eval_loss": 4.8741374015808105, |
|
"eval_runtime": 1130.2975, |
|
"eval_samples_per_second": 594.372, |
|
"eval_steps_per_second": 0.581, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 15111, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 23.59208523592085, |
|
"grad_norm": 0.6229885816574097, |
|
"learning_rate": 0.00021359715880263824, |
|
"loss": 0.6591, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 15500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.2186681789832648, |
|
"eval_loss": 4.89931058883667, |
|
"eval_runtime": 1133.9373, |
|
"eval_samples_per_second": 592.464, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 15768, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 24.3531202435312, |
|
"grad_norm": 0.6464186310768127, |
|
"learning_rate": 0.00018822932521562658, |
|
"loss": 0.6447, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 16000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.21962528486180016, |
|
"eval_loss": 4.8415398597717285, |
|
"eval_runtime": 1126.1392, |
|
"eval_samples_per_second": 596.567, |
|
"eval_steps_per_second": 0.583, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 16425, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 25.114155251141554, |
|
"grad_norm": 0.695124626159668, |
|
"learning_rate": 0.00016286149162861492, |
|
"loss": 0.6323, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 16500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 25.875190258751903, |
|
"grad_norm": 0.7219062447547913, |
|
"learning_rate": 0.00013749365804160323, |
|
"loss": 0.6107, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 17000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.22164369166008005, |
|
"eval_loss": 4.859982967376709, |
|
"eval_runtime": 1131.8158, |
|
"eval_samples_per_second": 593.574, |
|
"eval_steps_per_second": 0.58, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 17082, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 26.636225266362253, |
|
"grad_norm": 0.6680580377578735, |
|
"learning_rate": 0.00011212582445459158, |
|
"loss": 0.5958, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 17500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.22447184277861382, |
|
"eval_loss": 4.839137554168701, |
|
"eval_runtime": 1135.3766, |
|
"eval_samples_per_second": 591.713, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 17739, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 27.397260273972602, |
|
"grad_norm": 0.6511676907539368, |
|
"learning_rate": 8.67579908675799e-05, |
|
"loss": 0.5836, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 18000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.22654234709749826, |
|
"eval_loss": 4.856111526489258, |
|
"eval_runtime": 1131.6209, |
|
"eval_samples_per_second": 593.677, |
|
"eval_steps_per_second": 0.581, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 18396, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 28.15829528158295, |
|
"grad_norm": 0.6694862842559814, |
|
"learning_rate": 6.139015728056824e-05, |
|
"loss": 0.5713, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 18500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 28.919330289193304, |
|
"grad_norm": 0.6698545813560486, |
|
"learning_rate": 3.6022323693556566e-05, |
|
"loss": 0.5547, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 19000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2294940437648943, |
|
"eval_loss": 4.793288230895996, |
|
"eval_runtime": 1134.2517, |
|
"eval_samples_per_second": 592.3, |
|
"eval_steps_per_second": 0.579, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 19053, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 29.680365296803654, |
|
"grad_norm": 0.5940834879875183, |
|
"learning_rate": 1.06544901065449e-05, |
|
"loss": 0.547, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 19500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.22931691219483877, |
|
"eval_loss": 4.809002876281738, |
|
"eval_runtime": 1130.6604, |
|
"eval_samples_per_second": 594.181, |
|
"eval_steps_per_second": 0.581, |
|
"max_memory_allocated (GB)": 60.24, |
|
"memory_allocated (GB)": 3.2, |
|
"step": 19710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"max_memory_allocated (GB)": 1.42, |
|
"memory_allocated (GB)": 1.42, |
|
"step": 19710, |
|
"total_flos": 2.9333313524800244e+21, |
|
"total_memory_available (GB)": 94.62, |
|
"train_loss": 0.0, |
|
"train_runtime": 0.2168, |
|
"train_samples_per_second": 92964947.027, |
|
"train_steps_per_second": 90914.595 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 19710, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.9333313524800244e+21, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|