diaenra committed
Commit 0266883 · verified · 1 Parent(s): 767028b

Training in progress, step 337, checkpoint
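For context, the state this commit records can be read straight out of the updated trainer_state.json. A minimal sketch, assuming the repo is cloned locally (the printed values come from the diff below):

```python
import json

# Load the checkpoint's trainer state saved by this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])      # 337 after this commit (was 239)
print(state["epoch"])            # ~0.9978, i.e. the end of the first epoch
print(state["log_history"][-1])  # last logged step: lr 0.0, loss 0.1176
```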
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2e8037fd1742aafeefe9394eb6eb3056e379cec6904dd227fdd37e6f723d139
+oid sha256:4ff032c2b84a94327628cf41dd74556da8b5604505cb11b3a7fbcd44644cbb23
 size 67126760
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c117807c6d7ec4eab79023545c24abd813201ccbb0d506a573fa5dc435b019a
+oid sha256:8ab626055373aac8d72bf80d61010fbfe74b3561c5f47454fa675333369173bd
 size 134325882
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b355518e25eb3f8f0a23ea4bbd5bf062b989e7cbba231f8b4b77cf35bb4106d
+oid sha256:3ddb0dc9aaf9cc0dc4a6b0020adae3a2c77f9db62a9fc24dadaa9a18d3a18470
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0bd0e7decbf7c91ab9fd757f5ce5ae4cb006710e4bd76818c20a5be991c1f90
+oid sha256:de435905b02e6e14253beeb6db9bdb52440d05130914ca4da5e77f7ebf47b817
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7076239822353811,
+  "epoch": 0.997779422649889,
   "eval_steps": 500,
-  "global_step": 239,
+  "global_step": 337,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1680,6 +1680,692 @@
       "learning_rate": 3.657954557919183e-05,
       "loss": 1.6068,
       "step": 239
+    },
+    {
+      "epoch": 0.7105847520355293,
+      "grad_norm": 0.6972988843917847,
+      "learning_rate": 3.5942280889623026e-05,
+      "loss": 0.6496,
+      "step": 240
+    },
+    {
+      "epoch": 0.7135455218356773,
+      "grad_norm": 0.620529055595398,
+      "learning_rate": 3.5307486283103966e-05,
+      "loss": 0.531,
+      "step": 241
+    },
+    {
+      "epoch": 0.7165062916358254,
+      "grad_norm": 0.71197110414505,
+      "learning_rate": 3.467527329945026e-05,
+      "loss": 0.6825,
+      "step": 242
+    },
+    {
+      "epoch": 0.7194670614359734,
+      "grad_norm": 0.6315649151802063,
+      "learning_rate": 3.404575302486039e-05,
+      "loss": 0.4259,
+      "step": 243
+    },
+    {
+      "epoch": 0.7224278312361214,
+      "grad_norm": 0.6250630617141724,
+      "learning_rate": 3.3419036072396616e-05,
+      "loss": 0.2993,
+      "step": 244
+    },
+    {
+      "epoch": 0.7253886010362695,
+      "grad_norm": 0.6370623111724854,
+      "learning_rate": 3.27952325625493e-05,
+      "loss": 0.3333,
+      "step": 245
+    },
+    {
+      "epoch": 0.7283493708364175,
+      "grad_norm": 0.494165301322937,
+      "learning_rate": 3.2174452103887456e-05,
+      "loss": 0.1763,
+      "step": 246
+    },
+    {
+      "epoch": 0.7313101406365655,
+      "grad_norm": 0.4754185676574707,
+      "learning_rate": 3.1556803773799614e-05,
+      "loss": 0.116,
+      "step": 247
+    },
+    {
+      "epoch": 0.7342709104367136,
+      "grad_norm": 0.5947216749191284,
+      "learning_rate": 3.094239609932764e-05,
+      "loss": 0.1401,
+      "step": 248
+    },
+    {
+      "epoch": 0.7372316802368616,
+      "grad_norm": 0.7304718494415283,
+      "learning_rate": 3.0331337038097597e-05,
+      "loss": 0.1343,
+      "step": 249
+    },
+    {
+      "epoch": 0.7401924500370096,
+      "grad_norm": 1.4958913326263428,
+      "learning_rate": 2.9723733959350307e-05,
+      "loss": 0.2833,
+      "step": 250
+    },
+    {
+      "epoch": 0.7431532198371577,
+      "grad_norm": 1.9902845621109009,
+      "learning_rate": 2.911969362507574e-05,
+      "loss": 2.3228,
+      "step": 251
+    },
+    {
+      "epoch": 0.7461139896373057,
+      "grad_norm": 1.4703550338745117,
+      "learning_rate": 2.8519322171253602e-05,
+      "loss": 1.8494,
+      "step": 252
+    },
+    {
+      "epoch": 0.7490747594374537,
+      "grad_norm": 1.096100926399231,
+      "learning_rate": 2.7922725089204426e-05,
+      "loss": 1.3966,
+      "step": 253
+    },
+    {
+      "epoch": 0.7520355292376018,
+      "grad_norm": 1.069754719734192,
+      "learning_rate": 2.733000720705341e-05,
+      "loss": 1.512,
+      "step": 254
+    },
+    {
+      "epoch": 0.7549962990377498,
+      "grad_norm": 1.1707820892333984,
+      "learning_rate": 2.674127267131131e-05,
+      "loss": 1.7061,
+      "step": 255
+    },
+    {
+      "epoch": 0.7579570688378978,
+      "grad_norm": 1.0361454486846924,
+      "learning_rate": 2.6156624928574707e-05,
+      "loss": 1.2677,
+      "step": 256
+    },
+    {
+      "epoch": 0.7609178386380459,
+      "grad_norm": 1.0314826965332031,
+      "learning_rate": 2.5576166707349385e-05,
+      "loss": 1.7485,
+      "step": 257
+    },
+    {
+      "epoch": 0.7638786084381939,
+      "grad_norm": 1.0188485383987427,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 1.7111,
+      "step": 258
+    },
+    {
+      "epoch": 0.7668393782383419,
+      "grad_norm": 1.0937459468841553,
+      "learning_rate": 2.4428226044828896e-05,
+      "loss": 1.9624,
+      "step": 259
+    },
+    {
+      "epoch": 0.7698001480384901,
+      "grad_norm": 1.023545742034912,
+      "learning_rate": 2.3860945308287552e-05,
+      "loss": 1.3442,
+      "step": 260
+    },
+    {
+      "epoch": 0.7727609178386381,
+      "grad_norm": 1.0937973260879517,
+      "learning_rate": 2.3298257467323604e-05,
+      "loss": 1.9485,
+      "step": 261
+    },
+    {
+      "epoch": 0.7757216876387861,
+      "grad_norm": 1.153558373451233,
+      "learning_rate": 2.2740261391866637e-05,
+      "loss": 1.8108,
+      "step": 262
+    },
+    {
+      "epoch": 0.7786824574389342,
+      "grad_norm": 1.0607295036315918,
+      "learning_rate": 2.2187055127455653e-05,
+      "loss": 1.7313,
+      "step": 263
+    },
+    {
+      "epoch": 0.7816432272390822,
+      "grad_norm": 1.0273983478546143,
+      "learning_rate": 2.16387358780116e-05,
+      "loss": 1.4125,
+      "step": 264
+    },
+    {
+      "epoch": 0.7846039970392302,
+      "grad_norm": 1.010677456855774,
+      "learning_rate": 2.1095399988757574e-05,
+      "loss": 1.6405,
+      "step": 265
+    },
+    {
+      "epoch": 0.7875647668393783,
+      "grad_norm": 1.0559284687042236,
+      "learning_rate": 2.0557142929290023e-05,
+      "loss": 1.9992,
+      "step": 266
+    },
+    {
+      "epoch": 0.7905255366395263,
+      "grad_norm": 1.1667604446411133,
+      "learning_rate": 2.002405927680374e-05,
+      "loss": 1.6065,
+      "step": 267
+    },
+    {
+      "epoch": 0.7934863064396743,
+      "grad_norm": 0.968020498752594,
+      "learning_rate": 1.9496242699473783e-05,
+      "loss": 1.6283,
+      "step": 268
+    },
+    {
+      "epoch": 0.7964470762398224,
+      "grad_norm": 1.1873533725738525,
+      "learning_rate": 1.897378593999693e-05,
+      "loss": 1.622,
+      "step": 269
+    },
+    {
+      "epoch": 0.7994078460399704,
+      "grad_norm": 1.0366755723953247,
+      "learning_rate": 1.8456780799295886e-05,
+      "loss": 1.7705,
+      "step": 270
+    },
+    {
+      "epoch": 0.8023686158401184,
+      "grad_norm": 0.9406031966209412,
+      "learning_rate": 1.794531812038901e-05,
+      "loss": 1.3649,
+      "step": 271
+    },
+    {
+      "epoch": 0.8053293856402665,
+      "grad_norm": 1.1950721740722656,
+      "learning_rate": 1.743948777242814e-05,
+      "loss": 2.0907,
+      "step": 272
+    },
+    {
+      "epoch": 0.8082901554404145,
+      "grad_norm": 0.8741153478622437,
+      "learning_rate": 1.6939378634907815e-05,
+      "loss": 1.3801,
+      "step": 273
+    },
+    {
+      "epoch": 0.8112509252405625,
+      "grad_norm": 0.9927524924278259,
+      "learning_rate": 1.6445078582048155e-05,
+      "loss": 1.6127,
+      "step": 274
+    },
+    {
+      "epoch": 0.8142116950407106,
+      "grad_norm": 1.0479118824005127,
+      "learning_rate": 1.5956674467354537e-05,
+      "loss": 2.0404,
+      "step": 275
+    },
+    {
+      "epoch": 0.8171724648408586,
+      "grad_norm": 0.9169769883155823,
+      "learning_rate": 1.5474252108356474e-05,
+      "loss": 1.5797,
+      "step": 276
+    },
+    {
+      "epoch": 0.8201332346410066,
+      "grad_norm": 0.9381272196769714,
+      "learning_rate": 1.4997896271528739e-05,
+      "loss": 1.0802,
+      "step": 277
+    },
+    {
+      "epoch": 0.8230940044411547,
+      "grad_norm": 0.9167252779006958,
+      "learning_rate": 1.452769065739688e-05,
+      "loss": 1.3707,
+      "step": 278
+    },
+    {
+      "epoch": 0.8260547742413027,
+      "grad_norm": 1.007392168045044,
+      "learning_rate": 1.4063717885830374e-05,
+      "loss": 1.79,
+      "step": 279
+    },
+    {
+      "epoch": 0.8290155440414507,
+      "grad_norm": 1.0070596933364868,
+      "learning_rate": 1.3606059481525296e-05,
+      "loss": 1.7477,
+      "step": 280
+    },
+    {
+      "epoch": 0.8319763138415989,
+      "grad_norm": 1.103658676147461,
+      "learning_rate": 1.315479585967978e-05,
+      "loss": 1.8807,
+      "step": 281
+    },
+    {
+      "epoch": 0.8349370836417469,
+      "grad_norm": 0.742683470249176,
+      "learning_rate": 1.2710006311864104e-05,
+      "loss": 0.99,
+      "step": 282
+    },
+    {
+      "epoch": 0.8378978534418949,
+      "grad_norm": 0.8142567276954651,
+      "learning_rate": 1.2271768992088489e-05,
+      "loss": 1.1157,
+      "step": 283
+    },
+    {
+      "epoch": 0.840858623242043,
+      "grad_norm": 0.8256039023399353,
+      "learning_rate": 1.184016090307059e-05,
+      "loss": 1.2341,
+      "step": 284
+    },
+    {
+      "epoch": 0.843819393042191,
+      "grad_norm": 1.0862884521484375,
+      "learning_rate": 1.1415257882705311e-05,
+      "loss": 1.7456,
+      "step": 285
+    },
+    {
+      "epoch": 0.846780162842339,
+      "grad_norm": 1.1565598249435425,
+      "learning_rate": 1.09971345907394e-05,
+      "loss": 1.6508,
+      "step": 286
+    },
+    {
+      "epoch": 0.8497409326424871,
+      "grad_norm": 1.1448659896850586,
+      "learning_rate": 1.0585864495652897e-05,
+      "loss": 1.9964,
+      "step": 287
+    },
+    {
+      "epoch": 0.8527017024426351,
+      "grad_norm": 0.968397855758667,
+      "learning_rate": 1.0181519861750078e-05,
+      "loss": 1.6825,
+      "step": 288
+    },
+    {
+      "epoch": 0.8556624722427831,
+      "grad_norm": 0.9276019930839539,
+      "learning_rate": 9.784171736461762e-06,
+      "loss": 1.2859,
+      "step": 289
+    },
+    {
+      "epoch": 0.8586232420429312,
+      "grad_norm": 0.7998013496398926,
+      "learning_rate": 9.393889937861694e-06,
+      "loss": 0.6289,
+      "step": 290
+    },
+    {
+      "epoch": 0.8615840118430792,
+      "grad_norm": 0.6690629124641418,
+      "learning_rate": 9.010743042398684e-06,
+      "loss": 0.581,
+      "step": 291
+    },
+    {
+      "epoch": 0.8645447816432272,
+      "grad_norm": 0.6341821551322937,
+      "learning_rate": 8.634798372847148e-06,
+      "loss": 0.3543,
+      "step": 292
+    },
+    {
+      "epoch": 0.8675055514433753,
+      "grad_norm": 0.5753948092460632,
+      "learning_rate": 8.266121986477699e-06,
+      "loss": 0.3731,
+      "step": 293
+    },
+    {
+      "epoch": 0.8704663212435233,
+      "grad_norm": 0.7309054136276245,
+      "learning_rate": 7.904778663450324e-06,
+      "loss": 0.6859,
+      "step": 294
+    },
+    {
+      "epoch": 0.8734270910436713,
+      "grad_norm": 0.5517750382423401,
+      "learning_rate": 7.550831895431798e-06,
+      "loss": 0.2878,
+      "step": 295
+    },
+    {
+      "epoch": 0.8763878608438194,
+      "grad_norm": 0.5744172930717468,
+      "learning_rate": 7.204343874439578e-06,
+      "loss": 0.202,
+      "step": 296
+    },
+    {
+      "epoch": 0.8793486306439674,
+      "grad_norm": 0.4893665909767151,
+      "learning_rate": 6.865375481914016e-06,
+      "loss": 0.1466,
+      "step": 297
+    },
+    {
+      "epoch": 0.8823094004441154,
+      "grad_norm": 0.503653347492218,
+      "learning_rate": 6.533986278020876e-06,
+      "loss": 0.1117,
+      "step": 298
+    },
+    {
+      "epoch": 0.8852701702442635,
+      "grad_norm": 0.754020631313324,
+      "learning_rate": 6.210234491186079e-06,
+      "loss": 0.1614,
+      "step": 299
+    },
+    {
+      "epoch": 0.8882309400444115,
+      "grad_norm": 1.9576098918914795,
+      "learning_rate": 5.894177007864271e-06,
+      "loss": 0.5595,
+      "step": 300
+    },
+    {
+      "epoch": 0.8911917098445595,
+      "grad_norm": 0.9034455418586731,
+      "learning_rate": 5.585869362543416e-06,
+      "loss": 1.3168,
+      "step": 301
+    },
+    {
+      "epoch": 0.8941524796447077,
+      "grad_norm": 1.1294760704040527,
+      "learning_rate": 5.285365727986707e-06,
+      "loss": 1.9353,
+      "step": 302
+    },
+    {
+      "epoch": 0.8971132494448557,
+      "grad_norm": 1.308358073234558,
+      "learning_rate": 4.9927189057139665e-06,
+      "loss": 2.1867,
+      "step": 303
+    },
+    {
+      "epoch": 0.9000740192450037,
+      "grad_norm": 1.1667540073394775,
+      "learning_rate": 4.707980316723837e-06,
+      "loss": 2.2503,
+      "step": 304
+    },
+    {
+      "epoch": 0.9030347890451518,
+      "grad_norm": 1.0320072174072266,
+      "learning_rate": 4.4311999924586065e-06,
+      "loss": 1.5944,
+      "step": 305
+    },
+    {
+      "epoch": 0.9059955588452998,
+      "grad_norm": 1.141965627670288,
+      "learning_rate": 4.16242656601315e-06,
+      "loss": 1.8177,
+      "step": 306
+    },
+    {
+      "epoch": 0.9089563286454478,
+      "grad_norm": 0.9115905165672302,
+      "learning_rate": 3.901707263589671e-06,
+      "loss": 1.436,
+      "step": 307
+    },
+    {
+      "epoch": 0.9119170984455959,
+      "grad_norm": 0.9493201375007629,
+      "learning_rate": 3.6490878961994878e-06,
+      "loss": 1.9393,
+      "step": 308
+    },
+    {
+      "epoch": 0.9148778682457439,
+      "grad_norm": 1.2201560735702515,
+      "learning_rate": 3.4046128516136755e-06,
+      "loss": 2.0329,
+      "step": 309
+    },
+    {
+      "epoch": 0.9178386380458919,
+      "grad_norm": 1.1415530443191528,
+      "learning_rate": 3.1683250865636114e-06,
+      "loss": 1.7484,
+      "step": 310
+    },
+    {
+      "epoch": 0.92079940784604,
+      "grad_norm": 1.055849313735962,
+      "learning_rate": 2.9402661191930804e-06,
+      "loss": 1.7875,
+      "step": 311
+    },
+    {
+      "epoch": 0.923760177646188,
+      "grad_norm": 0.9362370371818542,
+      "learning_rate": 2.7204760217631074e-06,
+      "loss": 1.2803,
+      "step": 312
+    },
+    {
+      "epoch": 0.926720947446336,
+      "grad_norm": 1.1764779090881348,
+      "learning_rate": 2.5089934136108664e-06,
+      "loss": 1.6859,
+      "step": 313
+    },
+    {
+      "epoch": 0.9296817172464841,
+      "grad_norm": 1.2218987941741943,
+      "learning_rate": 2.30585545436387e-06,
+      "loss": 2.025,
+      "step": 314
+    },
+    {
+      "epoch": 0.9326424870466321,
+      "grad_norm": 1.0182489156723022,
+      "learning_rate": 2.1110978374106192e-06,
+      "loss": 1.3178,
+      "step": 315
+    },
+    {
+      "epoch": 0.9356032568467801,
+      "grad_norm": 1.1746158599853516,
+      "learning_rate": 1.9247547836289793e-06,
+      "loss": 2.0641,
+      "step": 316
+    },
+    {
+      "epoch": 0.9385640266469282,
+      "grad_norm": 1.0268217325210571,
+      "learning_rate": 1.7468590353731495e-06,
+      "loss": 1.5677,
+      "step": 317
+    },
+    {
+      "epoch": 0.9415247964470762,
+      "grad_norm": 1.1525732278823853,
+      "learning_rate": 1.5774418507205679e-06,
+      "loss": 1.8057,
+      "step": 318
+    },
+    {
+      "epoch": 0.9444855662472242,
+      "grad_norm": 0.9357332587242126,
+      "learning_rate": 1.4165329979794973e-06,
+      "loss": 1.3016,
+      "step": 319
+    },
+    {
+      "epoch": 0.9474463360473723,
+      "grad_norm": 0.9553401470184326,
+      "learning_rate": 1.2641607504584928e-06,
+      "loss": 1.5646,
+      "step": 320
+    },
+    {
+      "epoch": 0.9504071058475203,
+      "grad_norm": 0.9898755550384521,
+      "learning_rate": 1.1203518814984214e-06,
+      "loss": 1.5453,
+      "step": 321
+    },
+    {
+      "epoch": 0.9533678756476683,
+      "grad_norm": 1.019761085510254,
+      "learning_rate": 9.851316597681958e-07,
+      "loss": 1.5678,
+      "step": 322
+    },
+    {
+      "epoch": 0.9563286454478165,
+      "grad_norm": 1.0154638290405273,
+      "learning_rate": 8.585238448247435e-07,
+      "loss": 1.6164,
+      "step": 323
+    },
+    {
+      "epoch": 0.9592894152479645,
+      "grad_norm": 1.1107819080352783,
+      "learning_rate": 7.405506829382735e-07,
+      "loss": 1.8083,
+      "step": 324
+    },
+    {
+      "epoch": 0.9622501850481125,
+      "grad_norm": 1.2360882759094238,
+      "learning_rate": 6.312329031833319e-07,
+      "loss": 2.0795,
+      "step": 325
+    },
+    {
+      "epoch": 0.9652109548482606,
+      "grad_norm": 0.8985037207603455,
+      "learning_rate": 5.305897137965199e-07,
+      "loss": 1.1981,
+      "step": 326
+    },
+    {
+      "epoch": 0.9681717246484086,
+      "grad_norm": 1.0836095809936523,
+      "learning_rate": 4.386387988014273e-07,
+      "loss": 2.1089,
+      "step": 327
+    },
+    {
+      "epoch": 0.9711324944485566,
+      "grad_norm": 1.0523475408554077,
+      "learning_rate": 3.553963149013295e-07,
+      "loss": 1.8185,
+      "step": 328
+    },
+    {
+      "epoch": 0.9740932642487047,
+      "grad_norm": 1.2510104179382324,
+      "learning_rate": 2.808768886403301e-07,
+      "loss": 2.1252,
+      "step": 329
+    },
+    {
+      "epoch": 0.9770540340488527,
+      "grad_norm": 1.1012005805969238,
+      "learning_rate": 2.1509361383330596e-07,
+      "loss": 1.8081,
+      "step": 330
+    },
+    {
+      "epoch": 0.9800148038490007,
+      "grad_norm": 0.6544106602668762,
+      "learning_rate": 1.580580492652084e-07,
+      "loss": 0.4715,
+      "step": 331
+    },
+    {
+      "epoch": 0.9829755736491488,
+      "grad_norm": 0.6254527568817139,
+      "learning_rate": 1.0978021666005478e-07,
+      "loss": 0.4152,
+      "step": 332
+    },
+    {
+      "epoch": 0.9859363434492968,
+      "grad_norm": 0.6432924866676331,
+      "learning_rate": 7.02685989200258e-08,
+      "loss": 0.3737,
+      "step": 333
+    },
+    {
+      "epoch": 0.9888971132494448,
+      "grad_norm": 0.595024585723877,
+      "learning_rate": 3.953013863490784e-08,
+      "loss": 0.2546,
+      "step": 334
+    },
+    {
+      "epoch": 0.9918578830495929,
+      "grad_norm": 0.4974139332771301,
+      "learning_rate": 1.7570236862241017e-08,
+      "loss": 0.2191,
+      "step": 335
+    },
+    {
+      "epoch": 0.9948186528497409,
+      "grad_norm": 0.5824573636054993,
+      "learning_rate": 4.392752178278281e-09,
+      "loss": 0.1276,
+      "step": 336
+    },
+    {
+      "epoch": 0.997779422649889,
+      "grad_norm": 0.7297351360321045,
+      "learning_rate": 0.0,
+      "loss": 0.1176,
+      "step": 337
     }
   ],
   "logging_steps": 1,
@@ -1694,12 +2380,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.142671224386355e+16,
+  "total_flos": 3.0172541179920384e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null