error577 commited on
Commit
d53674b
·
verified ·
1 Parent(s): af82a2c

Training in progress, step 2800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a4bcf338cd5bcfdfca322442e0437d0756fba523cd3c6e969d1ab63c60649d6
3
  size 201361312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26138f4d4689fa8b725947d495dbade33603970d0d8a9e87c0397e9d80c7171
3
  size 201361312
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d2c3f69d1b1d65ff82b5361a177d936c38774c67f7e7c9e80fbae95019ee61a
3
  size 102537812
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f14d6aacf8d6e623a81646d05b0bb1902fc343ef726e5e2b8552df92f76e54fc
3
  size 102537812
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f17f98bb0e89af780ffffb5792ad56e5907144164a75d1f2d972da513c20656
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf247cd6fc12421a25e03b9f7afc154d921f893b113fe13081b8331bed547105
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bee69a6ac763ec64bcb9204c4b05b515ace2ec25288895042da824c3899c85a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57890bbb36f903ddde92f175a8734aa474875f7006c17bc430e029ee05bcba34
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.602339506149292,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2200",
4
- "epoch": 0.12408872343725764,
5
  "eval_steps": 200,
6
- "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -18319,6 +18319,1414 @@
18319
  "eval_samples_per_second": 8.737,
18320
  "eval_steps_per_second": 4.374,
18321
  "step": 2600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18322
  }
18323
  ],
18324
  "logging_steps": 1,
@@ -18333,7 +19741,7 @@
18333
  "early_stopping_threshold": 0.0
18334
  },
18335
  "attributes": {
18336
- "early_stopping_patience_counter": 2
18337
  }
18338
  },
18339
  "TrainerControl": {
@@ -18342,12 +19750,12 @@
18342
  "should_evaluate": false,
18343
  "should_log": false,
18344
  "should_save": true,
18345
- "should_training_stop": false
18346
  },
18347
  "attributes": {}
18348
  }
18349
  },
18350
- "total_flos": 2.410799318433792e+17,
18351
  "train_batch_size": 2,
18352
  "trial_name": null,
18353
  "trial_params": null
 
1
  {
2
  "best_metric": 1.602339506149292,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-2200",
4
+ "epoch": 0.13363400985550822,
5
  "eval_steps": 200,
6
+ "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
18319
  "eval_samples_per_second": 8.737,
18320
  "eval_steps_per_second": 4.374,
18321
  "step": 2600
18322
+ },
18323
+ {
18324
+ "epoch": 0.1241364498693489,
18325
+ "grad_norm": 6.640268802642822,
18326
+ "learning_rate": 0.00019917472854104044,
18327
+ "loss": 6.6788,
18328
+ "step": 2601
18329
+ },
18330
+ {
18331
+ "epoch": 0.12418417630144014,
18332
+ "grad_norm": 6.098337650299072,
18333
+ "learning_rate": 0.0001991740873167308,
18334
+ "loss": 6.4633,
18335
+ "step": 2602
18336
+ },
18337
+ {
18338
+ "epoch": 0.1242319027335314,
18339
+ "grad_norm": 7.400532245635986,
18340
+ "learning_rate": 0.0001991734458444397,
18341
+ "loss": 6.3654,
18342
+ "step": 2603
18343
+ },
18344
+ {
18345
+ "epoch": 0.12427962916562266,
18346
+ "grad_norm": 6.543416500091553,
18347
+ "learning_rate": 0.00019917280412416882,
18348
+ "loss": 6.3276,
18349
+ "step": 2604
18350
+ },
18351
+ {
18352
+ "epoch": 0.1243273555977139,
18353
+ "grad_norm": 9.13476276397705,
18354
+ "learning_rate": 0.00019917216215591972,
18355
+ "loss": 7.5483,
18356
+ "step": 2605
18357
+ },
18358
+ {
18359
+ "epoch": 0.12437508202980516,
18360
+ "grad_norm": 6.472226142883301,
18361
+ "learning_rate": 0.000199171519939694,
18362
+ "loss": 7.5186,
18363
+ "step": 2606
18364
+ },
18365
+ {
18366
+ "epoch": 0.1244228084618964,
18367
+ "grad_norm": 6.057101249694824,
18368
+ "learning_rate": 0.0001991708774754933,
18369
+ "loss": 5.7719,
18370
+ "step": 2607
18371
+ },
18372
+ {
18373
+ "epoch": 0.12447053489398767,
18374
+ "grad_norm": 8.963512420654297,
18375
+ "learning_rate": 0.00019917023476331922,
18376
+ "loss": 6.3314,
18377
+ "step": 2608
18378
+ },
18379
+ {
18380
+ "epoch": 0.12451826132607892,
18381
+ "grad_norm": 6.93634557723999,
18382
+ "learning_rate": 0.00019916959180317335,
18383
+ "loss": 8.5299,
18384
+ "step": 2609
18385
+ },
18386
+ {
18387
+ "epoch": 0.12456598775817017,
18388
+ "grad_norm": 6.162479400634766,
18389
+ "learning_rate": 0.00019916894859505727,
18390
+ "loss": 5.6562,
18391
+ "step": 2610
18392
+ },
18393
+ {
18394
+ "epoch": 0.12461371419026142,
18395
+ "grad_norm": 5.48561429977417,
18396
+ "learning_rate": 0.00019916830513897266,
18397
+ "loss": 6.2665,
18398
+ "step": 2611
18399
+ },
18400
+ {
18401
+ "epoch": 0.12466144062235267,
18402
+ "grad_norm": 3.560473680496216,
18403
+ "learning_rate": 0.00019916766143492106,
18404
+ "loss": 3.051,
18405
+ "step": 2612
18406
+ },
18407
+ {
18408
+ "epoch": 0.12470916705444393,
18409
+ "grad_norm": 7.0898895263671875,
18410
+ "learning_rate": 0.00019916701748290416,
18411
+ "loss": 6.9835,
18412
+ "step": 2613
18413
+ },
18414
+ {
18415
+ "epoch": 0.12475689348653518,
18416
+ "grad_norm": 5.331742286682129,
18417
+ "learning_rate": 0.00019916637328292348,
18418
+ "loss": 6.0956,
18419
+ "step": 2614
18420
+ },
18421
+ {
18422
+ "epoch": 0.12480461991862643,
18423
+ "grad_norm": 8.475786209106445,
18424
+ "learning_rate": 0.00019916572883498068,
18425
+ "loss": 7.2774,
18426
+ "step": 2615
18427
+ },
18428
+ {
18429
+ "epoch": 0.12485234635071768,
18430
+ "grad_norm": 7.285261154174805,
18431
+ "learning_rate": 0.00019916508413907736,
18432
+ "loss": 9.4067,
18433
+ "step": 2616
18434
+ },
18435
+ {
18436
+ "epoch": 0.12490007278280894,
18437
+ "grad_norm": 6.492762565612793,
18438
+ "learning_rate": 0.00019916443919521513,
18439
+ "loss": 6.2198,
18440
+ "step": 2617
18441
+ },
18442
+ {
18443
+ "epoch": 0.1249477992149002,
18444
+ "grad_norm": 5.463344573974609,
18445
+ "learning_rate": 0.00019916379400339564,
18446
+ "loss": 6.9778,
18447
+ "step": 2618
18448
+ },
18449
+ {
18450
+ "epoch": 0.12499552564699144,
18451
+ "grad_norm": 5.793583393096924,
18452
+ "learning_rate": 0.00019916314856362045,
18453
+ "loss": 6.8516,
18454
+ "step": 2619
18455
+ },
18456
+ {
18457
+ "epoch": 0.1250432520790827,
18458
+ "grad_norm": 5.299160003662109,
18459
+ "learning_rate": 0.00019916250287589117,
18460
+ "loss": 5.9155,
18461
+ "step": 2620
18462
+ },
18463
+ {
18464
+ "epoch": 0.12509097851117396,
18465
+ "grad_norm": 5.868138790130615,
18466
+ "learning_rate": 0.00019916185694020947,
18467
+ "loss": 7.445,
18468
+ "step": 2621
18469
+ },
18470
+ {
18471
+ "epoch": 0.1251387049432652,
18472
+ "grad_norm": 6.175563335418701,
18473
+ "learning_rate": 0.00019916121075657693,
18474
+ "loss": 7.4463,
18475
+ "step": 2622
18476
+ },
18477
+ {
18478
+ "epoch": 0.12518643137535646,
18479
+ "grad_norm": 6.524340629577637,
18480
+ "learning_rate": 0.00019916056432499514,
18481
+ "loss": 5.5733,
18482
+ "step": 2623
18483
+ },
18484
+ {
18485
+ "epoch": 0.1252341578074477,
18486
+ "grad_norm": 5.758206367492676,
18487
+ "learning_rate": 0.0001991599176454658,
18488
+ "loss": 5.9721,
18489
+ "step": 2624
18490
+ },
18491
+ {
18492
+ "epoch": 0.12528188423953895,
18493
+ "grad_norm": 5.740457534790039,
18494
+ "learning_rate": 0.00019915927071799042,
18495
+ "loss": 6.1435,
18496
+ "step": 2625
18497
+ },
18498
+ {
18499
+ "epoch": 0.1253296106716302,
18500
+ "grad_norm": 5.045222759246826,
18501
+ "learning_rate": 0.00019915862354257072,
18502
+ "loss": 6.4285,
18503
+ "step": 2626
18504
+ },
18505
+ {
18506
+ "epoch": 0.12537733710372148,
18507
+ "grad_norm": 6.698198318481445,
18508
+ "learning_rate": 0.00019915797611920824,
18509
+ "loss": 7.0418,
18510
+ "step": 2627
18511
+ },
18512
+ {
18513
+ "epoch": 0.12542506353581273,
18514
+ "grad_norm": 5.3449602127075195,
18515
+ "learning_rate": 0.00019915732844790463,
18516
+ "loss": 5.8405,
18517
+ "step": 2628
18518
+ },
18519
+ {
18520
+ "epoch": 0.12547278996790398,
18521
+ "grad_norm": 8.230816841125488,
18522
+ "learning_rate": 0.0001991566805286615,
18523
+ "loss": 8.8511,
18524
+ "step": 2629
18525
+ },
18526
+ {
18527
+ "epoch": 0.12552051639999523,
18528
+ "grad_norm": 4.497639179229736,
18529
+ "learning_rate": 0.0001991560323614805,
18530
+ "loss": 5.1889,
18531
+ "step": 2630
18532
+ },
18533
+ {
18534
+ "epoch": 0.12556824283208648,
18535
+ "grad_norm": 7.639451503753662,
18536
+ "learning_rate": 0.00019915538394636321,
18537
+ "loss": 6.0825,
18538
+ "step": 2631
18539
+ },
18540
+ {
18541
+ "epoch": 0.12561596926417773,
18542
+ "grad_norm": 6.102507591247559,
18543
+ "learning_rate": 0.00019915473528331129,
18544
+ "loss": 5.6234,
18545
+ "step": 2632
18546
+ },
18547
+ {
18548
+ "epoch": 0.12566369569626898,
18549
+ "grad_norm": 5.559940338134766,
18550
+ "learning_rate": 0.00019915408637232634,
18551
+ "loss": 6.1196,
18552
+ "step": 2633
18553
+ },
18554
+ {
18555
+ "epoch": 0.12571142212836023,
18556
+ "grad_norm": 6.262628078460693,
18557
+ "learning_rate": 0.00019915343721341,
18558
+ "loss": 6.5523,
18559
+ "step": 2634
18560
+ },
18561
+ {
18562
+ "epoch": 0.1257591485604515,
18563
+ "grad_norm": 6.31156587600708,
18564
+ "learning_rate": 0.00019915278780656382,
18565
+ "loss": 6.9212,
18566
+ "step": 2635
18567
+ },
18568
+ {
18569
+ "epoch": 0.12580687499254276,
18570
+ "grad_norm": 5.397120475769043,
18571
+ "learning_rate": 0.0001991521381517895,
18572
+ "loss": 7.2011,
18573
+ "step": 2636
18574
+ },
18575
+ {
18576
+ "epoch": 0.125854601424634,
18577
+ "grad_norm": 5.896693229675293,
18578
+ "learning_rate": 0.00019915148824908868,
18579
+ "loss": 6.2354,
18580
+ "step": 2637
18581
+ },
18582
+ {
18583
+ "epoch": 0.12590232785672525,
18584
+ "grad_norm": 5.77620267868042,
18585
+ "learning_rate": 0.00019915083809846293,
18586
+ "loss": 5.7495,
18587
+ "step": 2638
18588
+ },
18589
+ {
18590
+ "epoch": 0.1259500542888165,
18591
+ "grad_norm": 4.042270660400391,
18592
+ "learning_rate": 0.00019915018769991387,
18593
+ "loss": 5.0322,
18594
+ "step": 2639
18595
+ },
18596
+ {
18597
+ "epoch": 0.12599778072090775,
18598
+ "grad_norm": 7.152609825134277,
18599
+ "learning_rate": 0.0001991495370534432,
18600
+ "loss": 7.4355,
18601
+ "step": 2640
18602
+ },
18603
+ {
18604
+ "epoch": 0.126045507152999,
18605
+ "grad_norm": 5.517868518829346,
18606
+ "learning_rate": 0.00019914888615905248,
18607
+ "loss": 5.7128,
18608
+ "step": 2641
18609
+ },
18610
+ {
18611
+ "epoch": 0.12609323358509025,
18612
+ "grad_norm": 6.580667495727539,
18613
+ "learning_rate": 0.00019914823501674333,
18614
+ "loss": 7.5049,
18615
+ "step": 2642
18616
+ },
18617
+ {
18618
+ "epoch": 0.1261409600171815,
18619
+ "grad_norm": 5.372723579406738,
18620
+ "learning_rate": 0.00019914758362651744,
18621
+ "loss": 6.6719,
18622
+ "step": 2643
18623
+ },
18624
+ {
18625
+ "epoch": 0.12618868644927278,
18626
+ "grad_norm": 6.25331974029541,
18627
+ "learning_rate": 0.0001991469319883764,
18628
+ "loss": 8.3484,
18629
+ "step": 2644
18630
+ },
18631
+ {
18632
+ "epoch": 0.12623641288136403,
18633
+ "grad_norm": 7.487483024597168,
18634
+ "learning_rate": 0.00019914628010232182,
18635
+ "loss": 6.6997,
18636
+ "step": 2645
18637
+ },
18638
+ {
18639
+ "epoch": 0.12628413931345528,
18640
+ "grad_norm": 7.138690948486328,
18641
+ "learning_rate": 0.00019914562796835542,
18642
+ "loss": 6.7894,
18643
+ "step": 2646
18644
+ },
18645
+ {
18646
+ "epoch": 0.12633186574554653,
18647
+ "grad_norm": 4.610174179077148,
18648
+ "learning_rate": 0.0001991449755864787,
18649
+ "loss": 4.4906,
18650
+ "step": 2647
18651
+ },
18652
+ {
18653
+ "epoch": 0.12637959217763778,
18654
+ "grad_norm": 6.854246139526367,
18655
+ "learning_rate": 0.00019914432295669337,
18656
+ "loss": 7.2979,
18657
+ "step": 2648
18658
+ },
18659
+ {
18660
+ "epoch": 0.12642731860972903,
18661
+ "grad_norm": 8.805462837219238,
18662
+ "learning_rate": 0.00019914367007900106,
18663
+ "loss": 7.6631,
18664
+ "step": 2649
18665
+ },
18666
+ {
18667
+ "epoch": 0.12647504504182028,
18668
+ "grad_norm": 8.960370063781738,
18669
+ "learning_rate": 0.00019914301695340338,
18670
+ "loss": 6.965,
18671
+ "step": 2650
18672
+ },
18673
+ {
18674
+ "epoch": 0.12652277147391153,
18675
+ "grad_norm": 6.149599552154541,
18676
+ "learning_rate": 0.00019914236357990197,
18677
+ "loss": 5.7883,
18678
+ "step": 2651
18679
+ },
18680
+ {
18681
+ "epoch": 0.1265704979060028,
18682
+ "grad_norm": 5.623478889465332,
18683
+ "learning_rate": 0.00019914170995849847,
18684
+ "loss": 6.1539,
18685
+ "step": 2652
18686
+ },
18687
+ {
18688
+ "epoch": 0.12661822433809405,
18689
+ "grad_norm": 6.2518792152404785,
18690
+ "learning_rate": 0.00019914105608919452,
18691
+ "loss": 6.0241,
18692
+ "step": 2653
18693
+ },
18694
+ {
18695
+ "epoch": 0.1266659507701853,
18696
+ "grad_norm": 6.36302375793457,
18697
+ "learning_rate": 0.00019914040197199174,
18698
+ "loss": 6.4629,
18699
+ "step": 2654
18700
+ },
18701
+ {
18702
+ "epoch": 0.12671367720227655,
18703
+ "grad_norm": 6.179405212402344,
18704
+ "learning_rate": 0.0001991397476068918,
18705
+ "loss": 6.0301,
18706
+ "step": 2655
18707
+ },
18708
+ {
18709
+ "epoch": 0.1267614036343678,
18710
+ "grad_norm": 6.06155252456665,
18711
+ "learning_rate": 0.00019913909299389623,
18712
+ "loss": 6.2193,
18713
+ "step": 2656
18714
+ },
18715
+ {
18716
+ "epoch": 0.12680913006645905,
18717
+ "grad_norm": 6.631749629974365,
18718
+ "learning_rate": 0.0001991384381330068,
18719
+ "loss": 6.6485,
18720
+ "step": 2657
18721
+ },
18722
+ {
18723
+ "epoch": 0.1268568564985503,
18724
+ "grad_norm": 8.34609603881836,
18725
+ "learning_rate": 0.0001991377830242251,
18726
+ "loss": 6.7854,
18727
+ "step": 2658
18728
+ },
18729
+ {
18730
+ "epoch": 0.12690458293064155,
18731
+ "grad_norm": 6.579798221588135,
18732
+ "learning_rate": 0.00019913712766755275,
18733
+ "loss": 7.3782,
18734
+ "step": 2659
18735
+ },
18736
+ {
18737
+ "epoch": 0.12695230936273283,
18738
+ "grad_norm": 6.286202907562256,
18739
+ "learning_rate": 0.00019913647206299138,
18740
+ "loss": 6.2319,
18741
+ "step": 2660
18742
+ },
18743
+ {
18744
+ "epoch": 0.12700003579482408,
18745
+ "grad_norm": 6.803478240966797,
18746
+ "learning_rate": 0.00019913581621054267,
18747
+ "loss": 6.5881,
18748
+ "step": 2661
18749
+ },
18750
+ {
18751
+ "epoch": 0.12704776222691533,
18752
+ "grad_norm": 5.428170680999756,
18753
+ "learning_rate": 0.00019913516011020824,
18754
+ "loss": 6.6578,
18755
+ "step": 2662
18756
+ },
18757
+ {
18758
+ "epoch": 0.12709548865900658,
18759
+ "grad_norm": 5.8981032371521,
18760
+ "learning_rate": 0.0001991345037619897,
18761
+ "loss": 6.648,
18762
+ "step": 2663
18763
+ },
18764
+ {
18765
+ "epoch": 0.12714321509109783,
18766
+ "grad_norm": 6.136375427246094,
18767
+ "learning_rate": 0.00019913384716588876,
18768
+ "loss": 6.4674,
18769
+ "step": 2664
18770
+ },
18771
+ {
18772
+ "epoch": 0.12719094152318908,
18773
+ "grad_norm": 7.333561420440674,
18774
+ "learning_rate": 0.00019913319032190702,
18775
+ "loss": 7.6672,
18776
+ "step": 2665
18777
+ },
18778
+ {
18779
+ "epoch": 0.12723866795528033,
18780
+ "grad_norm": 6.941676616668701,
18781
+ "learning_rate": 0.00019913253323004609,
18782
+ "loss": 6.5824,
18783
+ "step": 2666
18784
+ },
18785
+ {
18786
+ "epoch": 0.12728639438737158,
18787
+ "grad_norm": 5.5567779541015625,
18788
+ "learning_rate": 0.00019913187589030767,
18789
+ "loss": 6.1721,
18790
+ "step": 2667
18791
+ },
18792
+ {
18793
+ "epoch": 0.12733412081946283,
18794
+ "grad_norm": 7.499778747558594,
18795
+ "learning_rate": 0.00019913121830269335,
18796
+ "loss": 7.4446,
18797
+ "step": 2668
18798
+ },
18799
+ {
18800
+ "epoch": 0.1273818472515541,
18801
+ "grad_norm": 8.435744285583496,
18802
+ "learning_rate": 0.00019913056046720485,
18803
+ "loss": 7.4201,
18804
+ "step": 2669
18805
+ },
18806
+ {
18807
+ "epoch": 0.12742957368364535,
18808
+ "grad_norm": 5.2992095947265625,
18809
+ "learning_rate": 0.0001991299023838437,
18810
+ "loss": 6.645,
18811
+ "step": 2670
18812
+ },
18813
+ {
18814
+ "epoch": 0.1274773001157366,
18815
+ "grad_norm": 5.7956223487854,
18816
+ "learning_rate": 0.0001991292440526117,
18817
+ "loss": 5.3825,
18818
+ "step": 2671
18819
+ },
18820
+ {
18821
+ "epoch": 0.12752502654782785,
18822
+ "grad_norm": 5.5730509757995605,
18823
+ "learning_rate": 0.00019912858547351038,
18824
+ "loss": 5.0827,
18825
+ "step": 2672
18826
+ },
18827
+ {
18828
+ "epoch": 0.1275727529799191,
18829
+ "grad_norm": 6.531050205230713,
18830
+ "learning_rate": 0.00019912792664654143,
18831
+ "loss": 6.4528,
18832
+ "step": 2673
18833
+ },
18834
+ {
18835
+ "epoch": 0.12762047941201035,
18836
+ "grad_norm": 5.897129535675049,
18837
+ "learning_rate": 0.00019912726757170644,
18838
+ "loss": 6.4735,
18839
+ "step": 2674
18840
+ },
18841
+ {
18842
+ "epoch": 0.1276682058441016,
18843
+ "grad_norm": 5.176238536834717,
18844
+ "learning_rate": 0.00019912660824900717,
18845
+ "loss": 5.9754,
18846
+ "step": 2675
18847
+ },
18848
+ {
18849
+ "epoch": 0.12771593227619285,
18850
+ "grad_norm": 5.605959415435791,
18851
+ "learning_rate": 0.00019912594867844516,
18852
+ "loss": 5.5582,
18853
+ "step": 2676
18854
+ },
18855
+ {
18856
+ "epoch": 0.12776365870828413,
18857
+ "grad_norm": 6.690275192260742,
18858
+ "learning_rate": 0.0001991252888600221,
18859
+ "loss": 6.5006,
18860
+ "step": 2677
18861
+ },
18862
+ {
18863
+ "epoch": 0.12781138514037538,
18864
+ "grad_norm": 8.319185256958008,
18865
+ "learning_rate": 0.00019912462879373966,
18866
+ "loss": 6.3769,
18867
+ "step": 2678
18868
+ },
18869
+ {
18870
+ "epoch": 0.12785911157246663,
18871
+ "grad_norm": 5.938503265380859,
18872
+ "learning_rate": 0.00019912396847959947,
18873
+ "loss": 5.9018,
18874
+ "step": 2679
18875
+ },
18876
+ {
18877
+ "epoch": 0.12790683800455788,
18878
+ "grad_norm": 6.656704902648926,
18879
+ "learning_rate": 0.0001991233079176032,
18880
+ "loss": 4.3682,
18881
+ "step": 2680
18882
+ },
18883
+ {
18884
+ "epoch": 0.12795456443664913,
18885
+ "grad_norm": 6.898440361022949,
18886
+ "learning_rate": 0.00019912264710775243,
18887
+ "loss": 7.5726,
18888
+ "step": 2681
18889
+ },
18890
+ {
18891
+ "epoch": 0.12800229086874038,
18892
+ "grad_norm": 5.528750419616699,
18893
+ "learning_rate": 0.0001991219860500489,
18894
+ "loss": 4.7983,
18895
+ "step": 2682
18896
+ },
18897
+ {
18898
+ "epoch": 0.12805001730083163,
18899
+ "grad_norm": 6.4117817878723145,
18900
+ "learning_rate": 0.00019912132474449423,
18901
+ "loss": 6.4503,
18902
+ "step": 2683
18903
+ },
18904
+ {
18905
+ "epoch": 0.12809774373292288,
18906
+ "grad_norm": 6.662499904632568,
18907
+ "learning_rate": 0.00019912066319109008,
18908
+ "loss": 7.6737,
18909
+ "step": 2684
18910
+ },
18911
+ {
18912
+ "epoch": 0.12814547016501415,
18913
+ "grad_norm": 6.276695728302002,
18914
+ "learning_rate": 0.0001991200013898381,
18915
+ "loss": 6.7972,
18916
+ "step": 2685
18917
+ },
18918
+ {
18919
+ "epoch": 0.1281931965971054,
18920
+ "grad_norm": 6.167166709899902,
18921
+ "learning_rate": 0.00019911933934073993,
18922
+ "loss": 6.0611,
18923
+ "step": 2686
18924
+ },
18925
+ {
18926
+ "epoch": 0.12824092302919665,
18927
+ "grad_norm": 6.943721294403076,
18928
+ "learning_rate": 0.00019911867704379722,
18929
+ "loss": 6.0425,
18930
+ "step": 2687
18931
+ },
18932
+ {
18933
+ "epoch": 0.1282886494612879,
18934
+ "grad_norm": 6.147932052612305,
18935
+ "learning_rate": 0.0001991180144990117,
18936
+ "loss": 5.8839,
18937
+ "step": 2688
18938
+ },
18939
+ {
18940
+ "epoch": 0.12833637589337915,
18941
+ "grad_norm": 6.889720439910889,
18942
+ "learning_rate": 0.0001991173517063849,
18943
+ "loss": 5.5848,
18944
+ "step": 2689
18945
+ },
18946
+ {
18947
+ "epoch": 0.1283841023254704,
18948
+ "grad_norm": 5.700843811035156,
18949
+ "learning_rate": 0.00019911668866591858,
18950
+ "loss": 5.2723,
18951
+ "step": 2690
18952
+ },
18953
+ {
18954
+ "epoch": 0.12843182875756165,
18955
+ "grad_norm": 6.69716215133667,
18956
+ "learning_rate": 0.0001991160253776144,
18957
+ "loss": 6.8941,
18958
+ "step": 2691
18959
+ },
18960
+ {
18961
+ "epoch": 0.1284795551896529,
18962
+ "grad_norm": 9.107110977172852,
18963
+ "learning_rate": 0.00019911536184147397,
18964
+ "loss": 8.4972,
18965
+ "step": 2692
18966
+ },
18967
+ {
18968
+ "epoch": 0.12852728162174415,
18969
+ "grad_norm": 6.4476776123046875,
18970
+ "learning_rate": 0.00019911469805749895,
18971
+ "loss": 7.1879,
18972
+ "step": 2693
18973
+ },
18974
+ {
18975
+ "epoch": 0.12857500805383543,
18976
+ "grad_norm": 6.365779399871826,
18977
+ "learning_rate": 0.000199114034025691,
18978
+ "loss": 5.6422,
18979
+ "step": 2694
18980
+ },
18981
+ {
18982
+ "epoch": 0.12862273448592668,
18983
+ "grad_norm": 5.086087703704834,
18984
+ "learning_rate": 0.00019911336974605179,
18985
+ "loss": 4.4789,
18986
+ "step": 2695
18987
+ },
18988
+ {
18989
+ "epoch": 0.12867046091801793,
18990
+ "grad_norm": 4.119086265563965,
18991
+ "learning_rate": 0.000199112705218583,
18992
+ "loss": 4.5278,
18993
+ "step": 2696
18994
+ },
18995
+ {
18996
+ "epoch": 0.12871818735010918,
18997
+ "grad_norm": 5.206000804901123,
18998
+ "learning_rate": 0.0001991120404432863,
18999
+ "loss": 5.2369,
19000
+ "step": 2697
19001
+ },
19002
+ {
19003
+ "epoch": 0.12876591378220043,
19004
+ "grad_norm": 4.440513610839844,
19005
+ "learning_rate": 0.00019911137542016333,
19006
+ "loss": 5.9666,
19007
+ "step": 2698
19008
+ },
19009
+ {
19010
+ "epoch": 0.12881364021429167,
19011
+ "grad_norm": 8.883622169494629,
19012
+ "learning_rate": 0.0001991107101492157,
19013
+ "loss": 7.3967,
19014
+ "step": 2699
19015
+ },
19016
+ {
19017
+ "epoch": 0.12886136664638292,
19018
+ "grad_norm": 6.536487579345703,
19019
+ "learning_rate": 0.00019911004463044516,
19020
+ "loss": 6.1909,
19021
+ "step": 2700
19022
+ },
19023
+ {
19024
+ "epoch": 0.12890909307847417,
19025
+ "grad_norm": 4.277031898498535,
19026
+ "learning_rate": 0.00019910937886385334,
19027
+ "loss": 4.1263,
19028
+ "step": 2701
19029
+ },
19030
+ {
19031
+ "epoch": 0.12895681951056545,
19032
+ "grad_norm": 5.570382118225098,
19033
+ "learning_rate": 0.00019910871284944192,
19034
+ "loss": 6.8265,
19035
+ "step": 2702
19036
+ },
19037
+ {
19038
+ "epoch": 0.1290045459426567,
19039
+ "grad_norm": 4.961894989013672,
19040
+ "learning_rate": 0.0001991080465872125,
19041
+ "loss": 4.9882,
19042
+ "step": 2703
19043
+ },
19044
+ {
19045
+ "epoch": 0.12905227237474795,
19046
+ "grad_norm": 5.48018741607666,
19047
+ "learning_rate": 0.00019910738007716683,
19048
+ "loss": 6.3094,
19049
+ "step": 2704
19050
+ },
19051
+ {
19052
+ "epoch": 0.1290999988068392,
19053
+ "grad_norm": 5.8967671394348145,
19054
+ "learning_rate": 0.00019910671331930654,
19055
+ "loss": 7.0458,
19056
+ "step": 2705
19057
+ },
19058
+ {
19059
+ "epoch": 0.12914772523893045,
19060
+ "grad_norm": 7.660339832305908,
19061
+ "learning_rate": 0.00019910604631363327,
19062
+ "loss": 7.6331,
19063
+ "step": 2706
19064
+ },
19065
+ {
19066
+ "epoch": 0.1291954516710217,
19067
+ "grad_norm": 5.007476329803467,
19068
+ "learning_rate": 0.00019910537906014873,
19069
+ "loss": 6.0415,
19070
+ "step": 2707
19071
+ },
19072
+ {
19073
+ "epoch": 0.12924317810311295,
19074
+ "grad_norm": 7.231809616088867,
19075
+ "learning_rate": 0.00019910471155885459,
19076
+ "loss": 6.9033,
19077
+ "step": 2708
19078
+ },
19079
+ {
19080
+ "epoch": 0.1292909045352042,
19081
+ "grad_norm": 7.8866753578186035,
19082
+ "learning_rate": 0.0001991040438097525,
19083
+ "loss": 8.1581,
19084
+ "step": 2709
19085
+ },
19086
+ {
19087
+ "epoch": 0.12933863096729548,
19088
+ "grad_norm": 4.540760517120361,
19089
+ "learning_rate": 0.00019910337581284415,
19090
+ "loss": 3.9487,
19091
+ "step": 2710
19092
+ },
19093
+ {
19094
+ "epoch": 0.12938635739938673,
19095
+ "grad_norm": 6.535691261291504,
19096
+ "learning_rate": 0.0001991027075681312,
19097
+ "loss": 6.8769,
19098
+ "step": 2711
19099
+ },
19100
+ {
19101
+ "epoch": 0.12943408383147798,
19102
+ "grad_norm": 7.321287631988525,
19103
+ "learning_rate": 0.00019910203907561527,
19104
+ "loss": 7.6307,
19105
+ "step": 2712
19106
+ },
19107
+ {
19108
+ "epoch": 0.12948181026356922,
19109
+ "grad_norm": 8.327848434448242,
19110
+ "learning_rate": 0.0001991013703352981,
19111
+ "loss": 6.3343,
19112
+ "step": 2713
19113
+ },
19114
+ {
19115
+ "epoch": 0.12952953669566047,
19116
+ "grad_norm": 4.456166744232178,
19117
+ "learning_rate": 0.00019910070134718133,
19118
+ "loss": 4.3715,
19119
+ "step": 2714
19120
+ },
19121
+ {
19122
+ "epoch": 0.12957726312775172,
19123
+ "grad_norm": 7.4280781745910645,
19124
+ "learning_rate": 0.00019910003211126668,
19125
+ "loss": 7.9487,
19126
+ "step": 2715
19127
+ },
19128
+ {
19129
+ "epoch": 0.12962498955984297,
19130
+ "grad_norm": 6.105531215667725,
19131
+ "learning_rate": 0.00019909936262755572,
19132
+ "loss": 5.3206,
19133
+ "step": 2716
19134
+ },
19135
+ {
19136
+ "epoch": 0.12967271599193422,
19137
+ "grad_norm": 6.178251266479492,
19138
+ "learning_rate": 0.00019909869289605023,
19139
+ "loss": 7.429,
19140
+ "step": 2717
19141
+ },
19142
+ {
19143
+ "epoch": 0.12972044242402547,
19144
+ "grad_norm": 7.50347375869751,
19145
+ "learning_rate": 0.00019909802291675187,
19146
+ "loss": 6.884,
19147
+ "step": 2718
19148
+ },
19149
+ {
19150
+ "epoch": 0.12976816885611675,
19151
+ "grad_norm": 8.008402824401855,
19152
+ "learning_rate": 0.00019909735268966222,
19153
+ "loss": 6.4672,
19154
+ "step": 2719
19155
+ },
19156
+ {
19157
+ "epoch": 0.129815895288208,
19158
+ "grad_norm": 6.246631622314453,
19159
+ "learning_rate": 0.0001990966822147831,
19160
+ "loss": 5.7752,
19161
+ "step": 2720
19162
+ },
19163
+ {
19164
+ "epoch": 0.12986362172029925,
19165
+ "grad_norm": 5.764650821685791,
19166
+ "learning_rate": 0.00019909601149211605,
19167
+ "loss": 6.088,
19168
+ "step": 2721
19169
+ },
19170
+ {
19171
+ "epoch": 0.1299113481523905,
19172
+ "grad_norm": 6.757874488830566,
19173
+ "learning_rate": 0.00019909534052166286,
19174
+ "loss": 6.4215,
19175
+ "step": 2722
19176
+ },
19177
+ {
19178
+ "epoch": 0.12995907458448175,
19179
+ "grad_norm": 7.595560073852539,
19180
+ "learning_rate": 0.0001990946693034251,
19181
+ "loss": 7.4822,
19182
+ "step": 2723
19183
+ },
19184
+ {
19185
+ "epoch": 0.130006801016573,
19186
+ "grad_norm": 6.582478046417236,
19187
+ "learning_rate": 0.00019909399783740454,
19188
+ "loss": 6.6361,
19189
+ "step": 2724
19190
+ },
19191
+ {
19192
+ "epoch": 0.13005452744866425,
19193
+ "grad_norm": 5.431997299194336,
19194
+ "learning_rate": 0.00019909332612360285,
19195
+ "loss": 6.3359,
19196
+ "step": 2725
19197
+ },
19198
+ {
19199
+ "epoch": 0.1301022538807555,
19200
+ "grad_norm": 5.795412540435791,
19201
+ "learning_rate": 0.00019909265416202164,
19202
+ "loss": 7.2463,
19203
+ "step": 2726
19204
+ },
19205
+ {
19206
+ "epoch": 0.13014998031284677,
19207
+ "grad_norm": 6.594300270080566,
19208
+ "learning_rate": 0.00019909198195266266,
19209
+ "loss": 5.7899,
19210
+ "step": 2727
19211
+ },
19212
+ {
19213
+ "epoch": 0.13019770674493802,
19214
+ "grad_norm": 6.408435344696045,
19215
+ "learning_rate": 0.00019909130949552755,
19216
+ "loss": 7.3634,
19217
+ "step": 2728
19218
+ },
19219
+ {
19220
+ "epoch": 0.13024543317702927,
19221
+ "grad_norm": 6.5589919090271,
19222
+ "learning_rate": 0.000199090636790618,
19223
+ "loss": 6.6704,
19224
+ "step": 2729
19225
+ },
19226
+ {
19227
+ "epoch": 0.13029315960912052,
19228
+ "grad_norm": 6.496981620788574,
19229
+ "learning_rate": 0.0001990899638379357,
19230
+ "loss": 6.1406,
19231
+ "step": 2730
19232
+ },
19233
+ {
19234
+ "epoch": 0.13034088604121177,
19235
+ "grad_norm": 4.503755569458008,
19236
+ "learning_rate": 0.00019908929063748235,
19237
+ "loss": 5.0579,
19238
+ "step": 2731
19239
+ },
19240
+ {
19241
+ "epoch": 0.13038861247330302,
19242
+ "grad_norm": 7.117122173309326,
19243
+ "learning_rate": 0.0001990886171892596,
19244
+ "loss": 7.4509,
19245
+ "step": 2732
19246
+ },
19247
+ {
19248
+ "epoch": 0.13043633890539427,
19249
+ "grad_norm": 6.266332149505615,
19250
+ "learning_rate": 0.00019908794349326913,
19251
+ "loss": 6.4409,
19252
+ "step": 2733
19253
+ },
19254
+ {
19255
+ "epoch": 0.13048406533748552,
19256
+ "grad_norm": 6.61918306350708,
19257
+ "learning_rate": 0.00019908726954951267,
19258
+ "loss": 7.2602,
19259
+ "step": 2734
19260
+ },
19261
+ {
19262
+ "epoch": 0.1305317917695768,
19263
+ "grad_norm": 6.399169445037842,
19264
+ "learning_rate": 0.00019908659535799187,
19265
+ "loss": 7.1302,
19266
+ "step": 2735
19267
+ },
19268
+ {
19269
+ "epoch": 0.13057951820166805,
19270
+ "grad_norm": 7.0500898361206055,
19271
+ "learning_rate": 0.00019908592091870842,
19272
+ "loss": 7.6507,
19273
+ "step": 2736
19274
+ },
19275
+ {
19276
+ "epoch": 0.1306272446337593,
19277
+ "grad_norm": 5.893949508666992,
19278
+ "learning_rate": 0.00019908524623166401,
19279
+ "loss": 6.7635,
19280
+ "step": 2737
19281
+ },
19282
+ {
19283
+ "epoch": 0.13067497106585055,
19284
+ "grad_norm": 6.369676113128662,
19285
+ "learning_rate": 0.0001990845712968603,
19286
+ "loss": 7.2699,
19287
+ "step": 2738
19288
+ },
19289
+ {
19290
+ "epoch": 0.1307226974979418,
19291
+ "grad_norm": 6.691352844238281,
19292
+ "learning_rate": 0.00019908389611429905,
19293
+ "loss": 7.8422,
19294
+ "step": 2739
19295
+ },
19296
+ {
19297
+ "epoch": 0.13077042393003305,
19298
+ "grad_norm": 6.010880470275879,
19299
+ "learning_rate": 0.00019908322068398188,
19300
+ "loss": 7.8595,
19301
+ "step": 2740
19302
+ },
19303
+ {
19304
+ "epoch": 0.1308181503621243,
19305
+ "grad_norm": 6.41002893447876,
19306
+ "learning_rate": 0.0001990825450059105,
19307
+ "loss": 6.4199,
19308
+ "step": 2741
19309
+ },
19310
+ {
19311
+ "epoch": 0.13086587679421555,
19312
+ "grad_norm": 6.322947025299072,
19313
+ "learning_rate": 0.0001990818690800866,
19314
+ "loss": 5.793,
19315
+ "step": 2742
19316
+ },
19317
+ {
19318
+ "epoch": 0.1309136032263068,
19319
+ "grad_norm": 6.068185329437256,
19320
+ "learning_rate": 0.0001990811929065119,
19321
+ "loss": 5.8207,
19322
+ "step": 2743
19323
+ },
19324
+ {
19325
+ "epoch": 0.13096132965839807,
19326
+ "grad_norm": 4.704338073730469,
19327
+ "learning_rate": 0.00019908051648518803,
19328
+ "loss": 5.3277,
19329
+ "step": 2744
19330
+ },
19331
+ {
19332
+ "epoch": 0.13100905609048932,
19333
+ "grad_norm": 5.012578964233398,
19334
+ "learning_rate": 0.00019907983981611672,
19335
+ "loss": 5.0888,
19336
+ "step": 2745
19337
+ },
19338
+ {
19339
+ "epoch": 0.13105678252258057,
19340
+ "grad_norm": 6.814549922943115,
19341
+ "learning_rate": 0.00019907916289929966,
19342
+ "loss": 7.5414,
19343
+ "step": 2746
19344
+ },
19345
+ {
19346
+ "epoch": 0.13110450895467182,
19347
+ "grad_norm": 5.401822090148926,
19348
+ "learning_rate": 0.00019907848573473853,
19349
+ "loss": 5.8487,
19350
+ "step": 2747
19351
+ },
19352
+ {
19353
+ "epoch": 0.13115223538676307,
19354
+ "grad_norm": 6.063152313232422,
19355
+ "learning_rate": 0.00019907780832243506,
19356
+ "loss": 7.1534,
19357
+ "step": 2748
19358
+ },
19359
+ {
19360
+ "epoch": 0.13119996181885432,
19361
+ "grad_norm": 4.4824395179748535,
19362
+ "learning_rate": 0.00019907713066239088,
19363
+ "loss": 4.1908,
19364
+ "step": 2749
19365
+ },
19366
+ {
19367
+ "epoch": 0.13124768825094557,
19368
+ "grad_norm": 7.166471004486084,
19369
+ "learning_rate": 0.00019907645275460775,
19370
+ "loss": 7.4206,
19371
+ "step": 2750
19372
+ },
19373
+ {
19374
+ "epoch": 0.13129541468303682,
19375
+ "grad_norm": 8.25527286529541,
19376
+ "learning_rate": 0.0001990757745990873,
19377
+ "loss": 8.5766,
19378
+ "step": 2751
19379
+ },
19380
+ {
19381
+ "epoch": 0.1313431411151281,
19382
+ "grad_norm": 5.488870143890381,
19383
+ "learning_rate": 0.0001990750961958313,
19384
+ "loss": 5.4217,
19385
+ "step": 2752
19386
+ },
19387
+ {
19388
+ "epoch": 0.13139086754721935,
19389
+ "grad_norm": 7.472160339355469,
19390
+ "learning_rate": 0.00019907441754484138,
19391
+ "loss": 7.9112,
19392
+ "step": 2753
19393
+ },
19394
+ {
19395
+ "epoch": 0.1314385939793106,
19396
+ "grad_norm": 6.098803997039795,
19397
+ "learning_rate": 0.00019907373864611925,
19398
+ "loss": 5.8821,
19399
+ "step": 2754
19400
+ },
19401
+ {
19402
+ "epoch": 0.13148632041140185,
19403
+ "grad_norm": 5.743438243865967,
19404
+ "learning_rate": 0.00019907305949966664,
19405
+ "loss": 6.1745,
19406
+ "step": 2755
19407
+ },
19408
+ {
19409
+ "epoch": 0.1315340468434931,
19410
+ "grad_norm": 5.455843448638916,
19411
+ "learning_rate": 0.00019907238010548523,
19412
+ "loss": 5.7785,
19413
+ "step": 2756
19414
+ },
19415
+ {
19416
+ "epoch": 0.13158177327558435,
19417
+ "grad_norm": 5.50465726852417,
19418
+ "learning_rate": 0.00019907170046357673,
19419
+ "loss": 5.6001,
19420
+ "step": 2757
19421
+ },
19422
+ {
19423
+ "epoch": 0.1316294997076756,
19424
+ "grad_norm": 6.299639701843262,
19425
+ "learning_rate": 0.0001990710205739428,
19426
+ "loss": 8.1939,
19427
+ "step": 2758
19428
+ },
19429
+ {
19430
+ "epoch": 0.13167722613976685,
19431
+ "grad_norm": 6.706847667694092,
19432
+ "learning_rate": 0.0001990703404365852,
19433
+ "loss": 6.7525,
19434
+ "step": 2759
19435
+ },
19436
+ {
19437
+ "epoch": 0.13172495257185812,
19438
+ "grad_norm": 8.025195121765137,
19439
+ "learning_rate": 0.00019906966005150557,
19440
+ "loss": 7.904,
19441
+ "step": 2760
19442
+ },
19443
+ {
19444
+ "epoch": 0.13177267900394937,
19445
+ "grad_norm": 9.603436470031738,
19446
+ "learning_rate": 0.00019906897941870564,
19447
+ "loss": 6.8963,
19448
+ "step": 2761
19449
+ },
19450
+ {
19451
+ "epoch": 0.13182040543604062,
19452
+ "grad_norm": 6.5206217765808105,
19453
+ "learning_rate": 0.00019906829853818714,
19454
+ "loss": 6.6906,
19455
+ "step": 2762
19456
+ },
19457
+ {
19458
+ "epoch": 0.13186813186813187,
19459
+ "grad_norm": 5.41235876083374,
19460
+ "learning_rate": 0.00019906761740995173,
19461
+ "loss": 6.1441,
19462
+ "step": 2763
19463
+ },
19464
+ {
19465
+ "epoch": 0.13191585830022312,
19466
+ "grad_norm": 5.746955871582031,
19467
+ "learning_rate": 0.00019906693603400112,
19468
+ "loss": 5.5281,
19469
+ "step": 2764
19470
+ },
19471
+ {
19472
+ "epoch": 0.13196358473231437,
19473
+ "grad_norm": 6.709540843963623,
19474
+ "learning_rate": 0.000199066254410337,
19475
+ "loss": 6.3966,
19476
+ "step": 2765
19477
+ },
19478
+ {
19479
+ "epoch": 0.13201131116440562,
19480
+ "grad_norm": 5.8970417976379395,
19481
+ "learning_rate": 0.00019906557253896115,
19482
+ "loss": 6.9662,
19483
+ "step": 2766
19484
+ },
19485
+ {
19486
+ "epoch": 0.13205903759649687,
19487
+ "grad_norm": 5.076374053955078,
19488
+ "learning_rate": 0.00019906489041987517,
19489
+ "loss": 5.1472,
19490
+ "step": 2767
19491
+ },
19492
+ {
19493
+ "epoch": 0.13210676402858812,
19494
+ "grad_norm": 6.5956902503967285,
19495
+ "learning_rate": 0.00019906420805308083,
19496
+ "loss": 7.3836,
19497
+ "step": 2768
19498
+ },
19499
+ {
19500
+ "epoch": 0.1321544904606794,
19501
+ "grad_norm": 5.3043928146362305,
19502
+ "learning_rate": 0.00019906352543857983,
19503
+ "loss": 6.8485,
19504
+ "step": 2769
19505
+ },
19506
+ {
19507
+ "epoch": 0.13220221689277065,
19508
+ "grad_norm": 7.224258899688721,
19509
+ "learning_rate": 0.00019906284257637386,
19510
+ "loss": 6.2999,
19511
+ "step": 2770
19512
+ },
19513
+ {
19514
+ "epoch": 0.1322499433248619,
19515
+ "grad_norm": 4.787478923797607,
19516
+ "learning_rate": 0.00019906215946646465,
19517
+ "loss": 4.6901,
19518
+ "step": 2771
19519
+ },
19520
+ {
19521
+ "epoch": 0.13229766975695315,
19522
+ "grad_norm": 5.938333511352539,
19523
+ "learning_rate": 0.0001990614761088539,
19524
+ "loss": 7.4666,
19525
+ "step": 2772
19526
+ },
19527
+ {
19528
+ "epoch": 0.1323453961890444,
19529
+ "grad_norm": 6.651096820831299,
19530
+ "learning_rate": 0.0001990607925035433,
19531
+ "loss": 7.231,
19532
+ "step": 2773
19533
+ },
19534
+ {
19535
+ "epoch": 0.13239312262113564,
19536
+ "grad_norm": 4.987978935241699,
19537
+ "learning_rate": 0.00019906010865053454,
19538
+ "loss": 5.4362,
19539
+ "step": 2774
19540
+ },
19541
+ {
19542
+ "epoch": 0.1324408490532269,
19543
+ "grad_norm": 6.788287162780762,
19544
+ "learning_rate": 0.0001990594245498294,
19545
+ "loss": 7.0538,
19546
+ "step": 2775
19547
+ },
19548
+ {
19549
+ "epoch": 0.13248857548531814,
19550
+ "grad_norm": 5.706051826477051,
19551
+ "learning_rate": 0.00019905874020142953,
19552
+ "loss": 6.9948,
19553
+ "step": 2776
19554
+ },
19555
+ {
19556
+ "epoch": 0.13253630191740942,
19557
+ "grad_norm": 7.513382434844971,
19558
+ "learning_rate": 0.0001990580556053367,
19559
+ "loss": 6.8821,
19560
+ "step": 2777
19561
+ },
19562
+ {
19563
+ "epoch": 0.13258402834950067,
19564
+ "grad_norm": 7.37601375579834,
19565
+ "learning_rate": 0.00019905737076155258,
19566
+ "loss": 8.9933,
19567
+ "step": 2778
19568
+ },
19569
+ {
19570
+ "epoch": 0.13263175478159192,
19571
+ "grad_norm": 4.514930248260498,
19572
+ "learning_rate": 0.00019905668567007887,
19573
+ "loss": 4.4424,
19574
+ "step": 2779
19575
+ },
19576
+ {
19577
+ "epoch": 0.13267948121368317,
19578
+ "grad_norm": 6.982491970062256,
19579
+ "learning_rate": 0.0001990560003309173,
19580
+ "loss": 7.462,
19581
+ "step": 2780
19582
+ },
19583
+ {
19584
+ "epoch": 0.13272720764577442,
19585
+ "grad_norm": 7.289805889129639,
19586
+ "learning_rate": 0.0001990553147440696,
19587
+ "loss": 6.7003,
19588
+ "step": 2781
19589
+ },
19590
+ {
19591
+ "epoch": 0.13277493407786567,
19592
+ "grad_norm": 5.799755573272705,
19593
+ "learning_rate": 0.00019905462890953745,
19594
+ "loss": 6.4667,
19595
+ "step": 2782
19596
+ },
19597
+ {
19598
+ "epoch": 0.13282266050995692,
19599
+ "grad_norm": 5.055026531219482,
19600
+ "learning_rate": 0.0001990539428273226,
19601
+ "loss": 5.4229,
19602
+ "step": 2783
19603
+ },
19604
+ {
19605
+ "epoch": 0.13287038694204817,
19606
+ "grad_norm": 4.6647844314575195,
19607
+ "learning_rate": 0.00019905325649742674,
19608
+ "loss": 4.7695,
19609
+ "step": 2784
19610
+ },
19611
+ {
19612
+ "epoch": 0.13291811337413945,
19613
+ "grad_norm": 7.971372604370117,
19614
+ "learning_rate": 0.00019905256991985162,
19615
+ "loss": 6.1439,
19616
+ "step": 2785
19617
+ },
19618
+ {
19619
+ "epoch": 0.1329658398062307,
19620
+ "grad_norm": 6.43125581741333,
19621
+ "learning_rate": 0.00019905188309459891,
19622
+ "loss": 6.2026,
19623
+ "step": 2786
19624
+ },
19625
+ {
19626
+ "epoch": 0.13301356623832195,
19627
+ "grad_norm": 4.872565269470215,
19628
+ "learning_rate": 0.00019905119602167036,
19629
+ "loss": 4.4752,
19630
+ "step": 2787
19631
+ },
19632
+ {
19633
+ "epoch": 0.1330612926704132,
19634
+ "grad_norm": 5.977827548980713,
19635
+ "learning_rate": 0.00019905050870106767,
19636
+ "loss": 5.6266,
19637
+ "step": 2788
19638
+ },
19639
+ {
19640
+ "epoch": 0.13310901910250444,
19641
+ "grad_norm": 7.002156734466553,
19642
+ "learning_rate": 0.0001990498211327926,
19643
+ "loss": 7.0112,
19644
+ "step": 2789
19645
+ },
19646
+ {
19647
+ "epoch": 0.1331567455345957,
19648
+ "grad_norm": 6.144350528717041,
19649
+ "learning_rate": 0.0001990491333168468,
19650
+ "loss": 6.1978,
19651
+ "step": 2790
19652
+ },
19653
+ {
19654
+ "epoch": 0.13320447196668694,
19655
+ "grad_norm": 7.993871212005615,
19656
+ "learning_rate": 0.000199048445253232,
19657
+ "loss": 6.4143,
19658
+ "step": 2791
19659
+ },
19660
+ {
19661
+ "epoch": 0.1332521983987782,
19662
+ "grad_norm": 6.021445274353027,
19663
+ "learning_rate": 0.00019904775694195,
19664
+ "loss": 5.4286,
19665
+ "step": 2792
19666
+ },
19667
+ {
19668
+ "epoch": 0.13329992483086944,
19669
+ "grad_norm": 5.774000644683838,
19670
+ "learning_rate": 0.00019904706838300243,
19671
+ "loss": 5.5089,
19672
+ "step": 2793
19673
+ },
19674
+ {
19675
+ "epoch": 0.13334765126296072,
19676
+ "grad_norm": 6.7085185050964355,
19677
+ "learning_rate": 0.00019904637957639108,
19678
+ "loss": 7.9083,
19679
+ "step": 2794
19680
+ },
19681
+ {
19682
+ "epoch": 0.13339537769505197,
19683
+ "grad_norm": 7.979789733886719,
19684
+ "learning_rate": 0.00019904569052211764,
19685
+ "loss": 6.8796,
19686
+ "step": 2795
19687
+ },
19688
+ {
19689
+ "epoch": 0.13344310412714322,
19690
+ "grad_norm": 9.78246784210205,
19691
+ "learning_rate": 0.0001990450012201838,
19692
+ "loss": 9.3304,
19693
+ "step": 2796
19694
+ },
19695
+ {
19696
+ "epoch": 0.13349083055923447,
19697
+ "grad_norm": 5.552200794219971,
19698
+ "learning_rate": 0.00019904431167059135,
19699
+ "loss": 5.1558,
19700
+ "step": 2797
19701
+ },
19702
+ {
19703
+ "epoch": 0.13353855699132572,
19704
+ "grad_norm": 6.368596076965332,
19705
+ "learning_rate": 0.00019904362187334198,
19706
+ "loss": 6.2315,
19707
+ "step": 2798
19708
+ },
19709
+ {
19710
+ "epoch": 0.13358628342341697,
19711
+ "grad_norm": 4.584041118621826,
19712
+ "learning_rate": 0.00019904293182843738,
19713
+ "loss": 5.0416,
19714
+ "step": 2799
19715
+ },
19716
+ {
19717
+ "epoch": 0.13363400985550822,
19718
+ "grad_norm": 6.690604209899902,
19719
+ "learning_rate": 0.00019904224153587935,
19720
+ "loss": 6.5086,
19721
+ "step": 2800
19722
+ },
19723
+ {
19724
+ "epoch": 0.13363400985550822,
19725
+ "eval_loss": 1.6191966533660889,
19726
+ "eval_runtime": 96.5269,
19727
+ "eval_samples_per_second": 8.733,
19728
+ "eval_steps_per_second": 4.372,
19729
+ "step": 2800
19730
  }
19731
  ],
19732
  "logging_steps": 1,
 
19741
  "early_stopping_threshold": 0.0
19742
  },
19743
  "attributes": {
19744
+ "early_stopping_patience_counter": 3
19745
  }
19746
  },
19747
  "TrainerControl": {
 
19750
  "should_evaluate": false,
19751
  "should_log": false,
19752
  "should_save": true,
19753
+ "should_training_stop": true
19754
  },
19755
  "attributes": {}
19756
  }
19757
  },
19758
+ "total_flos": 2.596245419851776e+17,
19759
  "train_batch_size": 2,
19760
  "trial_name": null,
19761
  "trial_params": null