Training in progress, step 2800, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a26138f4d4689fa8b725947d495dbade33603970d0d8a9e87c0397e9d80c7171
 size 201361312
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f14d6aacf8d6e623a81646d05b0bb1902fc343ef726e5e2b8552df92f76e54fc
 size 102537812
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cf247cd6fc12421a25e03b9f7afc154d921f893b113fe13081b8331bed547105
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:57890bbb36f903ddde92f175a8734aa474875f7006c17bc430e029ee05bcba34
 size 1064
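Each pointer file above stores only a Git LFS `oid sha256:` digest and a byte size, so an upload can be checked end to end by hashing the downloaded blob and comparing against the pointer. A minimal sketch, assuming a hypothetical local copy of the adapter blob; streaming keeps memory flat for the ~201 MB file:

```python
import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """Compute the sha256 hex digest Git LFS records as the pointer oid."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Stream in 1 MiB chunks so large blobs never load fully into memory.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local copy of the adapter blob from this commit.
assert lfs_oid("last-checkpoint/adapter_model.safetensors") == (
    "a26138f4d4689fa8b725947d495dbade33603970d0d8a9e87c0397e9d80c7171"
)
```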
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.602339506149292,
   "best_model_checkpoint": "miner_id_24/checkpoint-2200",
-  "epoch": 0.
+  "epoch": 0.13363400985550822,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 2800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -18319,6 +18319,1414 @@
       "eval_samples_per_second": 8.737,
       "eval_steps_per_second": 4.374,
       "step": 2600
+    },
+    {
+      "epoch": 0.1241364498693489,
+      "grad_norm": 6.640268802642822,
+      "learning_rate": 0.00019917472854104044,
+      "loss": 6.6788,
+      "step": 2601
+    },
[... 198 further added per-step records (steps 2602-2799), each with "epoch", "grad_norm", "learning_rate", "loss", and "step", elided for length ...]
+    {
+      "epoch": 0.13363400985550822,
+      "grad_norm": 6.690604209899902,
+      "learning_rate": 0.00019904224153587935,
+      "loss": 6.5086,
+      "step": 2800
+    },
+    {
+      "epoch": 0.13363400985550822,
+      "eval_loss": 1.6191966533660889,
+      "eval_runtime": 96.5269,
+      "eval_samples_per_second": 8.733,
+      "eval_steps_per_second": 4.372,
+      "step": 2800
     }
   ],
   "logging_steps": 1,
@@ -18333,7 +19741,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -18342,12 +19750,12 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop":
+      "should_training_stop": true
     },
     "attributes": {}
   }
 },
-  "total_flos": 2.
+  "total_flos": 2.596245419851776e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
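Beyond the adapter weights, trainer_state.json carries the bookkeeping shown in the hunks above. A minimal sketch for inspecting where training stood, reading the file this commit uploads (path relative to the repository root):

```python
import json

# Read the trainer state uploaded in this commit.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"])            # 2800
print(state["best_metric"])            # 1.602339506149292
print(state["best_model_checkpoint"])  # miner_id_24/checkpoint-2200
# Tail of the log history: the step-2800 train and eval records shown above.
print(state["log_history"][-1])
```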
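The last two hunks record why this is the final checkpoint: the step-2800 eval_loss of 1.6192 did not improve on the best_metric of 1.6023 (set at checkpoint-2200), the early-stopping patience counter reached 3, and should_training_stop flipped to true. A minimal sketch of that bookkeeping, mirroring the logic of transformers' EarlyStoppingCallback rather than its exact code; the patience of 3 and the incoming counter of 2 are inferences, since the diff truncates the old values:

```python
def early_stopping_step(metric: float, best: float, counter: int,
                        patience: int = 3, threshold: float = 0.0,
                        greater_is_better: bool = False) -> tuple[int, bool]:
    """Update the patience counter and decide whether training should stop."""
    improved = (metric - best > threshold) if greater_is_better \
        else (best - metric > threshold)
    counter = 0 if improved else counter + 1
    return counter, counter >= patience

# Step-2800 values from the diff: eval_loss 1.6192 vs. best_metric 1.6023.
# Incoming counter of 2 is an assumption (the old value is truncated above).
counter, stop = early_stopping_step(1.6191966533660889, 1.602339506149292,
                                    counter=2)
assert (counter, stop) == (3, True)  # matches the state written above
```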