Training in progress, step 3400, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 323014168
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27625dd0ad80925409d5e395a4d3571979885fa86a9f14fb7a92578dfe04c272
|
3 |
size 323014168
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 164465012
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4705717a64fae0e3fafa2ba0c4675d11630fe43c7a066161ca0b3d5292ddb651
|
3 |
size 164465012
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83d95f0537d327a1c7ab0207875fcf65955efc9ff31889f266af602212cccf15
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5afae7fd69239db02d490c1894b1ca5f1654d030d0dd2e520f22f3c9832684d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -22543,6 +22543,1414 @@
|
|
22543 |
"eval_samples_per_second": 5.426,
|
22544 |
"eval_steps_per_second": 2.724,
|
22545 |
"step": 3200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22546 |
}
|
22547 |
],
|
22548 |
"logging_steps": 1,
|
@@ -22571,7 +23979,7 @@
|
|
22571 |
"attributes": {}
|
22572 |
}
|
22573 |
},
|
22574 |
-
"total_flos": 5.
|
22575 |
"train_batch_size": 2,
|
22576 |
"trial_name": null,
|
22577 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.4874334335327148,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-3400",
|
4 |
+
"epoch": 0.5465139642354833,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 3400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
22543 |
"eval_samples_per_second": 5.426,
|
22544 |
"eval_steps_per_second": 2.724,
|
22545 |
"step": 3200
|
22546 |
+
},
|
22547 |
+
{
|
22548 |
+
"epoch": 0.5145268233875829,
|
22549 |
+
"grad_norm": 0.8541935682296753,
|
22550 |
+
"learning_rate": 0.00018604509353885694,
|
22551 |
+
"loss": 2.0232,
|
22552 |
+
"step": 3201
|
22553 |
+
},
|
22554 |
+
{
|
22555 |
+
"epoch": 0.5146875627888287,
|
22556 |
+
"grad_norm": 0.7530422210693359,
|
22557 |
+
"learning_rate": 0.00018603650139609773,
|
22558 |
+
"loss": 1.5164,
|
22559 |
+
"step": 3202
|
22560 |
+
},
|
22561 |
+
{
|
22562 |
+
"epoch": 0.5148483021900744,
|
22563 |
+
"grad_norm": 0.8122800588607788,
|
22564 |
+
"learning_rate": 0.00018602790680756118,
|
22565 |
+
"loss": 1.5982,
|
22566 |
+
"step": 3203
|
22567 |
+
},
|
22568 |
+
{
|
22569 |
+
"epoch": 0.5150090415913201,
|
22570 |
+
"grad_norm": 0.7053490877151489,
|
22571 |
+
"learning_rate": 0.00018601930977349155,
|
22572 |
+
"loss": 1.2421,
|
22573 |
+
"step": 3204
|
22574 |
+
},
|
22575 |
+
{
|
22576 |
+
"epoch": 0.5151697809925658,
|
22577 |
+
"grad_norm": 0.7418240904808044,
|
22578 |
+
"learning_rate": 0.00018601071029413331,
|
22579 |
+
"loss": 1.4113,
|
22580 |
+
"step": 3205
|
22581 |
+
},
|
22582 |
+
{
|
22583 |
+
"epoch": 0.5153305203938116,
|
22584 |
+
"grad_norm": 0.7965177893638611,
|
22585 |
+
"learning_rate": 0.00018600210836973089,
|
22586 |
+
"loss": 1.5034,
|
22587 |
+
"step": 3206
|
22588 |
+
},
|
22589 |
+
{
|
22590 |
+
"epoch": 0.5154912597950573,
|
22591 |
+
"grad_norm": 0.7014358043670654,
|
22592 |
+
"learning_rate": 0.00018599350400052883,
|
22593 |
+
"loss": 1.2735,
|
22594 |
+
"step": 3207
|
22595 |
+
},
|
22596 |
+
{
|
22597 |
+
"epoch": 0.515651999196303,
|
22598 |
+
"grad_norm": 0.7904342412948608,
|
22599 |
+
"learning_rate": 0.0001859848971867717,
|
22600 |
+
"loss": 1.5788,
|
22601 |
+
"step": 3208
|
22602 |
+
},
|
22603 |
+
{
|
22604 |
+
"epoch": 0.5158127385975487,
|
22605 |
+
"grad_norm": 0.8906430006027222,
|
22606 |
+
"learning_rate": 0.0001859762879287042,
|
22607 |
+
"loss": 1.7,
|
22608 |
+
"step": 3209
|
22609 |
+
},
|
22610 |
+
{
|
22611 |
+
"epoch": 0.5159734779987944,
|
22612 |
+
"grad_norm": 0.7783416509628296,
|
22613 |
+
"learning_rate": 0.00018596767622657104,
|
22614 |
+
"loss": 1.6071,
|
22615 |
+
"step": 3210
|
22616 |
+
},
|
22617 |
+
{
|
22618 |
+
"epoch": 0.5161342174000402,
|
22619 |
+
"grad_norm": 0.7956631183624268,
|
22620 |
+
"learning_rate": 0.00018595906208061707,
|
22621 |
+
"loss": 1.3444,
|
22622 |
+
"step": 3211
|
22623 |
+
},
|
22624 |
+
{
|
22625 |
+
"epoch": 0.5162949568012859,
|
22626 |
+
"grad_norm": 0.7637962102890015,
|
22627 |
+
"learning_rate": 0.0001859504454910871,
|
22628 |
+
"loss": 1.3372,
|
22629 |
+
"step": 3212
|
22630 |
+
},
|
22631 |
+
{
|
22632 |
+
"epoch": 0.5164556962025316,
|
22633 |
+
"grad_norm": 0.7456954121589661,
|
22634 |
+
"learning_rate": 0.00018594182645822614,
|
22635 |
+
"loss": 1.3977,
|
22636 |
+
"step": 3213
|
22637 |
+
},
|
22638 |
+
{
|
22639 |
+
"epoch": 0.5166164356037773,
|
22640 |
+
"grad_norm": 0.8508870601654053,
|
22641 |
+
"learning_rate": 0.00018593320498227914,
|
22642 |
+
"loss": 1.6368,
|
22643 |
+
"step": 3214
|
22644 |
+
},
|
22645 |
+
{
|
22646 |
+
"epoch": 0.5167771750050231,
|
22647 |
+
"grad_norm": 0.6762863397598267,
|
22648 |
+
"learning_rate": 0.00018592458106349126,
|
22649 |
+
"loss": 1.3411,
|
22650 |
+
"step": 3215
|
22651 |
+
},
|
22652 |
+
{
|
22653 |
+
"epoch": 0.5169379144062688,
|
22654 |
+
"grad_norm": 0.8175486922264099,
|
22655 |
+
"learning_rate": 0.0001859159547021076,
|
22656 |
+
"loss": 1.4413,
|
22657 |
+
"step": 3216
|
22658 |
+
},
|
22659 |
+
{
|
22660 |
+
"epoch": 0.5170986538075145,
|
22661 |
+
"grad_norm": 0.86971116065979,
|
22662 |
+
"learning_rate": 0.0001859073258983734,
|
22663 |
+
"loss": 1.5145,
|
22664 |
+
"step": 3217
|
22665 |
+
},
|
22666 |
+
{
|
22667 |
+
"epoch": 0.5172593932087602,
|
22668 |
+
"grad_norm": 0.9065823554992676,
|
22669 |
+
"learning_rate": 0.00018589869465253394,
|
22670 |
+
"loss": 1.7302,
|
22671 |
+
"step": 3218
|
22672 |
+
},
|
22673 |
+
{
|
22674 |
+
"epoch": 0.5174201326100061,
|
22675 |
+
"grad_norm": 0.8765391111373901,
|
22676 |
+
"learning_rate": 0.00018589006096483458,
|
22677 |
+
"loss": 1.7279,
|
22678 |
+
"step": 3219
|
22679 |
+
},
|
22680 |
+
{
|
22681 |
+
"epoch": 0.5175808720112518,
|
22682 |
+
"grad_norm": 0.8304060697555542,
|
22683 |
+
"learning_rate": 0.0001858814248355208,
|
22684 |
+
"loss": 1.4744,
|
22685 |
+
"step": 3220
|
22686 |
+
},
|
22687 |
+
{
|
22688 |
+
"epoch": 0.5177416114124975,
|
22689 |
+
"grad_norm": 0.785413384437561,
|
22690 |
+
"learning_rate": 0.00018587278626483805,
|
22691 |
+
"loss": 1.4817,
|
22692 |
+
"step": 3221
|
22693 |
+
},
|
22694 |
+
{
|
22695 |
+
"epoch": 0.5179023508137433,
|
22696 |
+
"grad_norm": 0.7497190833091736,
|
22697 |
+
"learning_rate": 0.00018586414525303193,
|
22698 |
+
"loss": 1.4684,
|
22699 |
+
"step": 3222
|
22700 |
+
},
|
22701 |
+
{
|
22702 |
+
"epoch": 0.518063090214989,
|
22703 |
+
"grad_norm": 0.8957980275154114,
|
22704 |
+
"learning_rate": 0.00018585550180034806,
|
22705 |
+
"loss": 1.4137,
|
22706 |
+
"step": 3223
|
22707 |
+
},
|
22708 |
+
{
|
22709 |
+
"epoch": 0.5182238296162347,
|
22710 |
+
"grad_norm": 0.7416465282440186,
|
22711 |
+
"learning_rate": 0.00018584685590703213,
|
22712 |
+
"loss": 1.5414,
|
22713 |
+
"step": 3224
|
22714 |
+
},
|
22715 |
+
{
|
22716 |
+
"epoch": 0.5183845690174804,
|
22717 |
+
"grad_norm": 0.8946303129196167,
|
22718 |
+
"learning_rate": 0.00018583820757333,
|
22719 |
+
"loss": 1.5228,
|
22720 |
+
"step": 3225
|
22721 |
+
},
|
22722 |
+
{
|
22723 |
+
"epoch": 0.5185453084187261,
|
22724 |
+
"grad_norm": 0.7932084202766418,
|
22725 |
+
"learning_rate": 0.0001858295567994874,
|
22726 |
+
"loss": 1.5331,
|
22727 |
+
"step": 3226
|
22728 |
+
},
|
22729 |
+
{
|
22730 |
+
"epoch": 0.5187060478199719,
|
22731 |
+
"grad_norm": 0.854015588760376,
|
22732 |
+
"learning_rate": 0.00018582090358575034,
|
22733 |
+
"loss": 1.5853,
|
22734 |
+
"step": 3227
|
22735 |
+
},
|
22736 |
+
{
|
22737 |
+
"epoch": 0.5188667872212176,
|
22738 |
+
"grad_norm": 0.8538203835487366,
|
22739 |
+
"learning_rate": 0.00018581224793236479,
|
22740 |
+
"loss": 1.4788,
|
22741 |
+
"step": 3228
|
22742 |
+
},
|
22743 |
+
{
|
22744 |
+
"epoch": 0.5190275266224633,
|
22745 |
+
"grad_norm": 0.8172786235809326,
|
22746 |
+
"learning_rate": 0.00018580358983957674,
|
22747 |
+
"loss": 1.4009,
|
22748 |
+
"step": 3229
|
22749 |
+
},
|
22750 |
+
{
|
22751 |
+
"epoch": 0.519188266023709,
|
22752 |
+
"grad_norm": 0.7360570430755615,
|
22753 |
+
"learning_rate": 0.00018579492930763242,
|
22754 |
+
"loss": 1.347,
|
22755 |
+
"step": 3230
|
22756 |
+
},
|
22757 |
+
{
|
22758 |
+
"epoch": 0.5193490054249548,
|
22759 |
+
"grad_norm": 0.7721585035324097,
|
22760 |
+
"learning_rate": 0.00018578626633677795,
|
22761 |
+
"loss": 1.6243,
|
22762 |
+
"step": 3231
|
22763 |
+
},
|
22764 |
+
{
|
22765 |
+
"epoch": 0.5195097448262005,
|
22766 |
+
"grad_norm": 0.9231551289558411,
|
22767 |
+
"learning_rate": 0.0001857776009272596,
|
22768 |
+
"loss": 1.6949,
|
22769 |
+
"step": 3232
|
22770 |
+
},
|
22771 |
+
{
|
22772 |
+
"epoch": 0.5196704842274462,
|
22773 |
+
"grad_norm": 0.837478756904602,
|
22774 |
+
"learning_rate": 0.00018576893307932374,
|
22775 |
+
"loss": 1.5876,
|
22776 |
+
"step": 3233
|
22777 |
+
},
|
22778 |
+
{
|
22779 |
+
"epoch": 0.5198312236286919,
|
22780 |
+
"grad_norm": 0.8564349412918091,
|
22781 |
+
"learning_rate": 0.00018576026279321678,
|
22782 |
+
"loss": 1.4926,
|
22783 |
+
"step": 3234
|
22784 |
+
},
|
22785 |
+
{
|
22786 |
+
"epoch": 0.5199919630299377,
|
22787 |
+
"grad_norm": 0.8425780534744263,
|
22788 |
+
"learning_rate": 0.00018575159006918511,
|
22789 |
+
"loss": 1.6162,
|
22790 |
+
"step": 3235
|
22791 |
+
},
|
22792 |
+
{
|
22793 |
+
"epoch": 0.5201527024311834,
|
22794 |
+
"grad_norm": 0.8263263702392578,
|
22795 |
+
"learning_rate": 0.00018574291490747538,
|
22796 |
+
"loss": 1.55,
|
22797 |
+
"step": 3236
|
22798 |
+
},
|
22799 |
+
{
|
22800 |
+
"epoch": 0.5203134418324292,
|
22801 |
+
"grad_norm": 0.8159098029136658,
|
22802 |
+
"learning_rate": 0.0001857342373083341,
|
22803 |
+
"loss": 1.5944,
|
22804 |
+
"step": 3237
|
22805 |
+
},
|
22806 |
+
{
|
22807 |
+
"epoch": 0.520474181233675,
|
22808 |
+
"grad_norm": 0.7525100708007812,
|
22809 |
+
"learning_rate": 0.00018572555727200803,
|
22810 |
+
"loss": 1.3546,
|
22811 |
+
"step": 3238
|
22812 |
+
},
|
22813 |
+
{
|
22814 |
+
"epoch": 0.5206349206349207,
|
22815 |
+
"grad_norm": 0.7954659461975098,
|
22816 |
+
"learning_rate": 0.00018571687479874386,
|
22817 |
+
"loss": 1.3472,
|
22818 |
+
"step": 3239
|
22819 |
+
},
|
22820 |
+
{
|
22821 |
+
"epoch": 0.5207956600361664,
|
22822 |
+
"grad_norm": 0.882394552230835,
|
22823 |
+
"learning_rate": 0.00018570818988878843,
|
22824 |
+
"loss": 1.5075,
|
22825 |
+
"step": 3240
|
22826 |
+
},
|
22827 |
+
{
|
22828 |
+
"epoch": 0.5209563994374121,
|
22829 |
+
"grad_norm": 0.6685717105865479,
|
22830 |
+
"learning_rate": 0.00018569950254238867,
|
22831 |
+
"loss": 1.1479,
|
22832 |
+
"step": 3241
|
22833 |
+
},
|
22834 |
+
{
|
22835 |
+
"epoch": 0.5211171388386578,
|
22836 |
+
"grad_norm": 0.7421262264251709,
|
22837 |
+
"learning_rate": 0.00018569081275979145,
|
22838 |
+
"loss": 1.4706,
|
22839 |
+
"step": 3242
|
22840 |
+
},
|
22841 |
+
{
|
22842 |
+
"epoch": 0.5212778782399036,
|
22843 |
+
"grad_norm": 0.841813325881958,
|
22844 |
+
"learning_rate": 0.00018568212054124387,
|
22845 |
+
"loss": 1.5812,
|
22846 |
+
"step": 3243
|
22847 |
+
},
|
22848 |
+
{
|
22849 |
+
"epoch": 0.5214386176411493,
|
22850 |
+
"grad_norm": 0.7762925028800964,
|
22851 |
+
"learning_rate": 0.00018567342588699299,
|
22852 |
+
"loss": 1.4953,
|
22853 |
+
"step": 3244
|
22854 |
+
},
|
22855 |
+
{
|
22856 |
+
"epoch": 0.521599357042395,
|
22857 |
+
"grad_norm": 0.9392681121826172,
|
22858 |
+
"learning_rate": 0.000185664728797286,
|
22859 |
+
"loss": 1.5375,
|
22860 |
+
"step": 3245
|
22861 |
+
},
|
22862 |
+
{
|
22863 |
+
"epoch": 0.5217600964436407,
|
22864 |
+
"grad_norm": 0.8206340074539185,
|
22865 |
+
"learning_rate": 0.0001856560292723701,
|
22866 |
+
"loss": 1.5184,
|
22867 |
+
"step": 3246
|
22868 |
+
},
|
22869 |
+
{
|
22870 |
+
"epoch": 0.5219208358448865,
|
22871 |
+
"grad_norm": 0.7266818284988403,
|
22872 |
+
"learning_rate": 0.00018564732731249261,
|
22873 |
+
"loss": 1.3673,
|
22874 |
+
"step": 3247
|
22875 |
+
},
|
22876 |
+
{
|
22877 |
+
"epoch": 0.5220815752461322,
|
22878 |
+
"grad_norm": 0.849731981754303,
|
22879 |
+
"learning_rate": 0.00018563862291790092,
|
22880 |
+
"loss": 1.6291,
|
22881 |
+
"step": 3248
|
22882 |
+
},
|
22883 |
+
{
|
22884 |
+
"epoch": 0.5222423146473779,
|
22885 |
+
"grad_norm": 0.8300021290779114,
|
22886 |
+
"learning_rate": 0.0001856299160888425,
|
22887 |
+
"loss": 1.6345,
|
22888 |
+
"step": 3249
|
22889 |
+
},
|
22890 |
+
{
|
22891 |
+
"epoch": 0.5224030540486236,
|
22892 |
+
"grad_norm": 0.7429481148719788,
|
22893 |
+
"learning_rate": 0.00018562120682556472,
|
22894 |
+
"loss": 1.4749,
|
22895 |
+
"step": 3250
|
22896 |
+
},
|
22897 |
+
{
|
22898 |
+
"epoch": 0.5225637934498694,
|
22899 |
+
"grad_norm": 0.7573351860046387,
|
22900 |
+
"learning_rate": 0.0001856124951283153,
|
22901 |
+
"loss": 1.4571,
|
22902 |
+
"step": 3251
|
22903 |
+
},
|
22904 |
+
{
|
22905 |
+
"epoch": 0.5227245328511151,
|
22906 |
+
"grad_norm": 0.9759644865989685,
|
22907 |
+
"learning_rate": 0.0001856037809973419,
|
22908 |
+
"loss": 1.5486,
|
22909 |
+
"step": 3252
|
22910 |
+
},
|
22911 |
+
{
|
22912 |
+
"epoch": 0.5228852722523608,
|
22913 |
+
"grad_norm": 0.8568567037582397,
|
22914 |
+
"learning_rate": 0.00018559506443289211,
|
22915 |
+
"loss": 1.6903,
|
22916 |
+
"step": 3253
|
22917 |
+
},
|
22918 |
+
{
|
22919 |
+
"epoch": 0.5230460116536065,
|
22920 |
+
"grad_norm": 0.8426090478897095,
|
22921 |
+
"learning_rate": 0.00018558634543521383,
|
22922 |
+
"loss": 1.439,
|
22923 |
+
"step": 3254
|
22924 |
+
},
|
22925 |
+
{
|
22926 |
+
"epoch": 0.5232067510548524,
|
22927 |
+
"grad_norm": 0.6596956849098206,
|
22928 |
+
"learning_rate": 0.00018557762400455484,
|
22929 |
+
"loss": 1.2792,
|
22930 |
+
"step": 3255
|
22931 |
+
},
|
22932 |
+
{
|
22933 |
+
"epoch": 0.5233674904560981,
|
22934 |
+
"grad_norm": 0.917637288570404,
|
22935 |
+
"learning_rate": 0.00018556890014116318,
|
22936 |
+
"loss": 1.776,
|
22937 |
+
"step": 3256
|
22938 |
+
},
|
22939 |
+
{
|
22940 |
+
"epoch": 0.5235282298573438,
|
22941 |
+
"grad_norm": 0.7367154359817505,
|
22942 |
+
"learning_rate": 0.0001855601738452867,
|
22943 |
+
"loss": 1.2243,
|
22944 |
+
"step": 3257
|
22945 |
+
},
|
22946 |
+
{
|
22947 |
+
"epoch": 0.5236889692585895,
|
22948 |
+
"grad_norm": 0.7813432216644287,
|
22949 |
+
"learning_rate": 0.00018555144511717356,
|
22950 |
+
"loss": 1.457,
|
22951 |
+
"step": 3258
|
22952 |
+
},
|
22953 |
+
{
|
22954 |
+
"epoch": 0.5238497086598353,
|
22955 |
+
"grad_norm": 0.6810093522071838,
|
22956 |
+
"learning_rate": 0.00018554271395707187,
|
22957 |
+
"loss": 1.3941,
|
22958 |
+
"step": 3259
|
22959 |
+
},
|
22960 |
+
{
|
22961 |
+
"epoch": 0.524010448061081,
|
22962 |
+
"grad_norm": 0.8087904453277588,
|
22963 |
+
"learning_rate": 0.00018553398036522982,
|
22964 |
+
"loss": 1.3688,
|
22965 |
+
"step": 3260
|
22966 |
+
},
|
22967 |
+
{
|
22968 |
+
"epoch": 0.5241711874623267,
|
22969 |
+
"grad_norm": 0.8361377120018005,
|
22970 |
+
"learning_rate": 0.0001855252443418957,
|
22971 |
+
"loss": 1.5056,
|
22972 |
+
"step": 3261
|
22973 |
+
},
|
22974 |
+
{
|
22975 |
+
"epoch": 0.5243319268635724,
|
22976 |
+
"grad_norm": 0.9677451252937317,
|
22977 |
+
"learning_rate": 0.00018551650588731784,
|
22978 |
+
"loss": 1.5404,
|
22979 |
+
"step": 3262
|
22980 |
+
},
|
22981 |
+
{
|
22982 |
+
"epoch": 0.5244926662648182,
|
22983 |
+
"grad_norm": 0.8938940167427063,
|
22984 |
+
"learning_rate": 0.00018550776500174466,
|
22985 |
+
"loss": 1.6679,
|
22986 |
+
"step": 3263
|
22987 |
+
},
|
22988 |
+
{
|
22989 |
+
"epoch": 0.5246534056660639,
|
22990 |
+
"grad_norm": 0.751091480255127,
|
22991 |
+
"learning_rate": 0.0001854990216854246,
|
22992 |
+
"loss": 1.3274,
|
22993 |
+
"step": 3264
|
22994 |
+
},
|
22995 |
+
{
|
22996 |
+
"epoch": 0.5248141450673096,
|
22997 |
+
"grad_norm": 0.8059028387069702,
|
22998 |
+
"learning_rate": 0.00018549027593860626,
|
22999 |
+
"loss": 1.2289,
|
23000 |
+
"step": 3265
|
23001 |
+
},
|
23002 |
+
{
|
23003 |
+
"epoch": 0.5249748844685553,
|
23004 |
+
"grad_norm": 0.7425819039344788,
|
23005 |
+
"learning_rate": 0.00018548152776153826,
|
23006 |
+
"loss": 1.4981,
|
23007 |
+
"step": 3266
|
23008 |
+
},
|
23009 |
+
{
|
23010 |
+
"epoch": 0.5251356238698011,
|
23011 |
+
"grad_norm": 0.740841805934906,
|
23012 |
+
"learning_rate": 0.00018547277715446923,
|
23013 |
+
"loss": 1.3887,
|
23014 |
+
"step": 3267
|
23015 |
+
},
|
23016 |
+
{
|
23017 |
+
"epoch": 0.5252963632710468,
|
23018 |
+
"grad_norm": 0.6427505016326904,
|
23019 |
+
"learning_rate": 0.00018546402411764797,
|
23020 |
+
"loss": 1.261,
|
23021 |
+
"step": 3268
|
23022 |
+
},
|
23023 |
+
{
|
23024 |
+
"epoch": 0.5254571026722925,
|
23025 |
+
"grad_norm": 0.6863730549812317,
|
23026 |
+
"learning_rate": 0.00018545526865132328,
|
23027 |
+
"loss": 1.2171,
|
23028 |
+
"step": 3269
|
23029 |
+
},
|
23030 |
+
{
|
23031 |
+
"epoch": 0.5256178420735382,
|
23032 |
+
"grad_norm": 0.737600564956665,
|
23033 |
+
"learning_rate": 0.00018544651075574407,
|
23034 |
+
"loss": 1.3609,
|
23035 |
+
"step": 3270
|
23036 |
+
},
|
23037 |
+
{
|
23038 |
+
"epoch": 0.525778581474784,
|
23039 |
+
"grad_norm": 0.7653276324272156,
|
23040 |
+
"learning_rate": 0.0001854377504311593,
|
23041 |
+
"loss": 1.5206,
|
23042 |
+
"step": 3271
|
23043 |
+
},
|
23044 |
+
{
|
23045 |
+
"epoch": 0.5259393208760298,
|
23046 |
+
"grad_norm": 0.8990992307662964,
|
23047 |
+
"learning_rate": 0.000185428987677818,
|
23048 |
+
"loss": 1.4709,
|
23049 |
+
"step": 3272
|
23050 |
+
},
|
23051 |
+
{
|
23052 |
+
"epoch": 0.5261000602772755,
|
23053 |
+
"grad_norm": 0.76966792345047,
|
23054 |
+
"learning_rate": 0.00018542022249596926,
|
23055 |
+
"loss": 1.4829,
|
23056 |
+
"step": 3273
|
23057 |
+
},
|
23058 |
+
{
|
23059 |
+
"epoch": 0.5262607996785212,
|
23060 |
+
"grad_norm": 0.7187246084213257,
|
23061 |
+
"learning_rate": 0.00018541145488586229,
|
23062 |
+
"loss": 1.4161,
|
23063 |
+
"step": 3274
|
23064 |
+
},
|
23065 |
+
{
|
23066 |
+
"epoch": 0.526421539079767,
|
23067 |
+
"grad_norm": 0.8304179906845093,
|
23068 |
+
"learning_rate": 0.0001854026848477463,
|
23069 |
+
"loss": 1.2775,
|
23070 |
+
"step": 3275
|
23071 |
+
},
|
23072 |
+
{
|
23073 |
+
"epoch": 0.5265822784810127,
|
23074 |
+
"grad_norm": 0.761699914932251,
|
23075 |
+
"learning_rate": 0.00018539391238187055,
|
23076 |
+
"loss": 1.3707,
|
23077 |
+
"step": 3276
|
23078 |
+
},
|
23079 |
+
{
|
23080 |
+
"epoch": 0.5267430178822584,
|
23081 |
+
"grad_norm": 0.7719839811325073,
|
23082 |
+
"learning_rate": 0.0001853851374884845,
|
23083 |
+
"loss": 1.3814,
|
23084 |
+
"step": 3277
|
23085 |
+
},
|
23086 |
+
{
|
23087 |
+
"epoch": 0.5269037572835041,
|
23088 |
+
"grad_norm": 0.7285048961639404,
|
23089 |
+
"learning_rate": 0.00018537636016783754,
|
23090 |
+
"loss": 1.4875,
|
23091 |
+
"step": 3278
|
23092 |
+
},
|
23093 |
+
{
|
23094 |
+
"epoch": 0.5270644966847499,
|
23095 |
+
"grad_norm": 0.7954995632171631,
|
23096 |
+
"learning_rate": 0.00018536758042017923,
|
23097 |
+
"loss": 1.4043,
|
23098 |
+
"step": 3279
|
23099 |
+
},
|
23100 |
+
{
|
23101 |
+
"epoch": 0.5272252360859956,
|
23102 |
+
"grad_norm": 0.7614687085151672,
|
23103 |
+
"learning_rate": 0.0001853587982457591,
|
23104 |
+
"loss": 1.4088,
|
23105 |
+
"step": 3280
|
23106 |
+
},
|
23107 |
+
{
|
23108 |
+
"epoch": 0.5273859754872413,
|
23109 |
+
"grad_norm": 0.760796844959259,
|
23110 |
+
"learning_rate": 0.00018535001364482684,
|
23111 |
+
"loss": 1.4955,
|
23112 |
+
"step": 3281
|
23113 |
+
},
|
23114 |
+
{
|
23115 |
+
"epoch": 0.527546714888487,
|
23116 |
+
"grad_norm": 0.7797730565071106,
|
23117 |
+
"learning_rate": 0.00018534122661763216,
|
23118 |
+
"loss": 1.3871,
|
23119 |
+
"step": 3282
|
23120 |
+
},
|
23121 |
+
{
|
23122 |
+
"epoch": 0.5277074542897328,
|
23123 |
+
"grad_norm": 0.8547179698944092,
|
23124 |
+
"learning_rate": 0.00018533243716442485,
|
23125 |
+
"loss": 1.3409,
|
23126 |
+
"step": 3283
|
23127 |
+
},
|
23128 |
+
{
|
23129 |
+
"epoch": 0.5278681936909785,
|
23130 |
+
"grad_norm": 0.8011171817779541,
|
23131 |
+
"learning_rate": 0.00018532364528545475,
|
23132 |
+
"loss": 1.5323,
|
23133 |
+
"step": 3284
|
23134 |
+
},
|
23135 |
+
{
|
23136 |
+
"epoch": 0.5280289330922242,
|
23137 |
+
"grad_norm": 0.7849488258361816,
|
23138 |
+
"learning_rate": 0.00018531485098097184,
|
23139 |
+
"loss": 1.4696,
|
23140 |
+
"step": 3285
|
23141 |
+
},
|
23142 |
+
{
|
23143 |
+
"epoch": 0.5281896724934699,
|
23144 |
+
"grad_norm": 0.8917866349220276,
|
23145 |
+
"learning_rate": 0.00018530605425122611,
|
23146 |
+
"loss": 1.3903,
|
23147 |
+
"step": 3286
|
23148 |
+
},
|
23149 |
+
{
|
23150 |
+
"epoch": 0.5283504118947157,
|
23151 |
+
"grad_norm": 0.8635839223861694,
|
23152 |
+
"learning_rate": 0.00018529725509646756,
|
23153 |
+
"loss": 1.7157,
|
23154 |
+
"step": 3287
|
23155 |
+
},
|
23156 |
+
{
|
23157 |
+
"epoch": 0.5285111512959614,
|
23158 |
+
"grad_norm": 0.7912312746047974,
|
23159 |
+
"learning_rate": 0.0001852884535169464,
|
23160 |
+
"loss": 1.5481,
|
23161 |
+
"step": 3288
|
23162 |
+
},
|
23163 |
+
{
|
23164 |
+
"epoch": 0.5286718906972071,
|
23165 |
+
"grad_norm": 0.7080885767936707,
|
23166 |
+
"learning_rate": 0.00018527964951291277,
|
23167 |
+
"loss": 1.5174,
|
23168 |
+
"step": 3289
|
23169 |
+
},
|
23170 |
+
{
|
23171 |
+
"epoch": 0.5288326300984529,
|
23172 |
+
"grad_norm": 0.8341657519340515,
|
23173 |
+
"learning_rate": 0.000185270843084617,
|
23174 |
+
"loss": 1.4355,
|
23175 |
+
"step": 3290
|
23176 |
+
},
|
23177 |
+
{
|
23178 |
+
"epoch": 0.5289933694996987,
|
23179 |
+
"grad_norm": 0.7897726893424988,
|
23180 |
+
"learning_rate": 0.0001852620342323094,
|
23181 |
+
"loss": 1.3098,
|
23182 |
+
"step": 3291
|
23183 |
+
},
|
23184 |
+
{
|
23185 |
+
"epoch": 0.5291541089009444,
|
23186 |
+
"grad_norm": 0.8012301921844482,
|
23187 |
+
"learning_rate": 0.00018525322295624038,
|
23188 |
+
"loss": 1.3543,
|
23189 |
+
"step": 3292
|
23190 |
+
},
|
23191 |
+
{
|
23192 |
+
"epoch": 0.5293148483021901,
|
23193 |
+
"grad_norm": 0.8380632996559143,
|
23194 |
+
"learning_rate": 0.00018524440925666048,
|
23195 |
+
"loss": 1.6445,
|
23196 |
+
"step": 3293
|
23197 |
+
},
|
23198 |
+
{
|
23199 |
+
"epoch": 0.5294755877034358,
|
23200 |
+
"grad_norm": 0.9531186819076538,
|
23201 |
+
"learning_rate": 0.00018523559313382015,
|
23202 |
+
"loss": 1.8615,
|
23203 |
+
"step": 3294
|
23204 |
+
},
|
23205 |
+
{
|
23206 |
+
"epoch": 0.5296363271046816,
|
23207 |
+
"grad_norm": 0.7453944087028503,
|
23208 |
+
"learning_rate": 0.00018522677458797008,
|
23209 |
+
"loss": 1.2749,
|
23210 |
+
"step": 3295
|
23211 |
+
},
|
23212 |
+
{
|
23213 |
+
"epoch": 0.5297970665059273,
|
23214 |
+
"grad_norm": 0.7704547643661499,
|
23215 |
+
"learning_rate": 0.0001852179536193609,
|
23216 |
+
"loss": 1.6659,
|
23217 |
+
"step": 3296
|
23218 |
+
},
|
23219 |
+
{
|
23220 |
+
"epoch": 0.529957805907173,
|
23221 |
+
"grad_norm": 0.7032475471496582,
|
23222 |
+
"learning_rate": 0.00018520913022824345,
|
23223 |
+
"loss": 1.2315,
|
23224 |
+
"step": 3297
|
23225 |
+
},
|
23226 |
+
{
|
23227 |
+
"epoch": 0.5301185453084187,
|
23228 |
+
"grad_norm": 0.8031439781188965,
|
23229 |
+
"learning_rate": 0.00018520030441486845,
|
23230 |
+
"loss": 1.42,
|
23231 |
+
"step": 3298
|
23232 |
+
},
|
23233 |
+
{
|
23234 |
+
"epoch": 0.5302792847096645,
|
23235 |
+
"grad_norm": 0.7874877452850342,
|
23236 |
+
"learning_rate": 0.0001851914761794869,
|
23237 |
+
"loss": 1.4585,
|
23238 |
+
"step": 3299
|
23239 |
+
},
|
23240 |
+
{
|
23241 |
+
"epoch": 0.5304400241109102,
|
23242 |
+
"grad_norm": 0.7787345051765442,
|
23243 |
+
"learning_rate": 0.00018518264552234967,
|
23244 |
+
"loss": 1.4016,
|
23245 |
+
"step": 3300
|
23246 |
+
},
|
23247 |
+
{
|
23248 |
+
"epoch": 0.5306007635121559,
|
23249 |
+
"grad_norm": 0.8302061557769775,
|
23250 |
+
"learning_rate": 0.00018517381244370783,
|
23251 |
+
"loss": 1.6784,
|
23252 |
+
"step": 3301
|
23253 |
+
},
|
23254 |
+
{
|
23255 |
+
"epoch": 0.5307615029134016,
|
23256 |
+
"grad_norm": 0.6849681735038757,
|
23257 |
+
"learning_rate": 0.0001851649769438125,
|
23258 |
+
"loss": 1.1773,
|
23259 |
+
"step": 3302
|
23260 |
+
},
|
23261 |
+
{
|
23262 |
+
"epoch": 0.5309222423146474,
|
23263 |
+
"grad_norm": 0.8231149911880493,
|
23264 |
+
"learning_rate": 0.00018515613902291483,
|
23265 |
+
"loss": 1.4614,
|
23266 |
+
"step": 3303
|
23267 |
+
},
|
23268 |
+
{
|
23269 |
+
"epoch": 0.5310829817158931,
|
23270 |
+
"grad_norm": 0.8596336245536804,
|
23271 |
+
"learning_rate": 0.00018514729868126603,
|
23272 |
+
"loss": 1.4711,
|
23273 |
+
"step": 3304
|
23274 |
+
},
|
23275 |
+
{
|
23276 |
+
"epoch": 0.5312437211171388,
|
23277 |
+
"grad_norm": 0.8970978260040283,
|
23278 |
+
"learning_rate": 0.00018513845591911746,
|
23279 |
+
"loss": 1.9615,
|
23280 |
+
"step": 3305
|
23281 |
+
},
|
23282 |
+
{
|
23283 |
+
"epoch": 0.5314044605183845,
|
23284 |
+
"grad_norm": 0.8078962564468384,
|
23285 |
+
"learning_rate": 0.00018512961073672044,
|
23286 |
+
"loss": 1.7241,
|
23287 |
+
"step": 3306
|
23288 |
+
},
|
23289 |
+
{
|
23290 |
+
"epoch": 0.5315651999196302,
|
23291 |
+
"grad_norm": 1.2718873023986816,
|
23292 |
+
"learning_rate": 0.00018512076313432645,
|
23293 |
+
"loss": 1.3717,
|
23294 |
+
"step": 3307
|
23295 |
+
},
|
23296 |
+
{
|
23297 |
+
"epoch": 0.5317259393208761,
|
23298 |
+
"grad_norm": 0.8012800812721252,
|
23299 |
+
"learning_rate": 0.000185111913112187,
|
23300 |
+
"loss": 1.6785,
|
23301 |
+
"step": 3308
|
23302 |
+
},
|
23303 |
+
{
|
23304 |
+
"epoch": 0.5318866787221218,
|
23305 |
+
"grad_norm": 0.7391458749771118,
|
23306 |
+
"learning_rate": 0.00018510306067055364,
|
23307 |
+
"loss": 1.3205,
|
23308 |
+
"step": 3309
|
23309 |
+
},
|
23310 |
+
{
|
23311 |
+
"epoch": 0.5320474181233675,
|
23312 |
+
"grad_norm": 0.8265613913536072,
|
23313 |
+
"learning_rate": 0.00018509420580967807,
|
23314 |
+
"loss": 1.3645,
|
23315 |
+
"step": 3310
|
23316 |
+
},
|
23317 |
+
{
|
23318 |
+
"epoch": 0.5322081575246133,
|
23319 |
+
"grad_norm": 0.8628754019737244,
|
23320 |
+
"learning_rate": 0.00018508534852981198,
|
23321 |
+
"loss": 1.4986,
|
23322 |
+
"step": 3311
|
23323 |
+
},
|
23324 |
+
{
|
23325 |
+
"epoch": 0.532368896925859,
|
23326 |
+
"grad_norm": 0.7587042450904846,
|
23327 |
+
"learning_rate": 0.00018507648883120715,
|
23328 |
+
"loss": 1.3636,
|
23329 |
+
"step": 3312
|
23330 |
+
},
|
23331 |
+
{
|
23332 |
+
"epoch": 0.5325296363271047,
|
23333 |
+
"grad_norm": 0.8225183486938477,
|
23334 |
+
"learning_rate": 0.00018506762671411547,
|
23335 |
+
"loss": 1.3968,
|
23336 |
+
"step": 3313
|
23337 |
+
},
|
23338 |
+
{
|
23339 |
+
"epoch": 0.5326903757283504,
|
23340 |
+
"grad_norm": 0.8508570194244385,
|
23341 |
+
"learning_rate": 0.00018505876217878882,
|
23342 |
+
"loss": 1.4494,
|
23343 |
+
"step": 3314
|
23344 |
+
},
|
23345 |
+
{
|
23346 |
+
"epoch": 0.5328511151295962,
|
23347 |
+
"grad_norm": 0.8812022805213928,
|
23348 |
+
"learning_rate": 0.0001850498952254792,
|
23349 |
+
"loss": 1.6349,
|
23350 |
+
"step": 3315
|
23351 |
+
},
|
23352 |
+
{
|
23353 |
+
"epoch": 0.5330118545308419,
|
23354 |
+
"grad_norm": 0.8578895330429077,
|
23355 |
+
"learning_rate": 0.00018504102585443875,
|
23356 |
+
"loss": 1.5098,
|
23357 |
+
"step": 3316
|
23358 |
+
},
|
23359 |
+
{
|
23360 |
+
"epoch": 0.5331725939320876,
|
23361 |
+
"grad_norm": 0.7924550771713257,
|
23362 |
+
"learning_rate": 0.00018503215406591949,
|
23363 |
+
"loss": 1.4885,
|
23364 |
+
"step": 3317
|
23365 |
+
},
|
23366 |
+
{
|
23367 |
+
"epoch": 0.5333333333333333,
|
23368 |
+
"grad_norm": 0.7468888759613037,
|
23369 |
+
"learning_rate": 0.0001850232798601737,
|
23370 |
+
"loss": 1.4191,
|
23371 |
+
"step": 3318
|
23372 |
+
},
|
23373 |
+
{
|
23374 |
+
"epoch": 0.533494072734579,
|
23375 |
+
"grad_norm": 0.7940313220024109,
|
23376 |
+
"learning_rate": 0.00018501440323745357,
|
23377 |
+
"loss": 1.4326,
|
23378 |
+
"step": 3319
|
23379 |
+
},
|
23380 |
+
{
|
23381 |
+
"epoch": 0.5336548121358248,
|
23382 |
+
"grad_norm": 0.6616223454475403,
|
23383 |
+
"learning_rate": 0.00018500552419801154,
|
23384 |
+
"loss": 1.3375,
|
23385 |
+
"step": 3320
|
23386 |
+
},
|
23387 |
+
{
|
23388 |
+
"epoch": 0.5338155515370705,
|
23389 |
+
"grad_norm": 0.7967734932899475,
|
23390 |
+
"learning_rate": 0.00018499664274209993,
|
23391 |
+
"loss": 1.5282,
|
23392 |
+
"step": 3321
|
23393 |
+
},
|
23394 |
+
{
|
23395 |
+
"epoch": 0.5339762909383162,
|
23396 |
+
"grad_norm": 0.8318758010864258,
|
23397 |
+
"learning_rate": 0.0001849877588699713,
|
23398 |
+
"loss": 1.585,
|
23399 |
+
"step": 3322
|
23400 |
+
},
|
23401 |
+
{
|
23402 |
+
"epoch": 0.534137030339562,
|
23403 |
+
"grad_norm": 0.7232612371444702,
|
23404 |
+
"learning_rate": 0.0001849788725818781,
|
23405 |
+
"loss": 1.437,
|
23406 |
+
"step": 3323
|
23407 |
+
},
|
23408 |
+
{
|
23409 |
+
"epoch": 0.5342977697408077,
|
23410 |
+
"grad_norm": 0.7981652021408081,
|
23411 |
+
"learning_rate": 0.00018496998387807298,
|
23412 |
+
"loss": 1.3855,
|
23413 |
+
"step": 3324
|
23414 |
+
},
|
23415 |
+
{
|
23416 |
+
"epoch": 0.5344585091420534,
|
23417 |
+
"grad_norm": 0.743877112865448,
|
23418 |
+
"learning_rate": 0.00018496109275880864,
|
23419 |
+
"loss": 1.3545,
|
23420 |
+
"step": 3325
|
23421 |
+
},
|
23422 |
+
{
|
23423 |
+
"epoch": 0.5346192485432992,
|
23424 |
+
"grad_norm": 0.6819697022438049,
|
23425 |
+
"learning_rate": 0.00018495219922433782,
|
23426 |
+
"loss": 1.3887,
|
23427 |
+
"step": 3326
|
23428 |
+
},
|
23429 |
+
{
|
23430 |
+
"epoch": 0.534779987944545,
|
23431 |
+
"grad_norm": 0.7201501131057739,
|
23432 |
+
"learning_rate": 0.00018494330327491331,
|
23433 |
+
"loss": 1.3843,
|
23434 |
+
"step": 3327
|
23435 |
+
},
|
23436 |
+
{
|
23437 |
+
"epoch": 0.5349407273457907,
|
23438 |
+
"grad_norm": 0.8287776112556458,
|
23439 |
+
"learning_rate": 0.00018493440491078805,
|
23440 |
+
"loss": 1.3751,
|
23441 |
+
"step": 3328
|
23442 |
+
},
|
23443 |
+
{
|
23444 |
+
"epoch": 0.5351014667470364,
|
23445 |
+
"grad_norm": 0.8437519073486328,
|
23446 |
+
"learning_rate": 0.00018492550413221496,
|
23447 |
+
"loss": 1.5746,
|
23448 |
+
"step": 3329
|
23449 |
+
},
|
23450 |
+
{
|
23451 |
+
"epoch": 0.5352622061482821,
|
23452 |
+
"grad_norm": 0.7958610653877258,
|
23453 |
+
"learning_rate": 0.00018491660093944705,
|
23454 |
+
"loss": 1.5523,
|
23455 |
+
"step": 3330
|
23456 |
+
},
|
23457 |
+
{
|
23458 |
+
"epoch": 0.5354229455495279,
|
23459 |
+
"grad_norm": 0.8160830736160278,
|
23460 |
+
"learning_rate": 0.00018490769533273746,
|
23461 |
+
"loss": 1.5238,
|
23462 |
+
"step": 3331
|
23463 |
+
},
|
23464 |
+
{
|
23465 |
+
"epoch": 0.5355836849507736,
|
23466 |
+
"grad_norm": 0.8264061212539673,
|
23467 |
+
"learning_rate": 0.0001848987873123393,
|
23468 |
+
"loss": 1.5469,
|
23469 |
+
"step": 3332
|
23470 |
+
},
|
23471 |
+
{
|
23472 |
+
"epoch": 0.5357444243520193,
|
23473 |
+
"grad_norm": 0.7421874403953552,
|
23474 |
+
"learning_rate": 0.00018488987687850585,
|
23475 |
+
"loss": 1.3911,
|
23476 |
+
"step": 3333
|
23477 |
+
},
|
23478 |
+
{
|
23479 |
+
"epoch": 0.535905163753265,
|
23480 |
+
"grad_norm": 0.882710337638855,
|
23481 |
+
"learning_rate": 0.00018488096403149038,
|
23482 |
+
"loss": 1.5791,
|
23483 |
+
"step": 3334
|
23484 |
+
},
|
23485 |
+
{
|
23486 |
+
"epoch": 0.5360659031545107,
|
23487 |
+
"grad_norm": 0.708991289138794,
|
23488 |
+
"learning_rate": 0.00018487204877154625,
|
23489 |
+
"loss": 1.4892,
|
23490 |
+
"step": 3335
|
23491 |
+
},
|
23492 |
+
{
|
23493 |
+
"epoch": 0.5362266425557565,
|
23494 |
+
"grad_norm": 0.8283132910728455,
|
23495 |
+
"learning_rate": 0.00018486313109892692,
|
23496 |
+
"loss": 1.4233,
|
23497 |
+
"step": 3336
|
23498 |
+
},
|
23499 |
+
{
|
23500 |
+
"epoch": 0.5363873819570022,
|
23501 |
+
"grad_norm": 0.7657718658447266,
|
23502 |
+
"learning_rate": 0.00018485421101388586,
|
23503 |
+
"loss": 1.3739,
|
23504 |
+
"step": 3337
|
23505 |
+
},
|
23506 |
+
{
|
23507 |
+
"epoch": 0.5365481213582479,
|
23508 |
+
"grad_norm": 0.8362711071968079,
|
23509 |
+
"learning_rate": 0.0001848452885166767,
|
23510 |
+
"loss": 1.5645,
|
23511 |
+
"step": 3338
|
23512 |
+
},
|
23513 |
+
{
|
23514 |
+
"epoch": 0.5367088607594936,
|
23515 |
+
"grad_norm": 0.8162758946418762,
|
23516 |
+
"learning_rate": 0.00018483636360755302,
|
23517 |
+
"loss": 1.4889,
|
23518 |
+
"step": 3339
|
23519 |
+
},
|
23520 |
+
{
|
23521 |
+
"epoch": 0.5368696001607394,
|
23522 |
+
"grad_norm": 0.8083055019378662,
|
23523 |
+
"learning_rate": 0.00018482743628676855,
|
23524 |
+
"loss": 1.5101,
|
23525 |
+
"step": 3340
|
23526 |
+
},
|
23527 |
+
{
|
23528 |
+
"epoch": 0.5370303395619851,
|
23529 |
+
"grad_norm": 0.7159759402275085,
|
23530 |
+
"learning_rate": 0.0001848185065545771,
|
23531 |
+
"loss": 1.2628,
|
23532 |
+
"step": 3341
|
23533 |
+
},
|
23534 |
+
{
|
23535 |
+
"epoch": 0.5371910789632308,
|
23536 |
+
"grad_norm": 0.8316575884819031,
|
23537 |
+
"learning_rate": 0.00018480957441123244,
|
23538 |
+
"loss": 1.4327,
|
23539 |
+
"step": 3342
|
23540 |
+
},
|
23541 |
+
{
|
23542 |
+
"epoch": 0.5373518183644765,
|
23543 |
+
"grad_norm": 0.8548365831375122,
|
23544 |
+
"learning_rate": 0.00018480063985698862,
|
23545 |
+
"loss": 1.5206,
|
23546 |
+
"step": 3343
|
23547 |
+
},
|
23548 |
+
{
|
23549 |
+
"epoch": 0.5375125577657224,
|
23550 |
+
"grad_norm": 0.8024420738220215,
|
23551 |
+
"learning_rate": 0.0001847917028920995,
|
23552 |
+
"loss": 1.4826,
|
23553 |
+
"step": 3344
|
23554 |
+
},
|
23555 |
+
{
|
23556 |
+
"epoch": 0.5376732971669681,
|
23557 |
+
"grad_norm": 0.8056092858314514,
|
23558 |
+
"learning_rate": 0.00018478276351681917,
|
23559 |
+
"loss": 1.6029,
|
23560 |
+
"step": 3345
|
23561 |
+
},
|
23562 |
+
{
|
23563 |
+
"epoch": 0.5378340365682138,
|
23564 |
+
"grad_norm": 0.7622305154800415,
|
23565 |
+
"learning_rate": 0.00018477382173140176,
|
23566 |
+
"loss": 1.4532,
|
23567 |
+
"step": 3346
|
23568 |
+
},
|
23569 |
+
{
|
23570 |
+
"epoch": 0.5379947759694595,
|
23571 |
+
"grad_norm": 0.7563185095787048,
|
23572 |
+
"learning_rate": 0.00018476487753610145,
|
23573 |
+
"loss": 1.4579,
|
23574 |
+
"step": 3347
|
23575 |
+
},
|
23576 |
+
{
|
23577 |
+
"epoch": 0.5381555153707053,
|
23578 |
+
"grad_norm": 0.714819073677063,
|
23579 |
+
"learning_rate": 0.00018475593093117255,
|
23580 |
+
"loss": 1.4069,
|
23581 |
+
"step": 3348
|
23582 |
+
},
|
23583 |
+
{
|
23584 |
+
"epoch": 0.538316254771951,
|
23585 |
+
"grad_norm": 0.8602470755577087,
|
23586 |
+
"learning_rate": 0.0001847469819168693,
|
23587 |
+
"loss": 1.5044,
|
23588 |
+
"step": 3349
|
23589 |
+
},
|
23590 |
+
{
|
23591 |
+
"epoch": 0.5384769941731967,
|
23592 |
+
"grad_norm": 0.8590700030326843,
|
23593 |
+
"learning_rate": 0.00018473803049344617,
|
23594 |
+
"loss": 1.4614,
|
23595 |
+
"step": 3350
|
23596 |
+
},
|
23597 |
+
{
|
23598 |
+
"epoch": 0.5386377335744424,
|
23599 |
+
"grad_norm": 0.9124669432640076,
|
23600 |
+
"learning_rate": 0.00018472907666115755,
|
23601 |
+
"loss": 1.6651,
|
23602 |
+
"step": 3351
|
23603 |
+
},
|
23604 |
+
{
|
23605 |
+
"epoch": 0.5387984729756882,
|
23606 |
+
"grad_norm": 0.8647240996360779,
|
23607 |
+
"learning_rate": 0.00018472012042025803,
|
23608 |
+
"loss": 1.4227,
|
23609 |
+
"step": 3352
|
23610 |
+
},
|
23611 |
+
{
|
23612 |
+
"epoch": 0.5389592123769339,
|
23613 |
+
"grad_norm": 0.7770558595657349,
|
23614 |
+
"learning_rate": 0.00018471116177100222,
|
23615 |
+
"loss": 1.5086,
|
23616 |
+
"step": 3353
|
23617 |
+
},
|
23618 |
+
{
|
23619 |
+
"epoch": 0.5391199517781796,
|
23620 |
+
"grad_norm": 0.7929955720901489,
|
23621 |
+
"learning_rate": 0.00018470220071364476,
|
23622 |
+
"loss": 1.3672,
|
23623 |
+
"step": 3354
|
23624 |
+
},
|
23625 |
+
{
|
23626 |
+
"epoch": 0.5392806911794253,
|
23627 |
+
"grad_norm": 0.8219648599624634,
|
23628 |
+
"learning_rate": 0.00018469323724844036,
|
23629 |
+
"loss": 1.4248,
|
23630 |
+
"step": 3355
|
23631 |
+
},
|
23632 |
+
{
|
23633 |
+
"epoch": 0.5394414305806711,
|
23634 |
+
"grad_norm": 0.8101730346679688,
|
23635 |
+
"learning_rate": 0.0001846842713756439,
|
23636 |
+
"loss": 1.4145,
|
23637 |
+
"step": 3356
|
23638 |
+
},
|
23639 |
+
{
|
23640 |
+
"epoch": 0.5396021699819168,
|
23641 |
+
"grad_norm": 0.7860954999923706,
|
23642 |
+
"learning_rate": 0.00018467530309551017,
|
23643 |
+
"loss": 1.4025,
|
23644 |
+
"step": 3357
|
23645 |
+
},
|
23646 |
+
{
|
23647 |
+
"epoch": 0.5397629093831625,
|
23648 |
+
"grad_norm": 0.7054822444915771,
|
23649 |
+
"learning_rate": 0.0001846663324082942,
|
23650 |
+
"loss": 1.2522,
|
23651 |
+
"step": 3358
|
23652 |
+
},
|
23653 |
+
{
|
23654 |
+
"epoch": 0.5399236487844082,
|
23655 |
+
"grad_norm": 0.7910926938056946,
|
23656 |
+
"learning_rate": 0.00018465735931425092,
|
23657 |
+
"loss": 1.5539,
|
23658 |
+
"step": 3359
|
23659 |
+
},
|
23660 |
+
{
|
23661 |
+
"epoch": 0.540084388185654,
|
23662 |
+
"grad_norm": 0.8198317289352417,
|
23663 |
+
"learning_rate": 0.00018464838381363548,
|
23664 |
+
"loss": 1.4939,
|
23665 |
+
"step": 3360
|
23666 |
+
},
|
23667 |
+
{
|
23668 |
+
"epoch": 0.5402451275868997,
|
23669 |
+
"grad_norm": 0.9830654859542847,
|
23670 |
+
"learning_rate": 0.00018463940590670298,
|
23671 |
+
"loss": 1.6687,
|
23672 |
+
"step": 3361
|
23673 |
+
},
|
23674 |
+
{
|
23675 |
+
"epoch": 0.5404058669881455,
|
23676 |
+
"grad_norm": 0.7338623404502869,
|
23677 |
+
"learning_rate": 0.00018463042559370866,
|
23678 |
+
"loss": 1.4941,
|
23679 |
+
"step": 3362
|
23680 |
+
},
|
23681 |
+
{
|
23682 |
+
"epoch": 0.5405666063893912,
|
23683 |
+
"grad_norm": 0.8205195069313049,
|
23684 |
+
"learning_rate": 0.0001846214428749078,
|
23685 |
+
"loss": 1.3692,
|
23686 |
+
"step": 3363
|
23687 |
+
},
|
23688 |
+
{
|
23689 |
+
"epoch": 0.540727345790637,
|
23690 |
+
"grad_norm": 0.7883062362670898,
|
23691 |
+
"learning_rate": 0.0001846124577505558,
|
23692 |
+
"loss": 1.5084,
|
23693 |
+
"step": 3364
|
23694 |
+
},
|
23695 |
+
{
|
23696 |
+
"epoch": 0.5408880851918827,
|
23697 |
+
"grad_norm": 0.8614766001701355,
|
23698 |
+
"learning_rate": 0.00018460347022090793,
|
23699 |
+
"loss": 1.5657,
|
23700 |
+
"step": 3365
|
23701 |
+
},
|
23702 |
+
{
|
23703 |
+
"epoch": 0.5410488245931284,
|
23704 |
+
"grad_norm": 0.6877971291542053,
|
23705 |
+
"learning_rate": 0.00018459448028621987,
|
23706 |
+
"loss": 1.2782,
|
23707 |
+
"step": 3366
|
23708 |
+
},
|
23709 |
+
{
|
23710 |
+
"epoch": 0.5412095639943741,
|
23711 |
+
"grad_norm": 0.8057857751846313,
|
23712 |
+
"learning_rate": 0.00018458548794674705,
|
23713 |
+
"loss": 1.2792,
|
23714 |
+
"step": 3367
|
23715 |
+
},
|
23716 |
+
{
|
23717 |
+
"epoch": 0.5413703033956199,
|
23718 |
+
"grad_norm": 0.8015730381011963,
|
23719 |
+
"learning_rate": 0.00018457649320274518,
|
23720 |
+
"loss": 1.547,
|
23721 |
+
"step": 3368
|
23722 |
+
},
|
23723 |
+
{
|
23724 |
+
"epoch": 0.5415310427968656,
|
23725 |
+
"grad_norm": 0.8146819472312927,
|
23726 |
+
"learning_rate": 0.0001845674960544699,
|
23727 |
+
"loss": 1.4916,
|
23728 |
+
"step": 3369
|
23729 |
+
},
|
23730 |
+
{
|
23731 |
+
"epoch": 0.5416917821981113,
|
23732 |
+
"grad_norm": 0.650114893913269,
|
23733 |
+
"learning_rate": 0.00018455849650217697,
|
23734 |
+
"loss": 1.2217,
|
23735 |
+
"step": 3370
|
23736 |
+
},
|
23737 |
+
{
|
23738 |
+
"epoch": 0.541852521599357,
|
23739 |
+
"grad_norm": 0.8225253224372864,
|
23740 |
+
"learning_rate": 0.00018454949454612226,
|
23741 |
+
"loss": 1.487,
|
23742 |
+
"step": 3371
|
23743 |
+
},
|
23744 |
+
{
|
23745 |
+
"epoch": 0.5420132610006028,
|
23746 |
+
"grad_norm": 0.7451386451721191,
|
23747 |
+
"learning_rate": 0.00018454049018656163,
|
23748 |
+
"loss": 1.4001,
|
23749 |
+
"step": 3372
|
23750 |
+
},
|
23751 |
+
{
|
23752 |
+
"epoch": 0.5421740004018485,
|
23753 |
+
"grad_norm": 0.8658048510551453,
|
23754 |
+
"learning_rate": 0.00018453148342375107,
|
23755 |
+
"loss": 1.3773,
|
23756 |
+
"step": 3373
|
23757 |
+
},
|
23758 |
+
{
|
23759 |
+
"epoch": 0.5423347398030942,
|
23760 |
+
"grad_norm": 0.7653456926345825,
|
23761 |
+
"learning_rate": 0.00018452247425794667,
|
23762 |
+
"loss": 1.5931,
|
23763 |
+
"step": 3374
|
23764 |
+
},
|
23765 |
+
{
|
23766 |
+
"epoch": 0.5424954792043399,
|
23767 |
+
"grad_norm": 0.7824321985244751,
|
23768 |
+
"learning_rate": 0.00018451346268940445,
|
23769 |
+
"loss": 1.7242,
|
23770 |
+
"step": 3375
|
23771 |
+
},
|
23772 |
+
{
|
23773 |
+
"epoch": 0.5426562186055857,
|
23774 |
+
"grad_norm": 0.8325675129890442,
|
23775 |
+
"learning_rate": 0.00018450444871838062,
|
23776 |
+
"loss": 1.5107,
|
23777 |
+
"step": 3376
|
23778 |
+
},
|
23779 |
+
{
|
23780 |
+
"epoch": 0.5428169580068314,
|
23781 |
+
"grad_norm": 0.8157557249069214,
|
23782 |
+
"learning_rate": 0.00018449543234513142,
|
23783 |
+
"loss": 1.4285,
|
23784 |
+
"step": 3377
|
23785 |
+
},
|
23786 |
+
{
|
23787 |
+
"epoch": 0.5429776974080771,
|
23788 |
+
"grad_norm": 0.8847950100898743,
|
23789 |
+
"learning_rate": 0.00018448641356991313,
|
23790 |
+
"loss": 1.4829,
|
23791 |
+
"step": 3378
|
23792 |
+
},
|
23793 |
+
{
|
23794 |
+
"epoch": 0.5431384368093228,
|
23795 |
+
"grad_norm": 0.856139063835144,
|
23796 |
+
"learning_rate": 0.0001844773923929822,
|
23797 |
+
"loss": 1.3546,
|
23798 |
+
"step": 3379
|
23799 |
+
},
|
23800 |
+
{
|
23801 |
+
"epoch": 0.5432991762105687,
|
23802 |
+
"grad_norm": 0.7851885557174683,
|
23803 |
+
"learning_rate": 0.00018446836881459504,
|
23804 |
+
"loss": 1.5413,
|
23805 |
+
"step": 3380
|
23806 |
+
},
|
23807 |
+
{
|
23808 |
+
"epoch": 0.5434599156118144,
|
23809 |
+
"grad_norm": 0.8247860074043274,
|
23810 |
+
"learning_rate": 0.00018445934283500815,
|
23811 |
+
"loss": 1.3986,
|
23812 |
+
"step": 3381
|
23813 |
+
},
|
23814 |
+
{
|
23815 |
+
"epoch": 0.5436206550130601,
|
23816 |
+
"grad_norm": 0.8349491357803345,
|
23817 |
+
"learning_rate": 0.00018445031445447812,
|
23818 |
+
"loss": 1.5491,
|
23819 |
+
"step": 3382
|
23820 |
+
},
|
23821 |
+
{
|
23822 |
+
"epoch": 0.5437813944143058,
|
23823 |
+
"grad_norm": 0.775696337223053,
|
23824 |
+
"learning_rate": 0.0001844412836732616,
|
23825 |
+
"loss": 1.2537,
|
23826 |
+
"step": 3383
|
23827 |
+
},
|
23828 |
+
{
|
23829 |
+
"epoch": 0.5439421338155516,
|
23830 |
+
"grad_norm": 0.7671049237251282,
|
23831 |
+
"learning_rate": 0.0001844322504916153,
|
23832 |
+
"loss": 1.3845,
|
23833 |
+
"step": 3384
|
23834 |
+
},
|
23835 |
+
{
|
23836 |
+
"epoch": 0.5441028732167973,
|
23837 |
+
"grad_norm": 0.8033993244171143,
|
23838 |
+
"learning_rate": 0.00018442321490979603,
|
23839 |
+
"loss": 1.5031,
|
23840 |
+
"step": 3385
|
23841 |
+
},
|
23842 |
+
{
|
23843 |
+
"epoch": 0.544263612618043,
|
23844 |
+
"grad_norm": 0.8441072106361389,
|
23845 |
+
"learning_rate": 0.00018441417692806068,
|
23846 |
+
"loss": 1.3995,
|
23847 |
+
"step": 3386
|
23848 |
+
},
|
23849 |
+
{
|
23850 |
+
"epoch": 0.5444243520192887,
|
23851 |
+
"grad_norm": 0.907695472240448,
|
23852 |
+
"learning_rate": 0.0001844051365466661,
|
23853 |
+
"loss": 1.1331,
|
23854 |
+
"step": 3387
|
23855 |
+
},
|
23856 |
+
{
|
23857 |
+
"epoch": 0.5445850914205345,
|
23858 |
+
"grad_norm": 0.7442054748535156,
|
23859 |
+
"learning_rate": 0.0001843960937658693,
|
23860 |
+
"loss": 1.2081,
|
23861 |
+
"step": 3388
|
23862 |
+
},
|
23863 |
+
{
|
23864 |
+
"epoch": 0.5447458308217802,
|
23865 |
+
"grad_norm": 0.8805733323097229,
|
23866 |
+
"learning_rate": 0.0001843870485859274,
|
23867 |
+
"loss": 1.8348,
|
23868 |
+
"step": 3389
|
23869 |
+
},
|
23870 |
+
{
|
23871 |
+
"epoch": 0.5449065702230259,
|
23872 |
+
"grad_norm": 0.7400468587875366,
|
23873 |
+
"learning_rate": 0.00018437800100709744,
|
23874 |
+
"loss": 1.3888,
|
23875 |
+
"step": 3390
|
23876 |
+
},
|
23877 |
+
{
|
23878 |
+
"epoch": 0.5450673096242716,
|
23879 |
+
"grad_norm": 0.7798414826393127,
|
23880 |
+
"learning_rate": 0.0001843689510296367,
|
23881 |
+
"loss": 1.5811,
|
23882 |
+
"step": 3391
|
23883 |
+
},
|
23884 |
+
{
|
23885 |
+
"epoch": 0.5452280490255174,
|
23886 |
+
"grad_norm": 0.7735156416893005,
|
23887 |
+
"learning_rate": 0.00018435989865380243,
|
23888 |
+
"loss": 1.4198,
|
23889 |
+
"step": 3392
|
23890 |
+
},
|
23891 |
+
{
|
23892 |
+
"epoch": 0.5453887884267631,
|
23893 |
+
"grad_norm": 0.7765915989875793,
|
23894 |
+
"learning_rate": 0.00018435084387985194,
|
23895 |
+
"loss": 1.3977,
|
23896 |
+
"step": 3393
|
23897 |
+
},
|
23898 |
+
{
|
23899 |
+
"epoch": 0.5455495278280088,
|
23900 |
+
"grad_norm": 0.7338336706161499,
|
23901 |
+
"learning_rate": 0.0001843417867080426,
|
23902 |
+
"loss": 1.2919,
|
23903 |
+
"step": 3394
|
23904 |
+
},
|
23905 |
+
{
|
23906 |
+
"epoch": 0.5457102672292545,
|
23907 |
+
"grad_norm": 0.69191974401474,
|
23908 |
+
"learning_rate": 0.0001843327271386319,
|
23909 |
+
"loss": 1.2431,
|
23910 |
+
"step": 3395
|
23911 |
+
},
|
23912 |
+
{
|
23913 |
+
"epoch": 0.5458710066305003,
|
23914 |
+
"grad_norm": 0.7926873564720154,
|
23915 |
+
"learning_rate": 0.00018432366517187745,
|
23916 |
+
"loss": 1.6068,
|
23917 |
+
"step": 3396
|
23918 |
+
},
|
23919 |
+
{
|
23920 |
+
"epoch": 0.546031746031746,
|
23921 |
+
"grad_norm": 0.8520327806472778,
|
23922 |
+
"learning_rate": 0.00018431460080803677,
|
23923 |
+
"loss": 1.5555,
|
23924 |
+
"step": 3397
|
23925 |
+
},
|
23926 |
+
{
|
23927 |
+
"epoch": 0.5461924854329918,
|
23928 |
+
"grad_norm": 0.8541330099105835,
|
23929 |
+
"learning_rate": 0.00018430553404736756,
|
23930 |
+
"loss": 1.5456,
|
23931 |
+
"step": 3398
|
23932 |
+
},
|
23933 |
+
{
|
23934 |
+
"epoch": 0.5463532248342375,
|
23935 |
+
"grad_norm": 0.732483983039856,
|
23936 |
+
"learning_rate": 0.00018429646489012759,
|
23937 |
+
"loss": 1.4058,
|
23938 |
+
"step": 3399
|
23939 |
+
},
|
23940 |
+
{
|
23941 |
+
"epoch": 0.5465139642354833,
|
23942 |
+
"grad_norm": 0.7677106261253357,
|
23943 |
+
"learning_rate": 0.00018428739333657465,
|
23944 |
+
"loss": 1.5872,
|
23945 |
+
"step": 3400
|
23946 |
+
},
|
23947 |
+
{
|
23948 |
+
"epoch": 0.5465139642354833,
|
23949 |
+
"eval_loss": 1.4874334335327148,
|
23950 |
+
"eval_runtime": 46.2584,
|
23951 |
+
"eval_samples_per_second": 5.426,
|
23952 |
+
"eval_steps_per_second": 2.724,
|
23953 |
+
"step": 3400
|
23954 |
}
|
23955 |
],
|
23956 |
"logging_steps": 1,
|
|
|
23979 |
"attributes": {}
|
23980 |
}
|
23981 |
},
|
23982 |
+
"total_flos": 5.937115857813504e+17,
|
23983 |
"train_batch_size": 2,
|
23984 |
"trial_name": null,
|
23985 |
"trial_params": null
|