Training in progress, step 3800, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2b2a4be21af2c716b0cc4a792975c4bfff2e3ba8feb99e48c8a407b17faf0d95
 size 323014168
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f3486fe5446accc38211ef993b3a0774d04ab835c7c05affb4bd1e27e536b974
 size 164465012
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ffa1608bad0f0f33c0aa1a62856d5d0bd1c9cab4a1caff7aa97c3865e5010bcd
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f38c45f6b71d9887fe4a7a3bf5fddf7a4cea54583cb12f0862158864127ae5d4
 size 1064
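Each of the four binary checkpoint files above is stored through Git LFS, so the commit only rewrites the three-line pointer text: the spec version, the sha256 object id, and the byte size. As a minimal sketch of what those pointer lines mean (hypothetical local paths, standard library only, not part of this repo), the following Python recomputes a downloaded blob's sha256 and checks it against a pointer:

import hashlib
import re
from pathlib import Path

def parse_lfs_pointer(pointer_text: str) -> dict:
    # The pointer format is one "key value" pair per line, as in the diffs above.
    fields = dict(re.findall(r"^(\S+) (.+)$", pointer_text, flags=re.M))
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

def verify(pointer_path: str, blob_path: str) -> bool:
    # True when the downloaded blob matches the pointer's oid and size.
    meta = parse_lfs_pointer(Path(pointer_path).read_text())
    blob = Path(blob_path).read_bytes()
    return hashlib.sha256(blob).hexdigest() == meta["oid"] and len(blob) == meta["size"]

# Example with hypothetical paths: the adapter pointer above should resolve to a
# 323014168-byte blob hashing to 2b2a4be2...
# print(verify("adapter_model.safetensors.pointer", "last-checkpoint/adapter_model.safetensors"))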
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.4838331937789917,
+  "best_model_checkpoint": "miner_id_24/checkpoint-3800",
+  "epoch": 0.6108097247337754,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 3800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -25359,6 +25359,1414 @@
       "eval_samples_per_second": 5.429,
       "eval_steps_per_second": 2.725,
       "step": 3600
+    },
+    {
+      "epoch": 0.578822583885875,
+      "grad_norm": 0.7528217434883118,
+      "learning_rate": 0.00018241572242947112,
+      "loss": 1.4148,
+      "step": 3601
+    },
+    {
+      "epoch": 0.5789833232871208,
+      "grad_norm": 0.8018618226051331,
+      "learning_rate": 0.0001824061722010477,
+      "loss": 1.2186,
+      "step": 3602
+    },
+    {
+      "epoch": 0.5791440626883665,
+      "grad_norm": 0.8759437203407288,
+      "learning_rate": 0.00018239661963004704,
+      "loss": 1.5752,
+      "step": 3603
+    },
+    {
+      "epoch": 0.5793048020896122,
+      "grad_norm": 0.7461703419685364,
+      "learning_rate": 0.00018238706471674073,
+      "loss": 1.6099,
+      "step": 3604
+    },
+    {
+      "epoch": 0.5794655414908579,
+      "grad_norm": 0.7844128608703613,
+      "learning_rate": 0.0001823775074614003,
+      "loss": 1.3776,
+      "step": 3605
+    },
+    {
+      "epoch": 0.5796262808921037,
+      "grad_norm": 0.9594727158546448,
+      "learning_rate": 0.00018236794786429753,
+      "loss": 1.4306,
+      "step": 3606
+    },
+    {
+      "epoch": 0.5797870202933494,
+      "grad_norm": 0.8263266086578369,
+      "learning_rate": 0.00018235838592570416,
+      "loss": 1.275,
+      "step": 3607
+    },
+    {
+      "epoch": 0.5799477596945951,
+      "grad_norm": 0.9031165838241577,
+      "learning_rate": 0.00018234882164589196,
+      "loss": 1.4176,
+      "step": 3608
+    },
+    {
+      "epoch": 0.5801084990958408,
+      "grad_norm": 0.979155957698822,
+      "learning_rate": 0.00018233925502513288,
+      "loss": 1.6688,
+      "step": 3609
+    },
+    {
+      "epoch": 0.5802692384970866,
+      "grad_norm": 0.8131352066993713,
+      "learning_rate": 0.0001823296860636988,
+      "loss": 1.4453,
+      "step": 3610
+    },
+    {
+      "epoch": 0.5804299778983323,
+      "grad_norm": 0.7670798301696777,
+      "learning_rate": 0.0001823201147618618,
+      "loss": 1.3424,
+      "step": 3611
+    },
+    {
+      "epoch": 0.580590717299578,
+      "grad_norm": 0.8556586503982544,
+      "learning_rate": 0.0001823105411198939,
+      "loss": 1.5312,
+      "step": 3612
+    },
+    {
+      "epoch": 0.5807514567008238,
+      "grad_norm": 0.7436866760253906,
+      "learning_rate": 0.00018230096513806733,
+      "loss": 1.4551,
+      "step": 3613
+    },
+    {
+      "epoch": 0.5809121961020696,
+      "grad_norm": 0.864037036895752,
+      "learning_rate": 0.00018229138681665426,
+      "loss": 1.7248,
+      "step": 3614
+    },
+    {
+      "epoch": 0.5810729355033153,
+      "grad_norm": 0.7645609378814697,
+      "learning_rate": 0.000182281806155927,
+      "loss": 1.3653,
+      "step": 3615
+    },
+    {
+      "epoch": 0.581233674904561,
+      "grad_norm": 0.8260241746902466,
+      "learning_rate": 0.00018227222315615786,
+      "loss": 1.5514,
+      "step": 3616
+    },
+    {
+      "epoch": 0.5813944143058067,
+      "grad_norm": 0.7814425230026245,
+      "learning_rate": 0.00018226263781761931,
+      "loss": 1.5331,
+      "step": 3617
+    },
+    {
+      "epoch": 0.5815551537070525,
+      "grad_norm": 0.7925345301628113,
+      "learning_rate": 0.0001822530501405838,
+      "loss": 1.4229,
+      "step": 3618
+    },
+    {
+      "epoch": 0.5817158931082982,
+      "grad_norm": 0.8228952884674072,
+      "learning_rate": 0.0001822434601253239,
+      "loss": 1.5176,
+      "step": 3619
+    },
+    {
+      "epoch": 0.5818766325095439,
+      "grad_norm": 0.6458499431610107,
+      "learning_rate": 0.0001822338677721122,
+      "loss": 1.1203,
+      "step": 3620
+    },
+    {
+      "epoch": 0.5820373719107896,
+      "grad_norm": 0.6864424347877502,
+      "learning_rate": 0.0001822242730812214,
+      "loss": 1.228,
+      "step": 3621
+    },
+    {
+      "epoch": 0.5821981113120354,
+      "grad_norm": 0.808828592300415,
+      "learning_rate": 0.0001822146760529243,
+      "loss": 1.5513,
+      "step": 3622
+    },
+    {
+      "epoch": 0.5823588507132811,
+      "grad_norm": 0.858120322227478,
+      "learning_rate": 0.0001822050766874936,
+      "loss": 1.415,
+      "step": 3623
+    },
+    {
+      "epoch": 0.5825195901145268,
+      "grad_norm": 0.7679414749145508,
+      "learning_rate": 0.0001821954749852023,
+      "loss": 1.4252,
+      "step": 3624
+    },
+    {
+      "epoch": 0.5826803295157725,
+      "grad_norm": 0.8275029063224792,
+      "learning_rate": 0.0001821858709463233,
+      "loss": 1.4278,
+      "step": 3625
+    },
+    {
+      "epoch": 0.5828410689170183,
+      "grad_norm": 0.7636759877204895,
+      "learning_rate": 0.00018217626457112958,
+      "loss": 1.6626,
+      "step": 3626
+    },
+    {
+      "epoch": 0.583001808318264,
+      "grad_norm": 0.7782408595085144,
+      "learning_rate": 0.00018216665585989432,
+      "loss": 1.3827,
+      "step": 3627
+    },
+    {
+      "epoch": 0.5831625477195097,
+      "grad_norm": 0.7547385692596436,
+      "learning_rate": 0.00018215704481289055,
+      "loss": 1.263,
+      "step": 3628
+    },
+    {
+      "epoch": 0.5833232871207554,
+      "grad_norm": 0.9035071134567261,
+      "learning_rate": 0.00018214743143039156,
+      "loss": 1.6616,
+      "step": 3629
+    },
+    {
+      "epoch": 0.5834840265220012,
+      "grad_norm": 0.8114115595817566,
+      "learning_rate": 0.00018213781571267066,
+      "loss": 1.2582,
+      "step": 3630
+    },
+    {
+      "epoch": 0.583644765923247,
+      "grad_norm": 0.859399139881134,
+      "learning_rate": 0.00018212819766000112,
+      "loss": 1.4414,
+      "step": 3631
+    },
+    {
+      "epoch": 0.5838055053244927,
+      "grad_norm": 0.8403969407081604,
+      "learning_rate": 0.00018211857727265635,
+      "loss": 1.336,
+      "step": 3632
+    },
+    {
+      "epoch": 0.5839662447257384,
+      "grad_norm": 0.8457290530204773,
+      "learning_rate": 0.00018210895455090994,
+      "loss": 1.3768,
+      "step": 3633
+    },
+    {
+      "epoch": 0.5841269841269842,
+      "grad_norm": 0.8492411971092224,
+      "learning_rate": 0.0001820993294950353,
+      "loss": 1.5672,
+      "step": 3634
+    },
+    {
+      "epoch": 0.5842877235282299,
+      "grad_norm": 0.9314752221107483,
+      "learning_rate": 0.00018208970210530616,
+      "loss": 1.6484,
+      "step": 3635
+    },
+    {
+      "epoch": 0.5844484629294756,
+      "grad_norm": 0.8824315071105957,
+      "learning_rate": 0.0001820800723819961,
+      "loss": 1.3677,
+      "step": 3636
+    },
+    {
+      "epoch": 0.5846092023307213,
+      "grad_norm": 0.8490722179412842,
+      "learning_rate": 0.00018207044032537897,
+      "loss": 1.5372,
+      "step": 3637
+    },
+    {
+      "epoch": 0.5847699417319671,
+      "grad_norm": 0.7830153703689575,
+      "learning_rate": 0.0001820608059357285,
+      "loss": 1.5823,
+      "step": 3638
+    },
+    {
+      "epoch": 0.5849306811332128,
+      "grad_norm": 0.7777186036109924,
+      "learning_rate": 0.00018205116921331856,
+      "loss": 1.2204,
+      "step": 3639
+    },
+    {
+      "epoch": 0.5850914205344585,
+      "grad_norm": 0.7347624897956848,
+      "learning_rate": 0.0001820415301584232,
+      "loss": 1.4693,
+      "step": 3640
+    },
+    {
+      "epoch": 0.5852521599357042,
+      "grad_norm": 0.7492866516113281,
+      "learning_rate": 0.00018203188877131634,
+      "loss": 1.4546,
+      "step": 3641
+    },
+    {
+      "epoch": 0.58541289933695,
+      "grad_norm": 0.8413015007972717,
+      "learning_rate": 0.00018202224505227207,
+      "loss": 1.4991,
+      "step": 3642
+    },
+    {
+      "epoch": 0.5855736387381957,
+      "grad_norm": 0.7110021114349365,
+      "learning_rate": 0.00018201259900156453,
+      "loss": 1.4083,
+      "step": 3643
+    },
+    {
+      "epoch": 0.5857343781394414,
+      "grad_norm": 0.7323606610298157,
+      "learning_rate": 0.00018200295061946795,
+      "loss": 1.2658,
+      "step": 3644
+    },
+    {
+      "epoch": 0.5858951175406871,
+      "grad_norm": 0.7900603413581848,
+      "learning_rate": 0.00018199329990625663,
+      "loss": 1.4276,
+      "step": 3645
+    },
+    {
+      "epoch": 0.5860558569419329,
+      "grad_norm": 0.8965500593185425,
+      "learning_rate": 0.00018198364686220487,
+      "loss": 1.7896,
+      "step": 3646
+    },
+    {
+      "epoch": 0.5862165963431786,
+      "grad_norm": 0.9012687802314758,
+      "learning_rate": 0.0001819739914875871,
+      "loss": 1.5813,
+      "step": 3647
+    },
+    {
+      "epoch": 0.5863773357444243,
+      "grad_norm": 0.9058600068092346,
+      "learning_rate": 0.00018196433378267777,
+      "loss": 1.5633,
+      "step": 3648
+    },
+    {
+      "epoch": 0.5865380751456701,
+      "grad_norm": 0.8417165875434875,
+      "learning_rate": 0.00018195467374775145,
+      "loss": 1.393,
+      "step": 3649
+    },
+    {
+      "epoch": 0.5866988145469159,
+      "grad_norm": 0.8096698522567749,
+      "learning_rate": 0.00018194501138308274,
+      "loss": 1.3462,
+      "step": 3650
+    },
+    {
+      "epoch": 0.5868595539481616,
+      "grad_norm": 0.8650566935539246,
+      "learning_rate": 0.0001819353466889463,
+      "loss": 1.4604,
+      "step": 3651
+    },
+    {
+      "epoch": 0.5870202933494073,
+      "grad_norm": 0.7909032106399536,
+      "learning_rate": 0.00018192567966561688,
+      "loss": 1.4289,
+      "step": 3652
+    },
+    {
+      "epoch": 0.587181032750653,
+      "grad_norm": 0.789933979511261,
+      "learning_rate": 0.0001819160103133693,
+      "loss": 1.5194,
+      "step": 3653
+    },
+    {
+      "epoch": 0.5873417721518988,
+      "grad_norm": 0.8069058060646057,
+      "learning_rate": 0.00018190633863247847,
+      "loss": 1.4561,
+      "step": 3654
+    },
+    {
+      "epoch": 0.5875025115531445,
+      "grad_norm": 0.882792055606842,
+      "learning_rate": 0.0001818966646232192,
+      "loss": 1.7359,
+      "step": 3655
+    },
+    {
+      "epoch": 0.5876632509543902,
+      "grad_norm": 0.8428239822387695,
+      "learning_rate": 0.0001818869882858666,
+      "loss": 1.3956,
+      "step": 3656
+    },
+    {
+      "epoch": 0.5878239903556359,
+      "grad_norm": 0.8675400018692017,
+      "learning_rate": 0.00018187730962069575,
+      "loss": 1.5099,
+      "step": 3657
+    },
+    {
+      "epoch": 0.5879847297568817,
+      "grad_norm": 0.7427046895027161,
+      "learning_rate": 0.0001818676286279817,
+      "loss": 1.3147,
+      "step": 3658
+    },
+    {
+      "epoch": 0.5881454691581274,
+      "grad_norm": 0.8740839958190918,
+      "learning_rate": 0.00018185794530799974,
+      "loss": 1.57,
+      "step": 3659
+    },
+    {
+      "epoch": 0.5883062085593731,
+      "grad_norm": 0.7777817845344543,
+      "learning_rate": 0.0001818482596610251,
+      "loss": 1.4949,
+      "step": 3660
+    },
+    {
+      "epoch": 0.5884669479606188,
+      "grad_norm": 0.7431259751319885,
+      "learning_rate": 0.0001818385716873331,
+      "loss": 1.3401,
+      "step": 3661
+    },
+    {
+      "epoch": 0.5886276873618645,
+      "grad_norm": 0.7448675036430359,
+      "learning_rate": 0.00018182888138719924,
+      "loss": 1.4713,
+      "step": 3662
+    },
+    {
+      "epoch": 0.5887884267631103,
+      "grad_norm": 0.8758003115653992,
+      "learning_rate": 0.00018181918876089884,
+      "loss": 1.5525,
+      "step": 3663
+    },
+    {
+      "epoch": 0.588949166164356,
+      "grad_norm": 0.8097133040428162,
+      "learning_rate": 0.00018180949380870755,
+      "loss": 1.4829,
+      "step": 3664
+    },
+    {
+      "epoch": 0.5891099055656017,
+      "grad_norm": 0.7894575595855713,
+      "learning_rate": 0.0001817997965309009,
+      "loss": 1.4029,
+      "step": 3665
+    },
+    {
+      "epoch": 0.5892706449668474,
+      "grad_norm": 0.7532247304916382,
+      "learning_rate": 0.0001817900969277546,
+      "loss": 1.4283,
+      "step": 3666
+    },
+    {
+      "epoch": 0.5894313843680933,
+      "grad_norm": 0.7733650207519531,
+      "learning_rate": 0.00018178039499954437,
+      "loss": 1.4176,
+      "step": 3667
+    },
+    {
+      "epoch": 0.589592123769339,
+      "grad_norm": 0.8910491466522217,
+      "learning_rate": 0.000181770690746546,
+      "loss": 1.4035,
+      "step": 3668
+    },
+    {
+      "epoch": 0.5897528631705847,
+      "grad_norm": 0.7591543197631836,
+      "learning_rate": 0.00018176098416903535,
+      "loss": 1.3643,
+      "step": 3669
+    },
+    {
+      "epoch": 0.5899136025718305,
+      "grad_norm": 0.7821176052093506,
+      "learning_rate": 0.00018175127526728834,
+      "loss": 1.4535,
+      "step": 3670
+    },
+    {
+      "epoch": 0.5900743419730762,
+      "grad_norm": 0.8687539100646973,
+      "learning_rate": 0.00018174156404158104,
+      "loss": 1.4257,
+      "step": 3671
+    },
+    {
+      "epoch": 0.5902350813743219,
+      "grad_norm": 0.7204382419586182,
+      "learning_rate": 0.00018173185049218945,
+      "loss": 1.2511,
+      "step": 3672
+    },
+    {
+      "epoch": 0.5903958207755676,
+      "grad_norm": 0.8722280263900757,
+      "learning_rate": 0.00018172213461938968,
+      "loss": 1.297,
+      "step": 3673
+    },
+    {
+      "epoch": 0.5905565601768133,
+      "grad_norm": 0.8667067289352417,
+      "learning_rate": 0.000181712416423458,
+      "loss": 1.5202,
+      "step": 3674
+    },
+    {
+      "epoch": 0.5907172995780591,
+      "grad_norm": 0.8912860155105591,
+      "learning_rate": 0.0001817026959046706,
+      "loss": 1.4046,
+      "step": 3675
+    },
+    {
+      "epoch": 0.5908780389793048,
+      "grad_norm": 0.8770840167999268,
+      "learning_rate": 0.00018169297306330382,
+      "loss": 1.33,
+      "step": 3676
+    },
+    {
+      "epoch": 0.5910387783805505,
+      "grad_norm": 0.8713551163673401,
+      "learning_rate": 0.0001816832478996341,
+      "loss": 1.3679,
+      "step": 3677
+    },
+    {
+      "epoch": 0.5911995177817962,
+      "grad_norm": 0.8583324551582336,
+      "learning_rate": 0.00018167352041393783,
+      "loss": 1.6945,
+      "step": 3678
+    },
+    {
+      "epoch": 0.591360257183042,
+      "grad_norm": 0.8254200220108032,
+      "learning_rate": 0.0001816637906064916,
+      "loss": 1.5261,
+      "step": 3679
+    },
+    {
+      "epoch": 0.5915209965842877,
+      "grad_norm": 0.8377071022987366,
+      "learning_rate": 0.00018165405847757197,
+      "loss": 1.3657,
+      "step": 3680
+    },
+    {
+      "epoch": 0.5916817359855334,
+      "grad_norm": 0.7493349313735962,
+      "learning_rate": 0.0001816443240274556,
+      "loss": 1.1766,
+      "step": 3681
+    },
+    {
+      "epoch": 0.5918424753867791,
+      "grad_norm": 0.7360992431640625,
+      "learning_rate": 0.0001816345872564192,
+      "loss": 1.2527,
+      "step": 3682
+    },
+    {
+      "epoch": 0.5920032147880249,
+      "grad_norm": 0.8222139477729797,
+      "learning_rate": 0.00018162484816473958,
+      "loss": 1.3253,
+      "step": 3683
+    },
+    {
+      "epoch": 0.5921639541892706,
+      "grad_norm": 0.8094049096107483,
+      "learning_rate": 0.0001816151067526936,
+      "loss": 1.4964,
+      "step": 3684
+    },
+    {
+      "epoch": 0.5923246935905164,
+      "grad_norm": 0.6552943587303162,
+      "learning_rate": 0.00018160536302055816,
+      "loss": 1.3322,
+      "step": 3685
+    },
+    {
+      "epoch": 0.5924854329917622,
+      "grad_norm": 0.9407137632369995,
+      "learning_rate": 0.00018159561696861026,
+      "loss": 1.5793,
+      "step": 3686
+    },
+    {
+      "epoch": 0.5926461723930079,
+      "grad_norm": 0.8417989015579224,
+      "learning_rate": 0.00018158586859712693,
+      "loss": 1.549,
+      "step": 3687
+    },
+    {
+      "epoch": 0.5928069117942536,
+      "grad_norm": 0.7849283218383789,
+      "learning_rate": 0.00018157611790638534,
+      "loss": 1.5159,
+      "step": 3688
+    },
+    {
+      "epoch": 0.5929676511954993,
+      "grad_norm": 0.821814239025116,
+      "learning_rate": 0.0001815663648966626,
+      "loss": 1.6834,
+      "step": 3689
+    },
+    {
+      "epoch": 0.593128390596745,
+      "grad_norm": 0.8287963271141052,
+      "learning_rate": 0.00018155660956823604,
+      "loss": 1.5942,
+      "step": 3690
+    },
+    {
+      "epoch": 0.5932891299979908,
+      "grad_norm": 0.8353708982467651,
+      "learning_rate": 0.0001815468519213829,
+      "loss": 1.5027,
+      "step": 3691
+    },
+    {
+      "epoch": 0.5934498693992365,
+      "grad_norm": 0.7299961447715759,
+      "learning_rate": 0.00018153709195638064,
+      "loss": 1.5006,
+      "step": 3692
+    },
+    {
+      "epoch": 0.5936106088004822,
+      "grad_norm": 0.7976347804069519,
+      "learning_rate": 0.00018152732967350669,
+      "loss": 1.4962,
+      "step": 3693
+    },
+    {
+      "epoch": 0.5937713482017279,
+      "grad_norm": 0.7902625799179077,
+      "learning_rate": 0.0001815175650730385,
+      "loss": 1.2712,
+      "step": 3694
+    },
+    {
+      "epoch": 0.5939320876029737,
+      "grad_norm": 0.7606790065765381,
+      "learning_rate": 0.00018150779815525372,
+      "loss": 1.3739,
+      "step": 3695
+    },
+    {
+      "epoch": 0.5940928270042194,
+      "grad_norm": 0.70188307762146,
+      "learning_rate": 0.00018149802892042995,
+      "loss": 1.3356,
+      "step": 3696
+    },
+    {
+      "epoch": 0.5942535664054651,
+      "grad_norm": 0.7931273579597473,
+      "learning_rate": 0.00018148825736884495,
+      "loss": 1.5998,
+      "step": 3697
+    },
+    {
+      "epoch": 0.5944143058067108,
+      "grad_norm": 1.0433954000473022,
+      "learning_rate": 0.00018147848350077647,
+      "loss": 1.5181,
+      "step": 3698
+    },
+    {
+      "epoch": 0.5945750452079566,
+      "grad_norm": 0.8985570669174194,
+      "learning_rate": 0.00018146870731650233,
+      "loss": 1.5607,
+      "step": 3699
+    },
+    {
+      "epoch": 0.5947357846092023,
+      "grad_norm": 0.7515749335289001,
+      "learning_rate": 0.0001814589288163005,
+      "loss": 1.2315,
+      "step": 3700
+    },
+    {
+      "epoch": 0.594896524010448,
+      "grad_norm": 0.7470755577087402,
+      "learning_rate": 0.0001814491480004489,
+      "loss": 1.4232,
+      "step": 3701
+    },
+    {
+      "epoch": 0.5950572634116937,
+      "grad_norm": 0.7948623299598694,
+      "learning_rate": 0.0001814393648692256,
+      "loss": 1.4657,
+      "step": 3702
+    },
+    {
+      "epoch": 0.5952180028129396,
+      "grad_norm": 0.7484851479530334,
+      "learning_rate": 0.0001814295794229087,
+      "loss": 1.2337,
+      "step": 3703
+    },
+    {
+      "epoch": 0.5953787422141853,
+      "grad_norm": 0.7571722865104675,
+      "learning_rate": 0.00018141979166177635,
+      "loss": 1.1797,
+      "step": 3704
+    },
+    {
+      "epoch": 0.595539481615431,
+      "grad_norm": 0.8501807451248169,
+      "learning_rate": 0.00018141000158610688,
+      "loss": 1.6867,
+      "step": 3705
+    },
+    {
+      "epoch": 0.5957002210166767,
+      "grad_norm": 0.774314284324646,
+      "learning_rate": 0.00018140020919617846,
+      "loss": 1.5818,
+      "step": 3706
+    },
+    {
+      "epoch": 0.5958609604179225,
+      "grad_norm": 0.9612992405891418,
+      "learning_rate": 0.00018139041449226955,
+      "loss": 1.8204,
+      "step": 3707
+    },
+    {
+      "epoch": 0.5960216998191682,
+      "grad_norm": 0.8349434733390808,
+      "learning_rate": 0.00018138061747465856,
+      "loss": 1.4394,
+      "step": 3708
+    },
+    {
+      "epoch": 0.5961824392204139,
+      "grad_norm": 0.8350684642791748,
+      "learning_rate": 0.000181370818143624,
+      "loss": 1.317,
+      "step": 3709
+    },
+    {
+      "epoch": 0.5963431786216596,
+      "grad_norm": 0.7730976343154907,
+      "learning_rate": 0.00018136101649944442,
+      "loss": 1.3897,
+      "step": 3710
+    },
+    {
+      "epoch": 0.5965039180229054,
+      "grad_norm": 0.7344859838485718,
+      "learning_rate": 0.00018135121254239843,
+      "loss": 1.2824,
+      "step": 3711
+    },
+    {
+      "epoch": 0.5966646574241511,
+      "grad_norm": 0.7821740508079529,
+      "learning_rate": 0.00018134140627276484,
+      "loss": 1.2432,
+      "step": 3712
+    },
+    {
+      "epoch": 0.5968253968253968,
+      "grad_norm": 0.7955597639083862,
+      "learning_rate": 0.00018133159769082225,
+      "loss": 1.3529,
+      "step": 3713
+    },
+    {
+      "epoch": 0.5969861362266425,
+      "grad_norm": 0.8677069544792175,
+      "learning_rate": 0.00018132178679684964,
+      "loss": 1.3015,
+      "step": 3714
+    },
+    {
+      "epoch": 0.5971468756278883,
+      "grad_norm": 0.8404643535614014,
+      "learning_rate": 0.00018131197359112585,
+      "loss": 1.6486,
+      "step": 3715
+    },
+    {
+      "epoch": 0.597307615029134,
+      "grad_norm": 0.8434663414955139,
+      "learning_rate": 0.00018130215807392983,
+      "loss": 1.5239,
+      "step": 3716
+    },
+    {
+      "epoch": 0.5974683544303797,
+      "grad_norm": 0.9369305372238159,
+      "learning_rate": 0.00018129234024554062,
+      "loss": 1.6832,
+      "step": 3717
+    },
+    {
+      "epoch": 0.5976290938316254,
+      "grad_norm": 0.7326930165290833,
+      "learning_rate": 0.00018128252010623727,
+      "loss": 1.4893,
+      "step": 3718
+    },
+    {
+      "epoch": 0.5977898332328712,
+      "grad_norm": 0.9300792217254639,
+      "learning_rate": 0.000181272697656299,
+      "loss": 1.4438,
+      "step": 3719
+    },
+    {
+      "epoch": 0.5979505726341169,
+      "grad_norm": 0.9124138355255127,
+      "learning_rate": 0.00018126287289600503,
+      "loss": 1.6215,
+      "step": 3720
+    },
+    {
+      "epoch": 0.5981113120353627,
+      "grad_norm": 0.8568860292434692,
+      "learning_rate": 0.00018125304582563464,
+      "loss": 1.358,
+      "step": 3721
+    },
+    {
+      "epoch": 0.5982720514366084,
+      "grad_norm": 0.8504576086997986,
+      "learning_rate": 0.00018124321644546717,
+      "loss": 1.6332,
+      "step": 3722
+    },
+    {
+      "epoch": 0.5984327908378542,
+      "grad_norm": 0.8040387630462646,
+      "learning_rate": 0.00018123338475578202,
+      "loss": 1.3837,
+      "step": 3723
+    },
+    {
+      "epoch": 0.5985935302390999,
+      "grad_norm": 0.7997492551803589,
+      "learning_rate": 0.00018122355075685876,
+      "loss": 1.3742,
+      "step": 3724
+    },
+    {
+      "epoch": 0.5987542696403456,
+      "grad_norm": 0.7933949828147888,
+      "learning_rate": 0.00018121371444897687,
+      "loss": 1.5881,
+      "step": 3725
+    },
+    {
+      "epoch": 0.5989150090415913,
+      "grad_norm": 0.8566396832466125,
+      "learning_rate": 0.00018120387583241596,
+      "loss": 1.5905,
+      "step": 3726
+    },
+    {
+      "epoch": 0.5990757484428371,
+      "grad_norm": 0.7068170309066772,
+      "learning_rate": 0.00018119403490745578,
+      "loss": 1.1968,
+      "step": 3727
+    },
+    {
+      "epoch": 0.5992364878440828,
+      "grad_norm": 0.7053247094154358,
+      "learning_rate": 0.000181184191674376,
+      "loss": 1.3447,
+      "step": 3728
+    },
+    {
+      "epoch": 0.5993972272453285,
+      "grad_norm": 0.7613917589187622,
+      "learning_rate": 0.00018117434613345652,
+      "loss": 1.3665,
+      "step": 3729
+    },
+    {
+      "epoch": 0.5995579666465742,
+      "grad_norm": 0.8570252656936646,
+      "learning_rate": 0.00018116449828497718,
+      "loss": 1.5134,
+      "step": 3730
+    },
+    {
+      "epoch": 0.59971870604782,
+      "grad_norm": 0.8509315848350525,
+      "learning_rate": 0.00018115464812921793,
+      "loss": 1.4006,
+      "step": 3731
+    },
+    {
+      "epoch": 0.5998794454490657,
+      "grad_norm": 0.898302435874939,
+      "learning_rate": 0.00018114479566645877,
+      "loss": 1.6407,
+      "step": 3732
+    },
+    {
+      "epoch": 0.6000401848503114,
+      "grad_norm": 0.8953514695167542,
+      "learning_rate": 0.0001811349408969798,
+      "loss": 1.5149,
+      "step": 3733
+    },
+    {
+      "epoch": 0.6002009242515571,
+      "grad_norm": 0.753764808177948,
+      "learning_rate": 0.00018112508382106115,
+      "loss": 1.4518,
+      "step": 3734
+    },
+    {
+      "epoch": 0.6003616636528029,
+      "grad_norm": 0.839428186416626,
+      "learning_rate": 0.000181115224438983,
+      "loss": 1.8347,
+      "step": 3735
+    },
+    {
+      "epoch": 0.6005224030540486,
+      "grad_norm": 0.807718813419342,
+      "learning_rate": 0.0001811053627510257,
+      "loss": 1.326,
+      "step": 3736
+    },
+    {
+      "epoch": 0.6006831424552943,
+      "grad_norm": 0.8619174361228943,
+      "learning_rate": 0.00018109549875746953,
+      "loss": 1.4947,
+      "step": 3737
+    },
+    {
+      "epoch": 0.60084388185654,
+      "grad_norm": 0.8958014845848083,
+      "learning_rate": 0.0001810856324585949,
+      "loss": 1.5178,
+      "step": 3738
+    },
+    {
+      "epoch": 0.6010046212577859,
+      "grad_norm": 0.7751129865646362,
+      "learning_rate": 0.00018107576385468233,
+      "loss": 1.3066,
+      "step": 3739
+    },
+    {
+      "epoch": 0.6011653606590316,
+      "grad_norm": 0.773658275604248,
+      "learning_rate": 0.00018106589294601227,
+      "loss": 1.3381,
+      "step": 3740
+    },
+    {
+      "epoch": 0.6013261000602773,
+      "grad_norm": 0.9120597839355469,
+      "learning_rate": 0.0001810560197328654,
+      "loss": 1.6399,
+      "step": 3741
+    },
+    {
+      "epoch": 0.601486839461523,
+      "grad_norm": 0.7968156933784485,
+      "learning_rate": 0.0001810461442155224,
+      "loss": 1.5155,
+      "step": 3742
+    },
+    {
+      "epoch": 0.6016475788627688,
+      "grad_norm": 0.9005950689315796,
+      "learning_rate": 0.0001810362663942639,
+      "loss": 1.5162,
+      "step": 3743
+    },
+    {
+      "epoch": 0.6018083182640145,
+      "grad_norm": 0.7130471467971802,
+      "learning_rate": 0.0001810263862693708,
+      "loss": 1.3079,
+      "step": 3744
+    },
+    {
+      "epoch": 0.6019690576652602,
+      "grad_norm": 0.7735755443572998,
+      "learning_rate": 0.00018101650384112392,
+      "loss": 1.3289,
+      "step": 3745
+    },
+    {
+      "epoch": 0.6021297970665059,
+      "grad_norm": 0.8187049627304077,
+      "learning_rate": 0.0001810066191098042,
+      "loss": 1.3108,
+      "step": 3746
+    },
+    {
+      "epoch": 0.6022905364677517,
+      "grad_norm": 0.8278025984764099,
+      "learning_rate": 0.00018099673207569263,
+      "loss": 1.4337,
+      "step": 3747
+    },
+    {
+      "epoch": 0.6024512758689974,
+      "grad_norm": 0.9100396633148193,
+      "learning_rate": 0.00018098684273907026,
+      "loss": 1.6135,
+      "step": 3748
+    },
+    {
+      "epoch": 0.6026120152702431,
+      "grad_norm": 0.7987372279167175,
+      "learning_rate": 0.0001809769511002182,
+      "loss": 1.2536,
+      "step": 3749
+    },
+    {
+      "epoch": 0.6027727546714888,
+      "grad_norm": 1.1697989702224731,
+      "learning_rate": 0.00018096705715941775,
+      "loss": 1.6774,
+      "step": 3750
+    },
+    {
+      "epoch": 0.6029334940727346,
+      "grad_norm": 0.8988845348358154,
+      "learning_rate": 0.00018095716091695002,
+      "loss": 1.4912,
+      "step": 3751
+    },
+    {
+      "epoch": 0.6030942334739803,
+      "grad_norm": 0.8764055371284485,
+      "learning_rate": 0.00018094726237309646,
+      "loss": 1.5176,
+      "step": 3752
+    },
+    {
+      "epoch": 0.603254972875226,
+      "grad_norm": 0.8616553544998169,
+      "learning_rate": 0.00018093736152813836,
+      "loss": 1.425,
+      "step": 3753
+    },
+    {
+      "epoch": 0.6034157122764717,
+      "grad_norm": 0.7865896224975586,
+      "learning_rate": 0.00018092745838235722,
+      "loss": 1.5219,
+      "step": 3754
+    },
+    {
+      "epoch": 0.6035764516777175,
+      "grad_norm": 0.7704793214797974,
+      "learning_rate": 0.00018091755293603455,
+      "loss": 1.4627,
+      "step": 3755
+    },
+    {
+      "epoch": 0.6037371910789632,
+      "grad_norm": 0.8521960377693176,
+      "learning_rate": 0.00018090764518945195,
+      "loss": 1.5079,
+      "step": 3756
+    },
+    {
+      "epoch": 0.603897930480209,
+      "grad_norm": 0.9701086282730103,
+      "learning_rate": 0.00018089773514289102,
+      "loss": 1.5404,
+      "step": 3757
+    },
+    {
+      "epoch": 0.6040586698814547,
+      "grad_norm": 1.0885487794876099,
+      "learning_rate": 0.00018088782279663352,
+      "loss": 1.1888,
+      "step": 3758
+    },
+    {
+      "epoch": 0.6042194092827005,
+      "grad_norm": 0.8525895476341248,
+      "learning_rate": 0.00018087790815096125,
+      "loss": 1.531,
+      "step": 3759
+    },
+    {
+      "epoch": 0.6043801486839462,
+      "grad_norm": 0.7228007316589355,
+      "learning_rate": 0.00018086799120615597,
+      "loss": 1.206,
+      "step": 3760
+    },
+    {
+      "epoch": 0.6045408880851919,
+      "grad_norm": 0.7948485612869263,
+      "learning_rate": 0.0001808580719624997,
+      "loss": 1.5672,
+      "step": 3761
+    },
+    {
+      "epoch": 0.6047016274864376,
+      "grad_norm": 0.8333085775375366,
+      "learning_rate": 0.00018084815042027435,
+      "loss": 1.485,
+      "step": 3762
+    },
+    {
+      "epoch": 0.6048623668876834,
+      "grad_norm": 0.7149414420127869,
+      "learning_rate": 0.00018083822657976196,
+      "loss": 1.3211,
+      "step": 3763
+    },
+    {
+      "epoch": 0.6050231062889291,
+      "grad_norm": 0.7888516783714294,
+      "learning_rate": 0.00018082830044124465,
+      "loss": 1.3568,
+      "step": 3764
+    },
+    {
+      "epoch": 0.6051838456901748,
+      "grad_norm": 0.807858943939209,
+      "learning_rate": 0.0001808183720050046,
+      "loss": 1.3067,
+      "step": 3765
+    },
+    {
+      "epoch": 0.6053445850914205,
+      "grad_norm": 0.8449767827987671,
+      "learning_rate": 0.00018080844127132405,
+      "loss": 1.3851,
+      "step": 3766
+    },
+    {
+      "epoch": 0.6055053244926663,
+      "grad_norm": 0.921038031578064,
+      "learning_rate": 0.0001807985082404853,
+      "loss": 1.4452,
+      "step": 3767
+    },
+    {
+      "epoch": 0.605666063893912,
+      "grad_norm": 0.932400643825531,
+      "learning_rate": 0.0001807885729127707,
+      "loss": 1.6684,
+      "step": 3768
+    },
+    {
+      "epoch": 0.6058268032951577,
+      "grad_norm": 0.7853494882583618,
+      "learning_rate": 0.00018077863528846271,
+      "loss": 1.6653,
+      "step": 3769
+    },
+    {
+      "epoch": 0.6059875426964034,
+      "grad_norm": 0.8361904621124268,
+      "learning_rate": 0.0001807686953678438,
+      "loss": 1.2109,
+      "step": 3770
+    },
+    {
+      "epoch": 0.6061482820976491,
+      "grad_norm": 0.9145835041999817,
+      "learning_rate": 0.00018075875315119654,
+      "loss": 1.2192,
+      "step": 3771
+    },
+    {
+      "epoch": 0.6063090214988949,
+      "grad_norm": 0.9017598032951355,
+      "learning_rate": 0.0001807488086388036,
+      "loss": 1.4493,
+      "step": 3772
+    },
+    {
+      "epoch": 0.6064697609001406,
+      "grad_norm": 0.768317699432373,
+      "learning_rate": 0.00018073886183094762,
+      "loss": 1.5136,
+      "step": 3773
+    },
+    {
+      "epoch": 0.6066305003013864,
+      "grad_norm": 0.7461726069450378,
+      "learning_rate": 0.00018072891272791138,
+      "loss": 1.2408,
+      "step": 3774
+    },
+    {
+      "epoch": 0.6067912397026322,
+      "grad_norm": 0.9705711603164673,
+      "learning_rate": 0.00018071896132997774,
+      "loss": 1.4389,
+      "step": 3775
+    },
+    {
+      "epoch": 0.6069519791038779,
+      "grad_norm": 0.9792290329933167,
+      "learning_rate": 0.00018070900763742954,
+      "loss": 1.8006,
+      "step": 3776
+    },
+    {
+      "epoch": 0.6071127185051236,
+      "grad_norm": 0.76834636926651,
+      "learning_rate": 0.00018069905165054974,
+      "loss": 1.4672,
+      "step": 3777
+    },
+    {
+      "epoch": 0.6072734579063693,
+      "grad_norm": 0.8297131061553955,
+      "learning_rate": 0.00018068909336962138,
+      "loss": 1.5131,
+      "step": 3778
+    },
+    {
+      "epoch": 0.607434197307615,
+      "grad_norm": 0.8736981749534607,
+      "learning_rate": 0.00018067913279492756,
+      "loss": 1.5907,
+      "step": 3779
+    },
+    {
+      "epoch": 0.6075949367088608,
+      "grad_norm": 0.7620412111282349,
+      "learning_rate": 0.0001806691699267514,
+      "loss": 1.3978,
+      "step": 3780
+    },
+    {
+      "epoch": 0.6077556761101065,
+      "grad_norm": 0.7704584002494812,
+      "learning_rate": 0.00018065920476537616,
+      "loss": 1.4127,
+      "step": 3781
+    },
+    {
+      "epoch": 0.6079164155113522,
+      "grad_norm": 0.7565736770629883,
+      "learning_rate": 0.00018064923731108505,
+      "loss": 1.3097,
+      "step": 3782
+    },
+    {
+      "epoch": 0.608077154912598,
+      "grad_norm": 0.7868773937225342,
+      "learning_rate": 0.0001806392675641615,
+      "loss": 1.461,
+      "step": 3783
+    },
+    {
+      "epoch": 0.6082378943138437,
+      "grad_norm": 0.8495548963546753,
+      "learning_rate": 0.00018062929552488884,
+      "loss": 1.2411,
+      "step": 3784
+    },
+    {
+      "epoch": 0.6083986337150894,
+      "grad_norm": 0.7776492238044739,
+      "learning_rate": 0.0001806193211935506,
+      "loss": 1.4262,
+      "step": 3785
+    },
+    {
+      "epoch": 0.6085593731163351,
+      "grad_norm": 0.8739528059959412,
+      "learning_rate": 0.0001806093445704303,
+      "loss": 1.486,
+      "step": 3786
+    },
+    {
+      "epoch": 0.6087201125175808,
+      "grad_norm": 0.8195846676826477,
+      "learning_rate": 0.0001805993656558116,
+      "loss": 1.2955,
+      "step": 3787
+    },
+    {
+      "epoch": 0.6088808519188266,
+      "grad_norm": 0.8094857335090637,
+      "learning_rate": 0.0001805893844499781,
+      "loss": 1.5479,
+      "step": 3788
+    },
+    {
+      "epoch": 0.6090415913200723,
+      "grad_norm": 0.8348665833473206,
+      "learning_rate": 0.00018057940095321358,
+      "loss": 1.5388,
+      "step": 3789
+    },
+    {
+      "epoch": 0.609202330721318,
+      "grad_norm": 0.8143827319145203,
+      "learning_rate": 0.0001805694151658018,
+      "loss": 1.4557,
+      "step": 3790
+    },
+    {
+      "epoch": 0.6093630701225637,
+      "grad_norm": 0.8822208046913147,
+      "learning_rate": 0.00018055942708802668,
+      "loss": 1.6374,
+      "step": 3791
+    },
+    {
+      "epoch": 0.6095238095238096,
+      "grad_norm": 0.7860540151596069,
+      "learning_rate": 0.00018054943672017213,
+      "loss": 1.318,
+      "step": 3792
+    },
+    {
+      "epoch": 0.6096845489250553,
+      "grad_norm": 0.8239076733589172,
+      "learning_rate": 0.00018053944406252215,
+      "loss": 1.4162,
+      "step": 3793
+    },
+    {
+      "epoch": 0.609845288326301,
+      "grad_norm": 0.8407960534095764,
+      "learning_rate": 0.0001805294491153608,
+      "loss": 1.4742,
+      "step": 3794
+    },
+    {
+      "epoch": 0.6100060277275468,
+      "grad_norm": 0.7489108443260193,
+      "learning_rate": 0.00018051945187897223,
+      "loss": 1.2645,
+      "step": 3795
+    },
+    {
+      "epoch": 0.6101667671287925,
+      "grad_norm": 0.9327155351638794,
+      "learning_rate": 0.0001805094523536406,
+      "loss": 1.5033,
+      "step": 3796
+    },
+    {
+      "epoch": 0.6103275065300382,
+      "grad_norm": 0.7914235591888428,
+      "learning_rate": 0.0001804994505396502,
+      "loss": 1.5223,
+      "step": 3797
+    },
+    {
+      "epoch": 0.6104882459312839,
+      "grad_norm": 0.8573668599128723,
+      "learning_rate": 0.00018048944643728532,
+      "loss": 1.3934,
+      "step": 3798
+    },
+    {
+      "epoch": 0.6106489853325296,
+      "grad_norm": 0.7822215557098389,
+      "learning_rate": 0.00018047944004683037,
+      "loss": 1.4106,
+      "step": 3799
+    },
+    {
+      "epoch": 0.6108097247337754,
+      "grad_norm": 0.7630478143692017,
+      "learning_rate": 0.0001804694313685698,
+      "loss": 1.2981,
+      "step": 3800
+    },
+    {
+      "epoch": 0.6108097247337754,
+      "eval_loss": 1.4838331937789917,
+      "eval_runtime": 46.2589,
+      "eval_samples_per_second": 5.426,
+      "eval_steps_per_second": 2.724,
+      "step": 3800
     }
   ],
   "logging_steps": 1,
@@ -25387,7 +26795,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.
+  "total_flos": 6.634850546598543e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
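The trainer_state.json hunk above appends one log_history entry per training step (3601 through 3800) plus the step-3800 eval record whose eval_loss of 1.4838331937789917 becomes the new best_metric. As a hedged sketch (hypothetical local path, standard library only), the appended entries can be inspected like this:

import json
from pathlib import Path

# Hypothetical local copy of the file committed above.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

# Training entries carry "loss"; eval entries carry "eval_loss" instead.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

last = train_log[-1]
print(f"step {last['step']}: loss={last['loss']}, lr={last['learning_rate']}")

best = min(eval_log, key=lambda e: e["eval_loss"])
print(f"best eval_loss {best['eval_loss']} at step {best['step']}")
# With the data above this reports step 3800, matching "best_metric" and
# "best_model_checkpoint": "miner_id_24/checkpoint-3800" in the first hunk.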