Training in progress, step 3600, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6275e92586ebb9ca0b5977d9652c845089912b4e4ad71f457f7f364769bfa19f
 size 323014168

last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b6fd2f6f76c9acd502516403577f139b932e47d321ddfb84d29194fbca648306
 size 164465012

last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d0dec1cc0b7d1a7a511361ea3e252c632fa0bfee7aa58aa5d7557dc36d2df83f
 size 14244

last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5a0a60f42e028864de11afc84d122693930b530eaae802f79242d99cd78ad9ec
 size 1064

last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.4872082471847534,
+  "best_model_checkpoint": "miner_id_24/checkpoint-3600",
+  "epoch": 0.5786618444846293,
   "eval_steps": 200,
-  "global_step":
+  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -23951,6 +23951,1414 @@
       "eval_samples_per_second": 5.426,
       "eval_steps_per_second": 2.724,
       "step": 3400
+    },
+    {
+      "epoch": 0.546674703636729,
+      "grad_norm": 0.8547418713569641,
+      "learning_rate": 0.0001842783193869666,
+      "loss": 1.5056,
+      "step": 3401
+    },
+    {
+      "epoch": 0.5468354430379747,
+      "grad_norm": 0.7882863879203796,
+      "learning_rate": 0.00018426924304156138,
+      "loss": 1.4055,
+      "step": 3402
+    },
+    {
+      "epoch": 0.5469961824392204,
+      "grad_norm": 0.782737672328949,
+      "learning_rate": 0.00018426016430061708,
+      "loss": 1.4642,
+      "step": 3403
+    },
+    {
+      "epoch": 0.5471569218404662,
+      "grad_norm": 0.7308638095855713,
+      "learning_rate": 0.00018425108316439174,
+      "loss": 1.3799,
+      "step": 3404
+    },
+    {
+      "epoch": 0.5473176612417119,
+      "grad_norm": 0.8885317444801331,
+      "learning_rate": 0.00018424199963314346,
+      "loss": 1.5583,
+      "step": 3405
+    },
+    {
+      "epoch": 0.5474784006429576,
+      "grad_norm": 0.7803483009338379,
+      "learning_rate": 0.0001842329137071305,
+      "loss": 1.5434,
+      "step": 3406
+    },
+    {
+      "epoch": 0.5476391400442033,
+      "grad_norm": 0.9327698945999146,
+      "learning_rate": 0.00018422382538661118,
+      "loss": 1.9708,
+      "step": 3407
+    },
+    {
+      "epoch": 0.5477998794454491,
+      "grad_norm": 0.7588239908218384,
+      "learning_rate": 0.00018421473467184382,
+      "loss": 1.3409,
+      "step": 3408
+    },
+    {
+      "epoch": 0.5479606188466948,
+      "grad_norm": 0.6719919443130493,
+      "learning_rate": 0.00018420564156308687,
+      "loss": 1.2773,
+      "step": 3409
+    },
+    {
+      "epoch": 0.5481213582479405,
+      "grad_norm": 0.6549716591835022,
+      "learning_rate": 0.00018419654606059878,
+      "loss": 1.2431,
+      "step": 3410
+    },
+    {
+      "epoch": 0.5482820976491862,
+      "grad_norm": 0.7733752131462097,
+      "learning_rate": 0.00018418744816463812,
+      "loss": 1.5152,
+      "step": 3411
+    },
+    {
+      "epoch": 0.548442837050432,
+      "grad_norm": 0.7603765726089478,
+      "learning_rate": 0.00018417834787546355,
+      "loss": 1.3646,
+      "step": 3412
+    },
+    {
+      "epoch": 0.5486035764516777,
+      "grad_norm": 0.937150776386261,
+      "learning_rate": 0.00018416924519333377,
+      "loss": 1.7231,
+      "step": 3413
+    },
+    {
+      "epoch": 0.5487643158529234,
+      "grad_norm": 0.7216455936431885,
+      "learning_rate": 0.00018416014011850745,
+      "loss": 1.4371,
+      "step": 3414
+    },
+    {
+      "epoch": 0.5489250552541691,
+      "grad_norm": 0.7946565747261047,
+      "learning_rate": 0.00018415103265124354,
+      "loss": 1.5204,
+      "step": 3415
+    },
+    {
+      "epoch": 0.549085794655415,
+      "grad_norm": 0.794370710849762,
+      "learning_rate": 0.00018414192279180087,
+      "loss": 1.5416,
+      "step": 3416
+    },
+    {
+      "epoch": 0.5492465340566607,
+      "grad_norm": 0.8013623356819153,
+      "learning_rate": 0.00018413281054043847,
+      "loss": 1.3143,
+      "step": 3417
+    },
+    {
+      "epoch": 0.5494072734579064,
+      "grad_norm": 0.7810857892036438,
+      "learning_rate": 0.0001841236958974153,
+      "loss": 1.5244,
+      "step": 3418
+    },
+    {
+      "epoch": 0.5495680128591521,
+      "grad_norm": 0.7712723612785339,
+      "learning_rate": 0.0001841145788629905,
+      "loss": 1.302,
+      "step": 3419
+    },
+    {
+      "epoch": 0.5497287522603979,
+      "grad_norm": 0.8773642182350159,
+      "learning_rate": 0.00018410545943742325,
+      "loss": 1.6297,
+      "step": 3420
+    },
+    {
+      "epoch": 0.5498894916616436,
+      "grad_norm": 0.855289101600647,
+      "learning_rate": 0.00018409633762097276,
+      "loss": 1.7535,
+      "step": 3421
+    },
+    {
+      "epoch": 0.5500502310628893,
+      "grad_norm": 0.7963067293167114,
+      "learning_rate": 0.00018408721341389836,
+      "loss": 1.442,
+      "step": 3422
+    },
+    {
+      "epoch": 0.550210970464135,
+      "grad_norm": 0.9864539504051208,
+      "learning_rate": 0.00018407808681645947,
+      "loss": 1.7095,
+      "step": 3423
+    },
+    {
+      "epoch": 0.5503717098653808,
+      "grad_norm": 0.8141933083534241,
+      "learning_rate": 0.00018406895782891543,
+      "loss": 1.2587,
+      "step": 3424
+    },
+    {
+      "epoch": 0.5505324492666265,
+      "grad_norm": 0.7481880784034729,
+      "learning_rate": 0.00018405982645152582,
+      "loss": 1.4753,
+      "step": 3425
+    },
+    {
+      "epoch": 0.5506931886678722,
+      "grad_norm": 0.7643571496009827,
+      "learning_rate": 0.00018405069268455024,
+      "loss": 1.3064,
+      "step": 3426
+    },
+    {
+      "epoch": 0.5508539280691179,
+      "grad_norm": 0.9446070790290833,
+      "learning_rate": 0.00018404155652824829,
+      "loss": 1.3712,
+      "step": 3427
+    },
+    {
+      "epoch": 0.5510146674703637,
+      "grad_norm": 0.8087760806083679,
+      "learning_rate": 0.0001840324179828797,
+      "loss": 1.3824,
+      "step": 3428
+    },
+    {
+      "epoch": 0.5511754068716094,
+      "grad_norm": 0.9354684352874756,
+      "learning_rate": 0.00018402327704870425,
+      "loss": 1.5243,
+      "step": 3429
+    },
+    {
+      "epoch": 0.5513361462728551,
+      "grad_norm": 0.7447128891944885,
+      "learning_rate": 0.0001840141337259818,
+      "loss": 1.2098,
+      "step": 3430
+    },
+    {
+      "epoch": 0.5514968856741008,
+      "grad_norm": 0.6258952617645264,
+      "learning_rate": 0.00018400498801497225,
+      "loss": 1.2759,
+      "step": 3431
+    },
+    {
+      "epoch": 0.5516576250753465,
+      "grad_norm": 0.7399110794067383,
+      "learning_rate": 0.0001839958399159356,
+      "loss": 1.3586,
+      "step": 3432
+    },
+    {
+      "epoch": 0.5518183644765923,
+      "grad_norm": 0.8511313796043396,
+      "learning_rate": 0.00018398668942913194,
+      "loss": 1.3737,
+      "step": 3433
+    },
+    {
+      "epoch": 0.5519791038778381,
+      "grad_norm": 0.7959484457969666,
+      "learning_rate": 0.00018397753655482134,
+      "loss": 1.323,
+      "step": 3434
+    },
+    {
+      "epoch": 0.5521398432790838,
+      "grad_norm": 0.7975229620933533,
+      "learning_rate": 0.000183968381293264,
+      "loss": 1.3714,
+      "step": 3435
+    },
+    {
+      "epoch": 0.5523005826803296,
+      "grad_norm": 0.8922042846679688,
+      "learning_rate": 0.0001839592236447202,
+      "loss": 1.9249,
+      "step": 3436
+    },
+    {
+      "epoch": 0.5524613220815753,
+      "grad_norm": 0.7569393515586853,
+      "learning_rate": 0.00018395006360945025,
+      "loss": 1.5926,
+      "step": 3437
+    },
+    {
+      "epoch": 0.552622061482821,
+      "grad_norm": 0.7032526731491089,
+      "learning_rate": 0.0001839409011877146,
+      "loss": 1.2975,
+      "step": 3438
+    },
+    {
+      "epoch": 0.5527828008840667,
+      "grad_norm": 0.8664374947547913,
+      "learning_rate": 0.0001839317363797736,
+      "loss": 1.5843,
+      "step": 3439
+    },
+    {
+      "epoch": 0.5529435402853125,
+      "grad_norm": 0.8368168473243713,
+      "learning_rate": 0.00018392256918588787,
+      "loss": 1.5089,
+      "step": 3440
+    },
+    {
+      "epoch": 0.5531042796865582,
+      "grad_norm": 0.849844217300415,
+      "learning_rate": 0.00018391339960631794,
+      "loss": 1.3327,
+      "step": 3441
+    },
+    {
+      "epoch": 0.5532650190878039,
+      "grad_norm": 0.8162112236022949,
+      "learning_rate": 0.00018390422764132453,
+      "loss": 1.5279,
+      "step": 3442
+    },
+    {
+      "epoch": 0.5534257584890496,
+      "grad_norm": 0.8311508893966675,
+      "learning_rate": 0.00018389505329116838,
+      "loss": 1.7123,
+      "step": 3443
+    },
+    {
+      "epoch": 0.5535864978902953,
+      "grad_norm": 0.8905399441719055,
+      "learning_rate": 0.00018388587655611028,
+      "loss": 1.5369,
+      "step": 3444
+    },
+    {
+      "epoch": 0.5537472372915411,
+      "grad_norm": 0.860458254814148,
+      "learning_rate": 0.00018387669743641106,
+      "loss": 1.631,
+      "step": 3445
+    },
+    {
+      "epoch": 0.5539079766927868,
+      "grad_norm": 0.7295376658439636,
+      "learning_rate": 0.00018386751593233168,
+      "loss": 1.2728,
+      "step": 3446
+    },
+    {
+      "epoch": 0.5540687160940325,
+      "grad_norm": 0.8431108593940735,
+      "learning_rate": 0.00018385833204413317,
+      "loss": 1.494,
+      "step": 3447
+    },
+    {
+      "epoch": 0.5542294554952782,
+      "grad_norm": 0.8596949577331543,
+      "learning_rate": 0.00018384914577207657,
+      "loss": 1.4665,
+      "step": 3448
+    },
+    {
+      "epoch": 0.554390194896524,
+      "grad_norm": 0.7842774391174316,
+      "learning_rate": 0.00018383995711642302,
+      "loss": 1.5249,
+      "step": 3449
+    },
+    {
+      "epoch": 0.5545509342977697,
+      "grad_norm": 0.7341744303703308,
+      "learning_rate": 0.00018383076607743376,
+      "loss": 1.1438,
+      "step": 3450
+    },
+    {
+      "epoch": 0.5547116736990154,
+      "grad_norm": 0.9056680798530579,
+      "learning_rate": 0.00018382157265537004,
+      "loss": 1.4604,
+      "step": 3451
+    },
+    {
+      "epoch": 0.5548724131002613,
+      "grad_norm": 0.8012903332710266,
+      "learning_rate": 0.0001838123768504932,
+      "loss": 1.6805,
+      "step": 3452
+    },
+    {
+      "epoch": 0.555033152501507,
+      "grad_norm": 0.9109938740730286,
+      "learning_rate": 0.00018380317866306464,
+      "loss": 1.5579,
+      "step": 3453
+    },
+    {
+      "epoch": 0.5551938919027527,
+      "grad_norm": 0.8787387013435364,
+      "learning_rate": 0.0001837939780933459,
+      "loss": 1.555,
+      "step": 3454
+    },
+    {
+      "epoch": 0.5553546313039984,
+      "grad_norm": 0.8159571290016174,
+      "learning_rate": 0.00018378477514159846,
+      "loss": 1.38,
+      "step": 3455
+    },
+    {
+      "epoch": 0.5555153707052441,
+      "grad_norm": 0.6941602230072021,
+      "learning_rate": 0.00018377556980808397,
+      "loss": 1.3412,
+      "step": 3456
+    },
+    {
+      "epoch": 0.5556761101064899,
+      "grad_norm": 0.9344702959060669,
+      "learning_rate": 0.0001837663620930641,
+      "loss": 1.6012,
+      "step": 3457
+    },
+    {
+      "epoch": 0.5558368495077356,
+      "grad_norm": 0.7718198299407959,
+      "learning_rate": 0.0001837571519968006,
+      "loss": 1.4974,
+      "step": 3458
+    },
+    {
+      "epoch": 0.5559975889089813,
+      "grad_norm": 0.719251811504364,
+      "learning_rate": 0.00018374793951955528,
+      "loss": 1.2737,
+      "step": 3459
+    },
+    {
+      "epoch": 0.556158328310227,
+      "grad_norm": 0.7487744092941284,
+      "learning_rate": 0.00018373872466159003,
+      "loss": 1.4077,
+      "step": 3460
+    },
+    {
+      "epoch": 0.5563190677114728,
+      "grad_norm": 0.8294851779937744,
+      "learning_rate": 0.00018372950742316681,
+      "loss": 1.4041,
+      "step": 3461
+    },
+    {
+      "epoch": 0.5564798071127185,
+      "grad_norm": 0.9241026639938354,
+      "learning_rate": 0.00018372028780454765,
+      "loss": 1.5195,
+      "step": 3462
+    },
+    {
+      "epoch": 0.5566405465139642,
+      "grad_norm": 0.8246049284934998,
+      "learning_rate": 0.0001837110658059946,
+      "loss": 1.4784,
+      "step": 3463
+    },
+    {
+      "epoch": 0.5568012859152099,
+      "grad_norm": 0.8178633451461792,
+      "learning_rate": 0.00018370184142776986,
+      "loss": 1.3113,
+      "step": 3464
+    },
+    {
+      "epoch": 0.5569620253164557,
+      "grad_norm": 0.9420003294944763,
+      "learning_rate": 0.00018369261467013562,
+      "loss": 1.5045,
+      "step": 3465
+    },
+    {
+      "epoch": 0.5571227647177014,
+      "grad_norm": 0.7491409182548523,
+      "learning_rate": 0.00018368338553335418,
+      "loss": 1.3278,
+      "step": 3466
+    },
+    {
+      "epoch": 0.5572835041189471,
+      "grad_norm": 0.9637048840522766,
+      "learning_rate": 0.00018367415401768792,
+      "loss": 1.5834,
+      "step": 3467
+    },
+    {
+      "epoch": 0.5574442435201928,
+      "grad_norm": 0.8200823068618774,
+      "learning_rate": 0.0001836649201233992,
+      "loss": 1.5716,
+      "step": 3468
+    },
+    {
+      "epoch": 0.5576049829214386,
+      "grad_norm": 0.7579705715179443,
+      "learning_rate": 0.00018365568385075062,
+      "loss": 1.321,
+      "step": 3469
+    },
+    {
+      "epoch": 0.5577657223226844,
+      "grad_norm": 0.8488672971725464,
+      "learning_rate": 0.00018364644520000467,
+      "loss": 1.4687,
+      "step": 3470
+    },
+    {
+      "epoch": 0.5579264617239301,
+      "grad_norm": 0.8835506439208984,
+      "learning_rate": 0.00018363720417142397,
+      "loss": 1.5661,
+      "step": 3471
+    },
+    {
+      "epoch": 0.5580872011251758,
+      "grad_norm": 0.7685453295707703,
+      "learning_rate": 0.00018362796076527128,
+      "loss": 1.204,
+      "step": 3472
+    },
+    {
+      "epoch": 0.5582479405264216,
+      "grad_norm": 0.9423272609710693,
+      "learning_rate": 0.0001836187149818093,
+      "loss": 1.5675,
+      "step": 3473
+    },
+    {
+      "epoch": 0.5584086799276673,
+      "grad_norm": 0.8542371988296509,
+      "learning_rate": 0.00018360946682130088,
+      "loss": 1.6619,
+      "step": 3474
+    },
+    {
+      "epoch": 0.558569419328913,
+      "grad_norm": 0.7364301085472107,
+      "learning_rate": 0.00018360021628400892,
+      "loss": 1.5459,
+      "step": 3475
+    },
+    {
+      "epoch": 0.5587301587301587,
+      "grad_norm": 6.780817031860352,
+      "learning_rate": 0.00018359096337019638,
+      "loss": 1.4864,
+      "step": 3476
+    },
+    {
+      "epoch": 0.5588908981314045,
+      "grad_norm": 0.8980083465576172,
+      "learning_rate": 0.00018358170808012631,
+      "loss": 1.7447,
+      "step": 3477
+    },
+    {
+      "epoch": 0.5590516375326502,
+      "grad_norm": 0.6896567940711975,
+      "learning_rate": 0.00018357245041406183,
+      "loss": 1.2883,
+      "step": 3478
+    },
+    {
+      "epoch": 0.5592123769338959,
+      "grad_norm": 0.9001600742340088,
+      "learning_rate": 0.00018356319037226608,
+      "loss": 1.5896,
+      "step": 3479
+    },
+    {
+      "epoch": 0.5593731163351416,
+      "grad_norm": 0.7801342606544495,
+      "learning_rate": 0.0001835539279550023,
+      "loss": 1.4654,
+      "step": 3480
+    },
+    {
+      "epoch": 0.5595338557363874,
+      "grad_norm": 0.8106282353401184,
+      "learning_rate": 0.00018354466316253382,
+      "loss": 1.4703,
+      "step": 3481
+    },
+    {
+      "epoch": 0.5596945951376331,
+      "grad_norm": 0.6558858752250671,
+      "learning_rate": 0.00018353539599512395,
+      "loss": 1.2083,
+      "step": 3482
+    },
+    {
+      "epoch": 0.5598553345388788,
+      "grad_norm": 0.7697267532348633,
+      "learning_rate": 0.00018352612645303618,
+      "loss": 1.4069,
+      "step": 3483
+    },
+    {
+      "epoch": 0.5600160739401245,
+      "grad_norm": 0.8283537030220032,
+      "learning_rate": 0.00018351685453653403,
+      "loss": 1.4165,
+      "step": 3484
+    },
+    {
+      "epoch": 0.5601768133413703,
+      "grad_norm": 0.6744006276130676,
+      "learning_rate": 0.00018350758024588106,
+      "loss": 1.3273,
+      "step": 3485
+    },
+    {
+      "epoch": 0.560337552742616,
+      "grad_norm": 0.8205862641334534,
+      "learning_rate": 0.0001834983035813409,
+      "loss": 1.4465,
+      "step": 3486
+    },
+    {
+      "epoch": 0.5604982921438617,
+      "grad_norm": 0.8396273851394653,
+      "learning_rate": 0.00018348902454317726,
+      "loss": 1.6407,
+      "step": 3487
+    },
+    {
+      "epoch": 0.5606590315451075,
+      "grad_norm": 0.8097394108772278,
+      "learning_rate": 0.0001834797431316539,
+      "loss": 1.3629,
+      "step": 3488
+    },
+    {
+      "epoch": 0.5608197709463533,
+      "grad_norm": 0.8776138424873352,
+      "learning_rate": 0.0001834704593470347,
+      "loss": 1.2357,
+      "step": 3489
+    },
+    {
+      "epoch": 0.560980510347599,
+      "grad_norm": 0.7313289642333984,
+      "learning_rate": 0.00018346117318958355,
+      "loss": 1.2672,
+      "step": 3490
+    },
+    {
+      "epoch": 0.5611412497488447,
+      "grad_norm": 0.7351658940315247,
+      "learning_rate": 0.00018345188465956444,
+      "loss": 1.2434,
+      "step": 3491
+    },
+    {
+      "epoch": 0.5613019891500904,
+      "grad_norm": 0.7830203771591187,
+      "learning_rate": 0.00018344259375724145,
+      "loss": 1.3425,
+      "step": 3492
+    },
+    {
+      "epoch": 0.5614627285513362,
+      "grad_norm": 0.8343597650527954,
+      "learning_rate": 0.00018343330048287864,
+      "loss": 1.6124,
+      "step": 3493
+    },
+    {
+      "epoch": 0.5616234679525819,
+      "grad_norm": 0.9303722381591797,
+      "learning_rate": 0.0001834240048367402,
+      "loss": 1.5776,
+      "step": 3494
+    },
+    {
+      "epoch": 0.5617842073538276,
+      "grad_norm": 0.7689720988273621,
+      "learning_rate": 0.0001834147068190904,
+      "loss": 1.3182,
+      "step": 3495
+    },
+    {
+      "epoch": 0.5619449467550733,
+      "grad_norm": 0.8655688762664795,
+      "learning_rate": 0.00018340540643019355,
+      "loss": 1.71,
+      "step": 3496
+    },
+    {
+      "epoch": 0.5621056861563191,
+      "grad_norm": 0.8428177237510681,
+      "learning_rate": 0.000183396103670314,
+      "loss": 1.5182,
+      "step": 3497
+    },
+    {
+      "epoch": 0.5622664255575648,
+      "grad_norm": 0.704310417175293,
+      "learning_rate": 0.00018338679853971627,
+      "loss": 1.4134,
+      "step": 3498
+    },
+    {
+      "epoch": 0.5624271649588105,
+      "grad_norm": 0.8697547912597656,
+      "learning_rate": 0.00018337749103866484,
+      "loss": 1.5691,
+      "step": 3499
+    },
+    {
+      "epoch": 0.5625879043600562,
+      "grad_norm": 0.8562211394309998,
+      "learning_rate": 0.00018336818116742427,
+      "loss": 1.4765,
+      "step": 3500
+    },
+    {
+      "epoch": 0.562748643761302,
+      "grad_norm": 0.7913533449172974,
+      "learning_rate": 0.00018335886892625926,
+      "loss": 1.1787,
+      "step": 3501
+    },
+    {
+      "epoch": 0.5629093831625477,
+      "grad_norm": 0.7344976663589478,
+      "learning_rate": 0.00018334955431543453,
+      "loss": 1.404,
+      "step": 3502
+    },
+    {
+      "epoch": 0.5630701225637934,
+      "grad_norm": 0.840094804763794,
+      "learning_rate": 0.00018334023733521478,
+      "loss": 1.4018,
+      "step": 3503
+    },
+    {
+      "epoch": 0.5632308619650391,
+      "grad_norm": 0.7555215358734131,
+      "learning_rate": 0.00018333091798586503,
+      "loss": 1.3866,
+      "step": 3504
+    },
+    {
+      "epoch": 0.5633916013662849,
+      "grad_norm": 0.7541857957839966,
+      "learning_rate": 0.00018332159626765004,
+      "loss": 1.3739,
+      "step": 3505
+    },
+    {
+      "epoch": 0.5635523407675307,
+      "grad_norm": 0.82652348279953,
+      "learning_rate": 0.0001833122721808349,
+      "loss": 1.456,
+      "step": 3506
+    },
+    {
+      "epoch": 0.5637130801687764,
+      "grad_norm": 0.7439965009689331,
+      "learning_rate": 0.00018330294572568466,
+      "loss": 1.4646,
+      "step": 3507
+    },
+    {
+      "epoch": 0.5638738195700221,
+      "grad_norm": 0.7989190816879272,
+      "learning_rate": 0.00018329361690246437,
+      "loss": 1.4223,
+      "step": 3508
+    },
+    {
+      "epoch": 0.5640345589712679,
+      "grad_norm": 0.9072486162185669,
+      "learning_rate": 0.00018328428571143932,
+      "loss": 1.2378,
+      "step": 3509
+    },
+    {
+      "epoch": 0.5641952983725136,
+      "grad_norm": 0.8002077341079712,
+      "learning_rate": 0.0001832749521528747,
+      "loss": 1.3504,
+      "step": 3510
+    },
+    {
+      "epoch": 0.5643560377737593,
+      "grad_norm": 0.771520733833313,
+      "learning_rate": 0.00018326561622703587,
+      "loss": 1.5216,
+      "step": 3511
+    },
+    {
+      "epoch": 0.564516777175005,
+      "grad_norm": 0.8173030018806458,
+      "learning_rate": 0.0001832562779341882,
+      "loss": 1.5692,
+      "step": 3512
+    },
+    {
+      "epoch": 0.5646775165762508,
+      "grad_norm": 0.8047986626625061,
+      "learning_rate": 0.0001832469372745972,
+      "loss": 1.3961,
+      "step": 3513
+    },
+    {
+      "epoch": 0.5648382559774965,
+      "grad_norm": 0.8184692859649658,
+      "learning_rate": 0.00018323759424852836,
+      "loss": 1.4309,
+      "step": 3514
+    },
+    {
+      "epoch": 0.5649989953787422,
+      "grad_norm": 0.7703588008880615,
+      "learning_rate": 0.00018322824885624726,
+      "loss": 1.4827,
+      "step": 3515
+    },
+    {
+      "epoch": 0.5651597347799879,
+      "grad_norm": 0.9133789539337158,
+      "learning_rate": 0.0001832189010980196,
+      "loss": 1.623,
+      "step": 3516
+    },
+    {
+      "epoch": 0.5653204741812337,
+      "grad_norm": 0.6975560784339905,
+      "learning_rate": 0.00018320955097411108,
+      "loss": 1.2888,
+      "step": 3517
+    },
+    {
+      "epoch": 0.5654812135824794,
+      "grad_norm": 0.8293776512145996,
+      "learning_rate": 0.00018320019848478753,
+      "loss": 1.3558,
+      "step": 3518
+    },
+    {
+      "epoch": 0.5656419529837251,
+      "grad_norm": 0.8176863789558411,
+      "learning_rate": 0.0001831908436303148,
+      "loss": 1.3205,
+      "step": 3519
+    },
+    {
+      "epoch": 0.5658026923849708,
+      "grad_norm": 0.7486855387687683,
+      "learning_rate": 0.00018318148641095883,
+      "loss": 1.3347,
+      "step": 3520
+    },
+    {
+      "epoch": 0.5659634317862166,
+      "grad_norm": 0.8870848417282104,
+      "learning_rate": 0.0001831721268269856,
+      "loss": 1.4796,
+      "step": 3521
+    },
+    {
+      "epoch": 0.5661241711874623,
+      "grad_norm": 0.8714601397514343,
+      "learning_rate": 0.00018316276487866117,
+      "loss": 1.3692,
+      "step": 3522
+    },
+    {
+      "epoch": 0.5662849105887081,
+      "grad_norm": 0.789484977722168,
+      "learning_rate": 0.00018315340056625172,
+      "loss": 1.4999,
+      "step": 3523
+    },
+    {
+      "epoch": 0.5664456499899538,
+      "grad_norm": 0.7682178616523743,
+      "learning_rate": 0.00018314403389002343,
+      "loss": 1.3781,
+      "step": 3524
+    },
+    {
+      "epoch": 0.5666063893911996,
+      "grad_norm": 0.76296466588974,
+      "learning_rate": 0.00018313466485024257,
+      "loss": 1.4181,
+      "step": 3525
+    },
+    {
+      "epoch": 0.5667671287924453,
+      "grad_norm": 0.7934573888778687,
+      "learning_rate": 0.0001831252934471754,
+      "loss": 1.6167,
+      "step": 3526
+    },
+    {
+      "epoch": 0.566927868193691,
+      "grad_norm": 0.8610479831695557,
+      "learning_rate": 0.00018311591968108844,
+      "loss": 1.4827,
+      "step": 3527
+    },
+    {
+      "epoch": 0.5670886075949367,
+      "grad_norm": 0.8090446591377258,
+      "learning_rate": 0.00018310654355224812,
+      "loss": 1.574,
+      "step": 3528
+    },
+    {
+      "epoch": 0.5672493469961825,
+      "grad_norm": 1.0797876119613647,
+      "learning_rate": 0.00018309716506092097,
+      "loss": 1.5846,
+      "step": 3529
+    },
+    {
+      "epoch": 0.5674100863974282,
+      "grad_norm": 0.8803074359893799,
+      "learning_rate": 0.0001830877842073736,
+      "loss": 1.6595,
+      "step": 3530
+    },
+    {
+      "epoch": 0.5675708257986739,
+      "grad_norm": 0.6558796763420105,
+      "learning_rate": 0.00018307840099187264,
+      "loss": 1.2479,
+      "step": 3531
+    },
+    {
+      "epoch": 0.5677315651999196,
+      "grad_norm": 0.8579014539718628,
+      "learning_rate": 0.00018306901541468486,
+      "loss": 1.3684,
+      "step": 3532
+    },
+    {
+      "epoch": 0.5678923046011654,
+      "grad_norm": 0.8699368238449097,
+      "learning_rate": 0.00018305962747607708,
+      "loss": 1.743,
+      "step": 3533
+    },
+    {
+      "epoch": 0.5680530440024111,
+      "grad_norm": 0.7640133500099182,
+      "learning_rate": 0.00018305023717631616,
+      "loss": 1.3804,
+      "step": 3534
+    },
+    {
+      "epoch": 0.5682137834036568,
+      "grad_norm": 0.7888444066047668,
+      "learning_rate": 0.00018304084451566905,
+      "loss": 1.3579,
+      "step": 3535
+    },
+    {
+      "epoch": 0.5683745228049025,
+      "grad_norm": 0.7729140520095825,
+      "learning_rate": 0.00018303144949440275,
+      "loss": 1.4692,
+      "step": 3536
+    },
+    {
+      "epoch": 0.5685352622061483,
+      "grad_norm": 0.9253979921340942,
+      "learning_rate": 0.0001830220521127843,
+      "loss": 1.6244,
+      "step": 3537
+    },
+    {
+      "epoch": 0.568696001607394,
+      "grad_norm": 0.8450219631195068,
+      "learning_rate": 0.0001830126523710809,
+      "loss": 1.3658,
+      "step": 3538
+    },
+    {
+      "epoch": 0.5688567410086397,
+      "grad_norm": 0.8708456754684448,
+      "learning_rate": 0.00018300325026955974,
+      "loss": 1.3416,
+      "step": 3539
+    },
+    {
+      "epoch": 0.5690174804098854,
+      "grad_norm": 0.9159559607505798,
+      "learning_rate": 0.00018299384580848805,
+      "loss": 1.3184,
+      "step": 3540
+    },
+    {
+      "epoch": 0.5691782198111313,
+      "grad_norm": 0.9485965967178345,
+      "learning_rate": 0.00018298443898813327,
+      "loss": 1.5163,
+      "step": 3541
+    },
+    {
+      "epoch": 0.569338959212377,
+      "grad_norm": 0.7586264610290527,
+      "learning_rate": 0.00018297502980876268,
+      "loss": 1.2261,
+      "step": 3542
+    },
+    {
+      "epoch": 0.5694996986136227,
+      "grad_norm": 0.8161360025405884,
+      "learning_rate": 0.00018296561827064388,
+      "loss": 1.3039,
+      "step": 3543
+    },
+    {
+      "epoch": 0.5696604380148684,
+      "grad_norm": 0.7738651037216187,
+      "learning_rate": 0.00018295620437404434,
+      "loss": 1.4654,
+      "step": 3544
+    },
+    {
+      "epoch": 0.5698211774161142,
+      "grad_norm": 0.8391285538673401,
+      "learning_rate": 0.00018294678811923168,
+      "loss": 1.5028,
+      "step": 3545
+    },
+    {
+      "epoch": 0.5699819168173599,
+      "grad_norm": 0.8255013227462769,
+      "learning_rate": 0.0001829373695064736,
+      "loss": 1.2561,
+      "step": 3546
+    },
+    {
+      "epoch": 0.5701426562186056,
+      "grad_norm": 0.8152461051940918,
+      "learning_rate": 0.00018292794853603782,
+      "loss": 1.3583,
+      "step": 3547
+    },
+    {
+      "epoch": 0.5703033956198513,
+      "grad_norm": 0.8787049651145935,
+      "learning_rate": 0.0001829185252081922,
+      "loss": 1.4508,
+      "step": 3548
+    },
+    {
+      "epoch": 0.570464135021097,
+      "grad_norm": 0.7311258912086487,
+      "learning_rate": 0.00018290909952320457,
+      "loss": 1.4889,
+      "step": 3549
+    },
+    {
+      "epoch": 0.5706248744223428,
+      "grad_norm": 0.8246176838874817,
+      "learning_rate": 0.00018289967148134287,
+      "loss": 1.3151,
+      "step": 3550
+    },
+    {
+      "epoch": 0.5707856138235885,
+      "grad_norm": 0.8691884279251099,
+      "learning_rate": 0.00018289024108287513,
+      "loss": 1.3415,
+      "step": 3551
+    },
+    {
+      "epoch": 0.5709463532248342,
+      "grad_norm": 0.8864870071411133,
+      "learning_rate": 0.00018288080832806947,
+      "loss": 1.6408,
+      "step": 3552
+    },
+    {
+      "epoch": 0.57110709262608,
+      "grad_norm": 0.8469099998474121,
+      "learning_rate": 0.00018287137321719397,
+      "loss": 1.4378,
+      "step": 3553
+    },
+    {
+      "epoch": 0.5712678320273257,
+      "grad_norm": 0.7355675101280212,
+      "learning_rate": 0.00018286193575051688,
+      "loss": 1.3712,
+      "step": 3554
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 0.8527761697769165,
+      "learning_rate": 0.00018285249592830648,
+      "loss": 1.4176,
+      "step": 3555
+    },
+    {
+      "epoch": 0.5715893108298171,
+      "grad_norm": 0.8498595356941223,
+      "learning_rate": 0.0001828430537508311,
+      "loss": 1.6733,
+      "step": 3556
+    },
+    {
+      "epoch": 0.5717500502310628,
+      "grad_norm": 0.7803249359130859,
+      "learning_rate": 0.00018283360921835917,
+      "loss": 1.2986,
+      "step": 3557
+    },
+    {
+      "epoch": 0.5719107896323086,
+      "grad_norm": 0.7896379828453064,
+      "learning_rate": 0.00018282416233115917,
+      "loss": 1.3964,
+      "step": 3558
+    },
+    {
+      "epoch": 0.5720715290335544,
+      "grad_norm": 0.7736493945121765,
+      "learning_rate": 0.00018281471308949967,
+      "loss": 1.3914,
+      "step": 3559
+    },
+    {
+      "epoch": 0.5722322684348001,
+      "grad_norm": 0.8273385763168335,
+      "learning_rate": 0.0001828052614936492,
+      "loss": 1.3998,
+      "step": 3560
+    },
+    {
+      "epoch": 0.5723930078360459,
+      "grad_norm": 0.8132904171943665,
+      "learning_rate": 0.00018279580754387658,
+      "loss": 1.5559,
+      "step": 3561
+    },
+    {
+      "epoch": 0.5725537472372916,
+      "grad_norm": 0.6619194746017456,
+      "learning_rate": 0.00018278635124045043,
+      "loss": 1.2204,
+      "step": 3562
+    },
+    {
+      "epoch": 0.5727144866385373,
+      "grad_norm": 0.8050054311752319,
+      "learning_rate": 0.00018277689258363965,
+      "loss": 1.506,
+      "step": 3563
+    },
+    {
+      "epoch": 0.572875226039783,
+      "grad_norm": 0.8875206708908081,
+      "learning_rate": 0.00018276743157371307,
+      "loss": 1.5657,
+      "step": 3564
+    },
+    {
+      "epoch": 0.5730359654410287,
+      "grad_norm": 0.935675859451294,
+      "learning_rate": 0.00018275796821093967,
+      "loss": 1.5008,
+      "step": 3565
+    },
+    {
+      "epoch": 0.5731967048422745,
+      "grad_norm": 0.7848905920982361,
+      "learning_rate": 0.00018274850249558848,
+      "loss": 1.3902,
+      "step": 3566
+    },
+    {
+      "epoch": 0.5733574442435202,
+      "grad_norm": 0.8881785869598389,
+      "learning_rate": 0.00018273903442792854,
+      "loss": 1.4688,
+      "step": 3567
+    },
+    {
+      "epoch": 0.5735181836447659,
+      "grad_norm": 0.8983811140060425,
+      "learning_rate": 0.00018272956400822905,
+      "loss": 1.6156,
+      "step": 3568
+    },
+    {
+      "epoch": 0.5736789230460116,
+      "grad_norm": 0.8703747391700745,
+      "learning_rate": 0.00018272009123675918,
+      "loss": 1.4617,
+      "step": 3569
+    },
+    {
+      "epoch": 0.5738396624472574,
+      "grad_norm": 0.8837721347808838,
+      "learning_rate": 0.00018271061611378826,
+      "loss": 1.6641,
+      "step": 3570
+    },
+    {
+      "epoch": 0.5740004018485031,
+      "grad_norm": 0.8597355484962463,
+      "learning_rate": 0.0001827011386395856,
+      "loss": 1.655,
+      "step": 3571
+    },
+    {
+      "epoch": 0.5741611412497488,
+      "grad_norm": 0.7949037551879883,
+      "learning_rate": 0.00018269165881442065,
+      "loss": 1.4002,
+      "step": 3572
+    },
+    {
+      "epoch": 0.5743218806509945,
+      "grad_norm": 0.7416331768035889,
+      "learning_rate": 0.0001826821766385629,
+      "loss": 1.1628,
+      "step": 3573
+    },
+    {
+      "epoch": 0.5744826200522403,
+      "grad_norm": 0.7875040769577026,
+      "learning_rate": 0.00018267269211228184,
+      "loss": 1.4303,
+      "step": 3574
+    },
+    {
+      "epoch": 0.574643359453486,
+      "grad_norm": 0.8152592182159424,
+      "learning_rate": 0.00018266320523584715,
+      "loss": 1.4172,
+      "step": 3575
+    },
+    {
+      "epoch": 0.5748040988547317,
+      "grad_norm": 0.8573201894760132,
+      "learning_rate": 0.0001826537160095285,
+      "loss": 1.5459,
+      "step": 3576
+    },
+    {
+      "epoch": 0.5749648382559776,
+      "grad_norm": 0.7887688875198364,
+      "learning_rate": 0.00018264422443359563,
+      "loss": 1.5199,
+      "step": 3577
+    },
+    {
+      "epoch": 0.5751255776572233,
+      "grad_norm": 0.9749796390533447,
+      "learning_rate": 0.00018263473050831837,
+      "loss": 1.523,
+      "step": 3578
+    },
+    {
+      "epoch": 0.575286317058469,
+      "grad_norm": 0.8879985809326172,
+      "learning_rate": 0.00018262523423396662,
+      "loss": 1.3985,
+      "step": 3579
+    },
+    {
+      "epoch": 0.5754470564597147,
+      "grad_norm": 0.61961430311203,
+      "learning_rate": 0.00018261573561081031,
+      "loss": 1.0912,
+      "step": 3580
+    },
+    {
+      "epoch": 0.5756077958609604,
+      "grad_norm": 0.8372465968132019,
+      "learning_rate": 0.00018260623463911947,
+      "loss": 1.4631,
+      "step": 3581
+    },
+    {
+      "epoch": 0.5757685352622062,
+      "grad_norm": 0.787183940410614,
+      "learning_rate": 0.00018259673131916417,
+      "loss": 1.4696,
+      "step": 3582
+    },
+    {
+      "epoch": 0.5759292746634519,
+      "grad_norm": 0.8781692981719971,
+      "learning_rate": 0.00018258722565121457,
+      "loss": 1.666,
+      "step": 3583
+    },
+    {
+      "epoch": 0.5760900140646976,
+      "grad_norm": 0.745153546333313,
+      "learning_rate": 0.0001825777176355409,
+      "loss": 1.4375,
+      "step": 3584
+    },
+    {
+      "epoch": 0.5762507534659433,
+      "grad_norm": 0.8475265502929688,
+      "learning_rate": 0.00018256820727241346,
+      "loss": 1.4029,
+      "step": 3585
+    },
+    {
+      "epoch": 0.5764114928671891,
+      "grad_norm": 0.9090588688850403,
+      "learning_rate": 0.00018255869456210258,
+      "loss": 1.7783,
+      "step": 3586
+    },
+    {
+      "epoch": 0.5765722322684348,
+      "grad_norm": 0.7442242503166199,
+      "learning_rate": 0.00018254917950487868,
+      "loss": 1.3415,
+      "step": 3587
+    },
+    {
+      "epoch": 0.5767329716696805,
+      "grad_norm": 0.7352378964424133,
+      "learning_rate": 0.00018253966210101224,
+      "loss": 1.2056,
+      "step": 3588
+    },
+    {
+      "epoch": 0.5768937110709262,
+      "grad_norm": 0.8588562607765198,
+      "learning_rate": 0.00018253014235077383,
+      "loss": 1.4402,
+      "step": 3589
+    },
+    {
+      "epoch": 0.577054450472172,
+      "grad_norm": 0.8207905888557434,
+      "learning_rate": 0.00018252062025443406,
+      "loss": 1.4028,
+      "step": 3590
+    },
+    {
+      "epoch": 0.5772151898734177,
+      "grad_norm": 0.6777616739273071,
+      "learning_rate": 0.00018251109581226361,
+      "loss": 1.1815,
+      "step": 3591
+    },
+    {
+      "epoch": 0.5773759292746634,
+      "grad_norm": 0.8747861385345459,
+      "learning_rate": 0.00018250156902453326,
+      "loss": 1.5673,
+      "step": 3592
+    },
+    {
+      "epoch": 0.5775366686759091,
+      "grad_norm": 0.6920707821846008,
+      "learning_rate": 0.0001824920398915138,
+      "loss": 1.3132,
+      "step": 3593
+    },
+    {
+      "epoch": 0.5776974080771549,
+      "grad_norm": 0.7359331250190735,
+      "learning_rate": 0.00018248250841347617,
+      "loss": 1.3675,
+      "step": 3594
+    },
+    {
+      "epoch": 0.5778581474784007,
+      "grad_norm": 0.8641143441200256,
+      "learning_rate": 0.00018247297459069123,
+      "loss": 1.5476,
+      "step": 3595
+    },
+    {
+      "epoch": 0.5780188868796464,
+      "grad_norm": 0.7159335017204285,
+      "learning_rate": 0.0001824634384234301,
+      "loss": 1.4403,
+      "step": 3596
+    },
+    {
+      "epoch": 0.5781796262808921,
+      "grad_norm": 0.7588579058647156,
+      "learning_rate": 0.00018245389991196378,
+      "loss": 1.4324,
+      "step": 3597
+    },
+    {
+      "epoch": 0.5783403656821379,
+      "grad_norm": 0.8039872050285339,
+      "learning_rate": 0.00018244435905656348,
+      "loss": 1.4148,
+      "step": 3598
+    },
+    {
+      "epoch": 0.5785011050833836,
+      "grad_norm": 0.79317706823349,
+      "learning_rate": 0.0001824348158575004,
+      "loss": 1.5543,
+      "step": 3599
+    },
+    {
+      "epoch": 0.5786618444846293,
+      "grad_norm": 0.8184112906455994,
+      "learning_rate": 0.00018242527031504585,
+      "loss": 1.6351,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5786618444846293,
+      "eval_loss": 1.4872082471847534,
+      "eval_runtime": 46.2321,
+      "eval_samples_per_second": 5.429,
+      "eval_steps_per_second": 2.725,
+      "step": 3600
     }
   ],
   "logging_steps": 1,
@@ -23979,7 +25387,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 6.286202891969495e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null