Training in progress, step 2000, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:be34e19063a01672a9636dbf2ca4c03fe9f29ca4dfe01598a6ad72a6490bbaf5
 size 80792096
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:22baf3ddd3ab195250ed72d03d933150903f606a1d1eb7509717a6b3402b3a00
 size 41460084
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:79f1b4d874921b657cd2914c561320d06d0161083bd33be5d35dd2c31f29e4e2
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:665f12a1546ede76823baa14bce405ee667bfb3012d4aa39dc9421ee1bb75cb9
 size 1064
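Each of the four files above is stored through Git LFS, so the repository itself only tracks a three-line pointer: the spec version, the sha256 object id, and the byte size. As a minimal sketch of how those pointer fields can be checked against a downloaded blob (the helper name and both paths are hypothetical, not part of this repository):

    import hashlib

    def verify_lfs_pointer(pointer_path, blob_path):
        """Check a downloaded blob against its Git LFS pointer file (hypothetical helper)."""
        fields = {}
        with open(pointer_path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        expected_oid = fields["oid"].split(":", 1)[1]  # "sha256:<hash>" -> "<hash>"
        expected_size = int(fields["size"])

        sha = hashlib.sha256()
        size = 0
        with open(blob_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
                sha.update(chunk)
                size += len(chunk)
        return sha.hexdigest() == expected_oid and size == expected_size

If both the hash and the byte count match the pointer, the downloaded blob is intact.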
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.6775344610214233,
   "best_model_checkpoint": "miner_id_24/checkpoint-500",
-  "epoch":
+  "epoch": 4.582546494992847,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
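The bulk of this commit is the "log_history" array inside trainer_state.json, extended with one record per training step (epoch, grad_norm, learning_rate, loss, step), as shown in the hunk below. A minimal sketch for pulling the loss curve out of a downloaded checkpoint; the local path is an assumption, not part of the repo:

    import json

    # Hypothetical path: point it at a locally downloaded checkpoint.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    # Training records carry a "loss" key; evaluation records carry "eval_loss" instead.
    train_logs = [e for e in state["log_history"] if "loss" in e]
    steps = [e["step"] for e in train_logs]
    losses = [e["loss"] for e in train_logs]
    print(f"{len(train_logs)} training log entries, last step {steps[-1]}, last loss {losses[-1]}")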
@@ -10539,6 +10539,3514 @@
       "eval_samples_per_second": 29.278,
       "eval_steps_per_second": 14.639,
       "step": 1500
     }
   ],
   "logging_steps": 1,
@@ -10553,7 +14061,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
       }
     },
     "TrainerControl": {
@@ -10562,12 +14070,12 @@
       "should_evaluate": false,
       "should_log": false,
       "should_save": true,
-      "should_training_stop":
     },
     "attributes": {}
   }
 },
-  "total_flos": 1.
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
@@ -10539,6 +10539,3514 @@
       "eval_samples_per_second": 29.278,
       "eval_steps_per_second": 14.639,
       "step": 1500
+    },
+    { "epoch": 3.4391988555078683, "grad_norm": 1.7406142950057983, "learning_rate": 0.00015908370856861033, "loss": 0.3547, "step": 1501 },
+    { "epoch": 3.4414878397711015, "grad_norm": 1.3736141920089722, "learning_rate": 0.00015903290310842857, "loss": 0.293, "step": 1502 },
+    { "epoch": 3.4437768240343347, "grad_norm": 1.7873742580413818, "learning_rate": 0.00015898207424949013, "loss": 0.3333, "step": 1503 },
+    { "epoch": 3.446065808297568, "grad_norm": 1.976028323173523, "learning_rate": 0.00015893122201194197, "loss": 0.3112, "step": 1504 },
+    { "epoch": 3.448354792560801, "grad_norm": 1.6089595556259155, "learning_rate": 0.00015888034641594026, "loss": 0.197, "step": 1505 },
+    { "epoch": 3.4506437768240343, "grad_norm": 2.2680089473724365, "learning_rate": 0.00015882944748165048, "loss": 0.324, "step": 1506 },
+    { "epoch": 3.4529327610872675, "grad_norm": 1.6552183628082275, "learning_rate": 0.00015877852522924732, "loss": 0.1868, "step": 1507 },
+    { "epoch": 3.4552217453505008, "grad_norm": 2.8168227672576904, "learning_rate": 0.00015872757967891476, "loss": 0.2664, "step": 1508 },
+    { "epoch": 3.457510729613734, "grad_norm": 2.649996042251587, "learning_rate": 0.00015867661085084596, "loss": 0.2186, "step": 1509 },
+    { "epoch": 3.459799713876967, "grad_norm": 1.109015703201294, "learning_rate": 0.00015862561876524338, "loss": 1.6589, "step": 1510 },
+    { "epoch": 3.4620886981402004, "grad_norm": 0.5797591805458069, "learning_rate": 0.00015857460344231863, "loss": 1.8554, "step": 1511 },
+    { "epoch": 3.4643776824034336, "grad_norm": 0.6286052465438843, "learning_rate": 0.00015852356490229252, "loss": 1.7645, "step": 1512 },
+    { "epoch": 3.466666666666667, "grad_norm": 0.7255878448486328, "learning_rate": 0.00015847250316539514, "loss": 1.701, "step": 1513 },
+    { "epoch": 3.4689556509299, "grad_norm": 0.7296729683876038, "learning_rate": 0.0001584214182518657, "loss": 1.5291, "step": 1514 },
+    { "epoch": 3.471244635193133, "grad_norm": 0.7586151361465454, "learning_rate": 0.00015837031018195268, "loss": 1.6909, "step": 1515 },
+    { "epoch": 3.4735336194563664, "grad_norm": 0.7578348517417908, "learning_rate": 0.00015831917897591365, "loss": 1.4752, "step": 1516 },
+    { "epoch": 3.4758226037195996, "grad_norm": 0.7664616703987122, "learning_rate": 0.00015826802465401536, "loss": 1.4779, "step": 1517 },
+    { "epoch": 3.4781115879828324, "grad_norm": 0.7997162342071533, "learning_rate": 0.00015821684723653378, "loss": 1.5449, "step": 1518 },
+    { "epoch": 3.480400572246066, "grad_norm": 0.8170934319496155, "learning_rate": 0.000158165646743754, "loss": 1.3985, "step": 1519 },
+    { "epoch": 3.482689556509299, "grad_norm": 0.7675970792770386, "learning_rate": 0.00015811442319597026, "loss": 1.4542, "step": 1520 },
+    { "epoch": 3.484978540772532, "grad_norm": 0.7991522550582886, "learning_rate": 0.00015806317661348594, "loss": 1.4358, "step": 1521 },
+    { "epoch": 3.487267525035765, "grad_norm": 0.7838619351387024, "learning_rate": 0.00015801190701661357, "loss": 1.2911, "step": 1522 },
+    { "epoch": 3.4895565092989984, "grad_norm": 0.7992157936096191, "learning_rate": 0.0001579606144256747, "loss": 1.3085, "step": 1523 },
+    { "epoch": 3.4918454935622316, "grad_norm": 0.8065170049667358, "learning_rate": 0.00015790929886100018, "loss": 1.3501, "step": 1524 },
+    { "epoch": 3.494134477825465, "grad_norm": 0.7681014537811279, "learning_rate": 0.0001578579603429298, "loss": 1.1413, "step": 1525 },
+    { "epoch": 3.496423462088698, "grad_norm": 0.7987495064735413, "learning_rate": 0.00015780659889181253, "loss": 1.4301, "step": 1526 },
+    { "epoch": 3.4987124463519312, "grad_norm": 0.8664441108703613, "learning_rate": 0.00015775521452800634, "loss": 1.1792, "step": 1527 },
+    { "epoch": 3.5010014306151644, "grad_norm": 0.8405476212501526, "learning_rate": 0.00015770380727187845, "loss": 1.1298, "step": 1528 },
+    { "epoch": 3.5032904148783977, "grad_norm": 0.896719753742218, "learning_rate": 0.000157652377143805, "loss": 1.3248, "step": 1529 },
+    { "epoch": 3.505579399141631, "grad_norm": 0.825574517250061, "learning_rate": 0.00015760092416417125, "loss": 1.0444, "step": 1530 },
+    { "epoch": 3.507868383404864, "grad_norm": 0.8801305890083313, "learning_rate": 0.00015754944835337157, "loss": 1.1188, "step": 1531 },
+    { "epoch": 3.5101573676680973, "grad_norm": 0.8957611322402954, "learning_rate": 0.00015749794973180921, "loss": 1.0719, "step": 1532 },
+    { "epoch": 3.5124463519313305, "grad_norm": 1.020003080368042, "learning_rate": 0.0001574464283198967, "loss": 1.0326, "step": 1533 },
+    { "epoch": 3.5147353361945637, "grad_norm": 0.9494165778160095, "learning_rate": 0.00015739488413805543, "loss": 0.9371, "step": 1534 },
+    { "epoch": 3.517024320457797, "grad_norm": 0.9374694228172302, "learning_rate": 0.00015734331720671584, "loss": 1.0078, "step": 1535 },
+    { "epoch": 3.51931330472103, "grad_norm": 0.984219491481781, "learning_rate": 0.00015729172754631749, "loss": 0.7388, "step": 1536 },
+    { "epoch": 3.5216022889842633, "grad_norm": 1.0056235790252686, "learning_rate": 0.00015724011517730878, "loss": 0.7902, "step": 1537 },
+    { "epoch": 3.5238912732474965, "grad_norm": 1.028628945350647, "learning_rate": 0.00015718848012014725, "loss": 0.8631, "step": 1538 },
+    { "epoch": 3.5261802575107297, "grad_norm": 1.0650653839111328, "learning_rate": 0.00015713682239529942, "loss": 0.8271, "step": 1539 },
+    { "epoch": 3.528469241773963, "grad_norm": 1.0517405271530151, "learning_rate": 0.00015708514202324075, "loss": 0.5618, "step": 1540 },
+    { "epoch": 3.530758226037196, "grad_norm": 1.1985256671905518, "learning_rate": 0.00015703343902445562, "loss": 0.7335, "step": 1541 },
+    { "epoch": 3.5330472103004293, "grad_norm": 1.4088890552520752, "learning_rate": 0.00015698171341943753, "loss": 0.637, "step": 1542 },
+    { "epoch": 3.535336194563662, "grad_norm": 1.1967028379440308, "learning_rate": 0.0001569299652286888, "loss": 0.6781, "step": 1543 },
+    { "epoch": 3.5376251788268958, "grad_norm": 1.2229645252227783, "learning_rate": 0.00015687819447272078, "loss": 0.576, "step": 1544 },
+    { "epoch": 3.5399141630901285, "grad_norm": 1.3130781650543213, "learning_rate": 0.00015682640117205377, "loss": 0.4962, "step": 1545 },
+    { "epoch": 3.542203147353362, "grad_norm": 1.6849275827407837, "learning_rate": 0.00015677458534721692, "loss": 0.5366, "step": 1546 },
+    { "epoch": 3.544492131616595, "grad_norm": 1.2390133142471313, "learning_rate": 0.0001567227470187484, "loss": 0.3497, "step": 1547 },
+    { "epoch": 3.5467811158798286, "grad_norm": 1.5223252773284912, "learning_rate": 0.00015667088620719528, "loss": 0.3136, "step": 1548 },
+    { "epoch": 3.5490701001430613, "grad_norm": 1.772728443145752, "learning_rate": 0.0001566190029331135, "loss": 0.4348, "step": 1549 },
+    { "epoch": 3.5513590844062946, "grad_norm": 2.023331880569458, "learning_rate": 0.00015656709721706798, "loss": 0.412, "step": 1550 },
+    { "epoch": 3.5536480686695278, "grad_norm": 1.3482370376586914, "learning_rate": 0.00015651516907963246, "loss": 0.2864, "step": 1551 },
+    { "epoch": 3.555937052932761, "grad_norm": 1.777756690979004, "learning_rate": 0.0001564632185413896, "loss": 0.3162, "step": 1552 },
+    { "epoch": 3.558226037195994, "grad_norm": 1.7310400009155273, "learning_rate": 0.0001564112456229309, "loss": 0.3019, "step": 1553 },
+    { "epoch": 3.5605150214592274, "grad_norm": 2.4951937198638916, "learning_rate": 0.00015635925034485678, "loss": 0.2914, "step": 1554 },
+    { "epoch": 3.5628040057224606, "grad_norm": 1.6481578350067139, "learning_rate": 0.00015630723272777654, "loss": 0.2832, "step": 1555 },
+    { "epoch": 3.565092989985694, "grad_norm": 1.9817842245101929, "learning_rate": 0.00015625519279230827, "loss": 0.2218, "step": 1556 },
+    { "epoch": 3.567381974248927, "grad_norm": 2.244994640350342, "learning_rate": 0.00015620313055907892, "loss": 0.2811, "step": 1557 },
+    { "epoch": 3.56967095851216, "grad_norm": 1.7713173627853394, "learning_rate": 0.00015615104604872437, "loss": 0.2204, "step": 1558 },
+    { "epoch": 3.5719599427753934, "grad_norm": 3.4250166416168213, "learning_rate": 0.0001560989392818892, "loss": 0.3418, "step": 1559 },
+    { "epoch": 3.5742489270386266, "grad_norm": 0.8141402006149292, "learning_rate": 0.00015604681027922684, "loss": 1.5305, "step": 1560 },
+    { "epoch": 3.57653791130186, "grad_norm": 0.576050877571106, "learning_rate": 0.0001559946590613996, "loss": 1.7689, "step": 1561 },
+    { "epoch": 3.578826895565093, "grad_norm": 0.6083397269248962, "learning_rate": 0.00015594248564907857, "loss": 1.7663, "step": 1562 },
+    { "epoch": 3.5811158798283262, "grad_norm": 0.6765815615653992, "learning_rate": 0.00015589029006294363, "loss": 1.68, "step": 1563 },
+    { "epoch": 3.5834048640915594, "grad_norm": 0.684815526008606, "learning_rate": 0.00015583807232368336, "loss": 1.5431, "step": 1564 },
+    { "epoch": 3.5856938483547927, "grad_norm": 0.7227407693862915, "learning_rate": 0.00015578583245199533, "loss": 1.4256, "step": 1565 },
+    { "epoch": 3.587982832618026, "grad_norm": 0.7657136917114258, "learning_rate": 0.00015573357046858567, "loss": 1.5605, "step": 1566 },
+    { "epoch": 3.590271816881259, "grad_norm": 0.715350866317749, "learning_rate": 0.00015568128639416933, "loss": 1.6071, "step": 1567 },
+    { "epoch": 3.5925608011444923, "grad_norm": 0.7893856167793274, "learning_rate": 0.00015562898024947013, "loss": 1.5667, "step": 1568 },
+    { "epoch": 3.5948497854077255, "grad_norm": 0.8078209161758423, "learning_rate": 0.00015557665205522052, "loss": 1.3805, "step": 1569 },
+    { "epoch": 3.5971387696709582, "grad_norm": 0.7596533298492432, "learning_rate": 0.0001555243018321617, "loss": 1.3577, "step": 1570 },
+    { "epoch": 3.599427753934192, "grad_norm": 0.8347642421722412, "learning_rate": 0.00015547192960104367, "loss": 1.2966, "step": 1571 },
+    { "epoch": 3.6017167381974247, "grad_norm": 0.8208310604095459, "learning_rate": 0.0001554195353826251, "loss": 1.4773, "step": 1572 },
+    { "epoch": 3.6040057224606583, "grad_norm": 0.8004641532897949, "learning_rate": 0.00015536711919767338, "loss": 1.3247, "step": 1573 },
+    { "epoch": 3.606294706723891, "grad_norm": 0.8222883939743042, "learning_rate": 0.00015531468106696455, "loss": 1.2533, "step": 1574 },
+    { "epoch": 3.6085836909871247, "grad_norm": 0.7890996932983398, "learning_rate": 0.00015526222101128355, "loss": 1.1291, "step": 1575 },
+    { "epoch": 3.6108726752503575, "grad_norm": 0.7932547330856323, "learning_rate": 0.00015520973905142372, "loss": 1.2697, "step": 1576 },
+    { "epoch": 3.6131616595135907, "grad_norm": 0.7988429665565491, "learning_rate": 0.00015515723520818732, "loss": 1.2013, "step": 1577 },
+    { "epoch": 3.615450643776824, "grad_norm": 0.824824869632721, "learning_rate": 0.00015510470950238516, "loss": 1.0999, "step": 1578 },
+    { "epoch": 3.617739628040057, "grad_norm": 0.8311036229133606, "learning_rate": 0.00015505216195483675, "loss": 1.1142, "step": 1579 },
+    { "epoch": 3.6200286123032903, "grad_norm": 0.906527578830719, "learning_rate": 0.0001549995925863703, "loss": 1.1211, "step": 1580 },
+    { "epoch": 3.6223175965665235, "grad_norm": 0.8750422596931458, "learning_rate": 0.00015494700141782263, "loss": 1.0368, "step": 1581 },
+    { "epoch": 3.6246065808297567, "grad_norm": 0.87028968334198, "learning_rate": 0.0001548943884700391, "loss": 0.8723, "step": 1582 },
+    { "epoch": 3.62689556509299, "grad_norm": 0.9463768005371094, "learning_rate": 0.0001548417537638739, "loss": 0.9306, "step": 1583 },
+    { "epoch": 3.629184549356223, "grad_norm": 0.9717691540718079, "learning_rate": 0.0001547890973201897, "loss": 0.9884, "step": 1584 },
+    { "epoch": 3.6314735336194564, "grad_norm": 1.0096951723098755, "learning_rate": 0.0001547364191598579, "loss": 0.8862, "step": 1585 },
+    { "epoch": 3.6337625178826896, "grad_norm": 1.0112119913101196, "learning_rate": 0.00015468371930375834, "loss": 0.9107, "step": 1586 },
+    { "epoch": 3.6360515021459228, "grad_norm": 1.109299898147583, "learning_rate": 0.00015463099777277963, "loss": 0.7745, "step": 1587 },
+    { "epoch": 3.638340486409156, "grad_norm": 1.1044436693191528, "learning_rate": 0.00015457825458781887, "loss": 0.8227, "step": 1588 },
+    { "epoch": 3.640629470672389, "grad_norm": 0.9999521970748901, "learning_rate": 0.00015452548976978178, "loss": 0.6667, "step": 1589 },
+    { "epoch": 3.6429184549356224, "grad_norm": 1.1406440734863281, "learning_rate": 0.00015447270333958265, "loss": 0.6793, "step": 1590 },
+    { "epoch": 3.6452074391988556, "grad_norm": 1.1465872526168823, "learning_rate": 0.00015441989531814437, "loss": 0.5937, "step": 1591 },
+    { "epoch": 3.647496423462089, "grad_norm": 1.1943423748016357, "learning_rate": 0.00015436706572639826, "loss": 0.6536, "step": 1592 },
+    { "epoch": 3.649785407725322, "grad_norm": 1.4073413610458374, "learning_rate": 0.00015431421458528437, "loss": 0.6297, "step": 1593 },
+    { "epoch": 3.652074391988555, "grad_norm": 1.248165488243103, "learning_rate": 0.00015426134191575116, "loss": 0.5336, "step": 1594 },
+    { "epoch": 3.6543633762517884, "grad_norm": 1.0524934530258179, "learning_rate": 0.00015420844773875568, "loss": 0.4659, "step": 1595 },
+    { "epoch": 3.6566523605150216, "grad_norm": 1.3649994134902954, "learning_rate": 0.0001541555320752635, "loss": 0.4773, "step": 1596 },
+    { "epoch": 3.6589413447782544, "grad_norm": 1.3285311460494995, "learning_rate": 0.0001541025949462487, "loss": 0.4507, "step": 1597 },
+    { "epoch": 3.661230329041488, "grad_norm": 1.3772430419921875, "learning_rate": 0.00015404963637269381, "loss": 0.3966, "step": 1598 },
+    { "epoch": 3.663519313304721, "grad_norm": 1.4577609300613403, "learning_rate": 0.00015399665637559, "loss": 0.3689, "step": 1599 },
+    { "epoch": 3.6658082975679545, "grad_norm": 1.5130454301834106, "learning_rate": 0.00015394365497593682, "loss": 0.3042, "step": 1600 },
+    { "epoch": 3.668097281831187, "grad_norm": 1.4147976636886597, "learning_rate": 0.00015389063219474228, "loss": 0.3654, "step": 1601 },
+    { "epoch": 3.670386266094421, "grad_norm": 2.0109431743621826, "learning_rate": 0.000153837588053023, "loss": 0.3362, "step": 1602 },
+    { "epoch": 3.6726752503576536, "grad_norm": 2.0059592723846436, "learning_rate": 0.00015378452257180388, "loss": 0.279, "step": 1603 },
+    { "epoch": 3.674964234620887, "grad_norm": 2.262427806854248, "learning_rate": 0.0001537314357721185, "loss": 0.3572, "step": 1604 },
+    { "epoch": 3.67725321888412, "grad_norm": 1.9626609086990356, "learning_rate": 0.0001536783276750087, "loss": 0.2642, "step": 1605 },
+    { "epoch": 3.6795422031473533, "grad_norm": 2.9041426181793213, "learning_rate": 0.00015362519830152484, "loss": 0.3735, "step": 1606 },
+    { "epoch": 3.6818311874105865, "grad_norm": 2.211362838745117, "learning_rate": 0.00015357204767272572, "loss": 0.2951, "step": 1607 },
+    { "epoch": 3.6841201716738197, "grad_norm": 3.4044175148010254, "learning_rate": 0.00015351887580967855, "loss": 0.3566, "step": 1608 },
+    { "epoch": 3.686409155937053, "grad_norm": 3.792001485824585, "learning_rate": 0.00015346568273345897, "loss": 0.3662, "step": 1609 },
+    { "epoch": 3.688698140200286, "grad_norm": 1.4554511308670044, "learning_rate": 0.00015341246846515096, "loss": 1.6486, "step": 1610 },
+    { "epoch": 3.6909871244635193, "grad_norm": 0.5519815683364868, "learning_rate": 0.000153359233025847, "loss": 1.8248, "step": 1611 },
+    { "epoch": 3.6932761087267525, "grad_norm": 0.5921805500984192, "learning_rate": 0.0001533059764366479, "loss": 1.7423, "step": 1612 },
+    { "epoch": 3.6955650929899857, "grad_norm": 0.6531163454055786, "learning_rate": 0.00015325269871866291, "loss": 1.5875, "step": 1613 },
+    { "epoch": 3.697854077253219, "grad_norm": 0.6997845768928528, "learning_rate": 0.00015319939989300954, "loss": 1.59, "step": 1614 },
+    { "epoch": 3.700143061516452, "grad_norm": 0.7298904657363892, "learning_rate": 0.00015314607998081386, "loss": 1.6081, "step": 1615 },
+    { "epoch": 3.7024320457796853, "grad_norm": 0.7905436158180237, "learning_rate": 0.00015309273900321007, "loss": 1.5784, "step": 1616 },
+    { "epoch": 3.7047210300429185, "grad_norm": 0.7912278771400452, "learning_rate": 0.00015303937698134083, "loss": 1.4748, "step": 1617 },
+    { "epoch": 3.7070100143061517, "grad_norm": 0.8271948099136353, "learning_rate": 0.00015298599393635722, "loss": 1.489, "step": 1618 },
+    { "epoch": 3.709298998569385, "grad_norm": 0.7761908173561096, "learning_rate": 0.00015293258988941855, "loss": 1.41, "step": 1619 },
+    { "epoch": 3.711587982832618, "grad_norm": 0.8067419528961182, "learning_rate": 0.0001528791648616924, "loss": 1.4425, "step": 1620 },
+    { "epoch": 3.7138769670958514, "grad_norm": 0.8288164734840393, "learning_rate": 0.00015282571887435483, "loss": 1.4353, "step": 1621 },
+    { "epoch": 3.7161659513590846, "grad_norm": 0.7974148988723755, "learning_rate": 0.00015277225194859008, "loss": 1.3482, "step": 1622 },
+    { "epoch": 3.7184549356223178, "grad_norm": 0.7817927002906799, "learning_rate": 0.0001527187641055908, "loss": 1.3171, "step": 1623 },
+    { "epoch": 3.7207439198855505, "grad_norm": 0.8328514099121094, "learning_rate": 0.00015266525536655775, "loss": 1.4593, "step": 1624 },
+    { "epoch": 3.723032904148784, "grad_norm": 0.7940589189529419, "learning_rate": 0.00015261172575270016, "loss": 1.2146, "step": 1625 },
+    { "epoch": 3.725321888412017, "grad_norm": 0.8301095962524414, "learning_rate": 0.00015255817528523545, "loss": 1.2037, "step": 1626 },
+    { "epoch": 3.7276108726752506, "grad_norm": 0.8458488583564758, "learning_rate": 0.0001525046039853893, "loss": 1.2401, "step": 1627 },
+    { "epoch": 3.7298998569384834, "grad_norm": 0.85792076587677, "learning_rate": 0.00015245101187439563, "loss": 1.1158, "step": 1628 },
+    { "epoch": 3.732188841201717, "grad_norm": 0.8571141362190247, "learning_rate": 0.0001523973989734967, "loss": 1.0806, "step": 1629 },
+    { "epoch": 3.7344778254649498, "grad_norm": 0.8554336428642273, "learning_rate": 0.00015234376530394295, "loss": 1.1863, "step": 1630 },
+    { "epoch": 3.736766809728183, "grad_norm": 0.8635918498039246, "learning_rate": 0.00015229011088699302, "loss": 0.9766, "step": 1631 },
+    { "epoch": 3.739055793991416, "grad_norm": 0.9634860157966614, "learning_rate": 0.0001522364357439138, "loss": 1.1977, "step": 1632 },
+    { "epoch": 3.7413447782546494, "grad_norm": 0.9780722856521606, "learning_rate": 0.00015218273989598045, "loss": 1.0433, "step": 1633 },
+    { "epoch": 3.7436337625178826, "grad_norm": 0.9208489656448364, "learning_rate": 0.00015212902336447627, "loss": 1.0092, "step": 1634 },
+    { "epoch": 3.745922746781116, "grad_norm": 1.0279505252838135, "learning_rate": 0.00015207528617069272, "loss": 0.9366, "step": 1635 },
+    { "epoch": 3.748211731044349, "grad_norm": 1.0359374284744263, "learning_rate": 0.00015202152833592954, "loss": 0.8968, "step": 1636 },
+    { "epoch": 3.7505007153075822, "grad_norm": 1.081196665763855, "learning_rate": 0.00015196774988149466, "loss": 0.7949, "step": 1637 },
+    { "epoch": 3.7527896995708154, "grad_norm": 1.128497838973999, "learning_rate": 0.00015191395082870412, "loss": 0.8224, "step": 1638 },
+    { "epoch": 3.7550786838340486, "grad_norm": 1.035473346710205, "learning_rate": 0.00015186013119888213, "loss": 0.7625, "step": 1639 },
+    { "epoch": 3.757367668097282, "grad_norm": 1.1496644020080566, "learning_rate": 0.00015180629101336108, "loss": 0.8328, "step": 1640 },
+    { "epoch": 3.759656652360515, "grad_norm": 1.1070492267608643, "learning_rate": 0.0001517524302934815, "loss": 0.7106, "step": 1641 },
+    { "epoch": 3.7619456366237483, "grad_norm": 1.2384364604949951, "learning_rate": 0.000151698549060592, "loss": 0.6606, "step": 1642 },
+    { "epoch": 3.7642346208869815, "grad_norm": 1.1313549280166626, "learning_rate": 0.0001516446473360495, "loss": 0.5574, "step": 1643 },
+    { "epoch": 3.7665236051502147, "grad_norm": 1.3806557655334473, "learning_rate": 0.00015159072514121883, "loss": 0.6239, "step": 1644 },
+    { "epoch": 3.768812589413448, "grad_norm": 1.3479688167572021, "learning_rate": 0.00015153678249747306, "loss": 0.5784, "step": 1645 },
+    { "epoch": 3.771101573676681, "grad_norm": 1.3832741975784302, "learning_rate": 0.00015148281942619333, "loss": 0.4887, "step": 1646 },
+    { "epoch": 3.7733905579399143, "grad_norm": 1.5570096969604492, "learning_rate": 0.00015142883594876887, "loss": 0.6388, "step": 1647 },
+    { "epoch": 3.7756795422031475, "grad_norm": 1.3564070463180542, "learning_rate": 0.000151374832086597, "loss": 0.4629, "step": 1648 },
+    { "epoch": 3.7779685264663807, "grad_norm": 1.5236693620681763, "learning_rate": 0.00015132080786108313, "loss": 0.3627, "step": 1649 },
+    { "epoch": 3.780257510729614, "grad_norm": 1.6069036722183228, "learning_rate": 0.00015126676329364072, "loss": 0.3655, "step": 1650 },
+    { "epoch": 3.7825464949928467, "grad_norm": 1.5798829793930054, "learning_rate": 0.00015121269840569134, "loss": 0.367, "step": 1651 },
+    { "epoch": 3.7848354792560803, "grad_norm": 1.8589974641799927, "learning_rate": 0.00015115861321866455, "loss": 0.3061, "step": 1652 },
+    { "epoch": 3.787124463519313, "grad_norm": 1.395572543144226, "learning_rate": 0.00015110450775399805, "loss": 0.2572, "step": 1653 },
+    { "epoch": 3.7894134477825467, "grad_norm": 2.529019832611084, "learning_rate": 0.00015105038203313748, "loss": 0.4464, "step": 1654 },
+    { "epoch": 3.7917024320457795, "grad_norm": 1.760545253753662, "learning_rate": 0.0001509962360775365, "loss": 0.2562, "step": 1655 },
+    { "epoch": 3.7939914163090127, "grad_norm": 1.9058586359024048, "learning_rate": 0.00015094206990865693, "loss": 0.25, "step": 1656 },
+    { "epoch": 3.796280400572246, "grad_norm": 1.9067879915237427, "learning_rate": 0.00015088788354796843, "loss": 0.1715, "step": 1657 },
+    { "epoch": 3.798569384835479, "grad_norm": 2.5658154487609863, "learning_rate": 0.0001508336770169488, "loss": 0.2417, "step": 1658 },
+    { "epoch": 3.8008583690987123, "grad_norm": 5.0240912437438965, "learning_rate": 0.0001507794503370837, "loss": 0.2909, "step": 1659 },
+    { "epoch": 3.8031473533619455, "grad_norm": 1.011162519454956, "learning_rate": 0.00015072520352986697, "loss": 1.6048, "step": 1660 },
+    { "epoch": 3.8054363376251787, "grad_norm": 0.5463991165161133, "learning_rate": 0.0001506709366168002, "loss": 1.8731, "step": 1661 },
+    { "epoch": 3.807725321888412, "grad_norm": 0.6056814789772034, "learning_rate": 0.00015061664961939312, "loss": 1.7895, "step": 1662 },
+    { "epoch": 3.810014306151645, "grad_norm": 0.6620305776596069, "learning_rate": 0.00015056234255916335, "loss": 1.7562, "step": 1663 },
+    { "epoch": 3.8123032904148784, "grad_norm": 0.7236174941062927, "learning_rate": 0.00015050801545763646, "loss": 1.642, "step": 1664 },
+    { "epoch": 3.8145922746781116, "grad_norm": 0.7159740924835205, "learning_rate": 0.000150453668336346, "loss": 1.6126, "step": 1665 },
+    { "epoch": 3.816881258941345, "grad_norm": 0.7541245222091675, "learning_rate": 0.0001503993012168334, "loss": 1.4509, "step": 1666 },
+    { "epoch": 3.819170243204578, "grad_norm": 0.7407465577125549, "learning_rate": 0.00015034491412064806, "loss": 1.4913, "step": 1667 },
+    { "epoch": 3.821459227467811, "grad_norm": 0.8195914626121521, "learning_rate": 0.00015029050706934737, "loss": 1.6013, "step": 1668 },
+    { "epoch": 3.8237482117310444, "grad_norm": 0.8221433758735657, "learning_rate": 0.00015023608008449642, "loss": 1.4439, "step": 1669 },
+    { "epoch": 3.8260371959942776, "grad_norm": 0.8168205618858337, "learning_rate": 0.00015018163318766833, "loss": 1.3029, "step": 1670 },
+    { "epoch": 3.828326180257511, "grad_norm": 0.8368527889251709, "learning_rate": 0.0001501271664004442, "loss": 1.4415, "step": 1671 },
+    { "epoch": 3.830615164520744, "grad_norm": 0.8870606422424316, "learning_rate": 0.00015007267974441292, "loss": 1.4138, "step": 1672 },
+    { "epoch": 3.8329041487839772, "grad_norm": 0.8419132828712463, "learning_rate": 0.00015001817324117122, "loss": 1.3305, "step": 1673 },
+    { "epoch": 3.8351931330472104, "grad_norm": 0.8348716497421265, "learning_rate": 0.00014996364691232373, "loss": 1.2744, "step": 1674 },
+    { "epoch": 3.8374821173104436, "grad_norm": 0.8420340418815613, "learning_rate": 0.00014990910077948292, "loss": 1.3047, "step": 1675 },
+    { "epoch": 3.8397711015736764, "grad_norm": 0.8508443236351013, "learning_rate": 0.0001498545348642692, "loss": 1.285, "step": 1676 },
+    { "epoch": 3.84206008583691, "grad_norm": 0.8771832585334778, "learning_rate": 0.00014979994918831073, "loss": 1.3076, "step": 1677 },
+    { "epoch": 3.844349070100143, "grad_norm": 0.8925603628158569, "learning_rate": 0.00014974534377324355, "loss": 1.2541, "step": 1678 },
+    { "epoch": 3.8466380543633765, "grad_norm": 0.9128324389457703, "learning_rate": 0.00014969071864071143, "loss": 1.0793, "step": 1679 },
+    { "epoch": 3.8489270386266092, "grad_norm": 0.9458082914352417, "learning_rate": 0.00014963607381236608, "loss": 1.0655, "step": 1680 },
+    { "epoch": 3.851216022889843, "grad_norm": 0.9138215780258179, "learning_rate": 0.00014958140930986698, "loss": 1.0918, "step": 1681 },
+    { "epoch": 3.8535050071530756, "grad_norm": 0.9036693572998047, "learning_rate": 0.00014952672515488132, "loss": 0.8557, "step": 1682 },
+    { "epoch": 3.855793991416309, "grad_norm": 0.8931397795677185, "learning_rate": 0.00014947202136908426, "loss": 0.9188, "step": 1683 },
+    { "epoch": 3.858082975679542, "grad_norm": 0.9750208258628845, "learning_rate": 0.00014941729797415853, "loss": 0.9551, "step": 1684 },
+    { "epoch": 3.8603719599427753, "grad_norm": 1.0227867364883423, "learning_rate": 0.0001493625549917948, "loss": 0.8957, "step": 1685 },
+    { "epoch": 3.8626609442060085, "grad_norm": 1.04948890209198, "learning_rate": 0.00014930779244369142, "loss": 0.8852, "step": 1686 },
+    { "epoch": 3.8649499284692417, "grad_norm": 0.9962487816810608, "learning_rate": 0.0001492530103515545, "loss": 0.8416, "step": 1687 },
+    { "epoch": 3.867238912732475, "grad_norm": 1.0342847108840942, "learning_rate": 0.00014919820873709796, "loss": 0.9805, "step": 1688 },
+    { "epoch": 3.869527896995708, "grad_norm": 1.0398114919662476, "learning_rate": 0.00014914338762204331, "loss": 0.8916, "step": 1689 },
+    { "epoch": 3.8718168812589413, "grad_norm": 1.1402392387390137, "learning_rate": 0.00014908854702811998, "loss": 0.769, "step": 1690 },
+    { "epoch": 3.8741058655221745, "grad_norm": 1.217068076133728, "learning_rate": 0.00014903368697706502, "loss": 0.8346, "step": 1691 },
+    { "epoch": 3.8763948497854077, "grad_norm": 1.293504238128662, "learning_rate": 0.00014897880749062316, "loss": 0.8068, "step": 1692 },
+    { "epoch": 3.878683834048641, "grad_norm": 1.333113193511963, "learning_rate": 0.0001489239085905469, "loss": 0.6991, "step": 1693 },
+    { "epoch": 3.880972818311874, "grad_norm": 1.1802215576171875, "learning_rate": 0.00014886899029859643, "loss": 0.6316, "step": 1694 },
+    { "epoch": 3.8832618025751073, "grad_norm": 1.3808867931365967, "learning_rate": 0.00014881405263653954, "loss": 0.5928, "step": 1695 },
+    { "epoch": 3.8855507868383405, "grad_norm": 1.3361176252365112, "learning_rate": 0.00014875909562615184, "loss": 0.6402, "step": 1696 },
+    { "epoch": 3.8878397711015737, "grad_norm": 1.2890623807907104, "learning_rate": 0.0001487041192892165, "loss": 0.5675, "step": 1697 },
+    { "epoch": 3.890128755364807, "grad_norm": 1.430479884147644, "learning_rate": 0.00014864912364752434, "loss": 0.5139, "step": 1698 },
+    { "epoch": 3.89241773962804,
|
11931 |
+
"grad_norm": 1.2463384866714478,
|
11932 |
+
"learning_rate": 0.000148594108722874,
|
11933 |
+
"loss": 0.3208,
|
11934 |
+
"step": 1699
|
11935 |
+
},
|
11936 |
+
{
|
11937 |
+
"epoch": 3.8947067238912734,
|
11938 |
+
"grad_norm": 1.4024913311004639,
|
11939 |
+
"learning_rate": 0.00014853907453707147,
|
11940 |
+
"loss": 0.3998,
|
11941 |
+
"step": 1700
|
11942 |
+
},
|
11943 |
+
{
|
11944 |
+
"epoch": 3.8969957081545066,
|
11945 |
+
"grad_norm": 1.6548993587493896,
|
11946 |
+
"learning_rate": 0.00014848402111193067,
|
11947 |
+
"loss": 0.2924,
|
11948 |
+
"step": 1701
|
11949 |
+
},
|
11950 |
+
{
|
11951 |
+
"epoch": 3.89928469241774,
|
11952 |
+
"grad_norm": 1.315961480140686,
|
11953 |
+
"learning_rate": 0.000148428948469273,
|
11954 |
+
"loss": 0.2436,
|
11955 |
+
"step": 1702
|
11956 |
+
},
|
11957 |
+
{
|
11958 |
+
"epoch": 3.9015736766809725,
|
11959 |
+
"grad_norm": 2.049527168273926,
|
11960 |
+
"learning_rate": 0.00014837385663092744,
|
11961 |
+
"loss": 0.3896,
|
11962 |
+
"step": 1703
|
11963 |
+
},
|
11964 |
+
{
|
11965 |
+
"epoch": 3.903862660944206,
|
11966 |
+
"grad_norm": 1.8041912317276,
|
11967 |
+
"learning_rate": 0.0001483187456187307,
|
11968 |
+
"loss": 0.3867,
|
11969 |
+
"step": 1704
|
11970 |
+
},
|
11971 |
+
{
|
11972 |
+
"epoch": 3.906151645207439,
|
11973 |
+
"grad_norm": 1.984589695930481,
|
11974 |
+
"learning_rate": 0.00014826361545452696,
|
11975 |
+
"loss": 0.3802,
|
11976 |
+
"step": 1705
|
11977 |
+
},
|
11978 |
+
{
|
11979 |
+
"epoch": 3.9084406294706726,
|
11980 |
+
"grad_norm": 2.0554628372192383,
|
11981 |
+
"learning_rate": 0.0001482084661601681,
|
11982 |
+
"loss": 0.3724,
|
11983 |
+
"step": 1706
|
11984 |
+
},
|
11985 |
+
{
|
11986 |
+
"epoch": 3.9107296137339054,
|
11987 |
+
"grad_norm": 2.1720497608184814,
|
11988 |
+
"learning_rate": 0.00014815329775751357,
|
11989 |
+
"loss": 0.2343,
|
11990 |
+
"step": 1707
|
11991 |
+
},
|
11992 |
+
{
|
11993 |
+
"epoch": 3.913018597997139,
|
11994 |
+
"grad_norm": 3.0064074993133545,
|
11995 |
+
"learning_rate": 0.00014809811026843026,
|
11996 |
+
"loss": 0.2229,
|
11997 |
+
"step": 1708
|
11998 |
+
},
|
11999 |
+
{
|
12000 |
+
"epoch": 3.915307582260372,
|
12001 |
+
"grad_norm": 3.7488739490509033,
|
12002 |
+
"learning_rate": 0.0001480429037147928,
|
12003 |
+
"loss": 0.2322,
|
12004 |
+
"step": 1709
|
12005 |
+
},
|
12006 |
+
{
|
12007 |
+
"epoch": 3.917596566523605,
|
12008 |
+
"grad_norm": 0.7786121964454651,
|
12009 |
+
"learning_rate": 0.0001479876781184833,
|
12010 |
+
"loss": 1.6537,
|
12011 |
+
"step": 1710
|
12012 |
+
},
|
12013 |
+
{
|
12014 |
+
"epoch": 3.919885550786838,
|
12015 |
+
"grad_norm": 0.5747677087783813,
|
12016 |
+
"learning_rate": 0.00014793243350139142,
|
12017 |
+
"loss": 1.9256,
|
12018 |
+
"step": 1711
|
12019 |
+
},
|
12020 |
+
{
|
12021 |
+
"epoch": 3.9221745350500714,
|
12022 |
+
"grad_norm": 0.6486791372299194,
|
12023 |
+
"learning_rate": 0.00014787716988541436,
|
12024 |
+
"loss": 1.6479,
|
12025 |
+
"step": 1712
|
12026 |
+
},
|
12027 |
+
{
|
12028 |
+
"epoch": 3.9244635193133046,
|
12029 |
+
"grad_norm": 0.7661999464035034,
|
12030 |
+
"learning_rate": 0.00014782188729245684,
|
12031 |
+
"loss": 1.5968,
|
12032 |
+
"step": 1713
|
12033 |
+
},
|
12034 |
+
{
|
12035 |
+
"epoch": 3.926752503576538,
|
12036 |
+
"grad_norm": 0.7707553505897522,
|
12037 |
+
"learning_rate": 0.0001477665857444311,
|
12038 |
+
"loss": 1.5102,
|
12039 |
+
"step": 1714
|
12040 |
+
},
|
12041 |
+
{
|
12042 |
+
"epoch": 3.929041487839771,
|
12043 |
+
"grad_norm": 0.8604703545570374,
|
12044 |
+
"learning_rate": 0.00014771126526325694,
|
12045 |
+
"loss": 1.5109,
|
12046 |
+
"step": 1715
|
12047 |
+
},
|
12048 |
+
{
|
12049 |
+
"epoch": 3.9313304721030042,
|
12050 |
+
"grad_norm": 0.8465156555175781,
|
12051 |
+
"learning_rate": 0.00014765592587086162,
|
12052 |
+
"loss": 1.4792,
|
12053 |
+
"step": 1716
|
12054 |
+
},
|
12055 |
+
{
|
12056 |
+
"epoch": 3.9336194563662374,
|
12057 |
+
"grad_norm": 0.9079059362411499,
|
12058 |
+
"learning_rate": 0.0001476005675891799,
|
12059 |
+
"loss": 1.5047,
|
12060 |
+
"step": 1717
|
12061 |
+
},
|
12062 |
+
{
|
12063 |
+
"epoch": 3.9359084406294707,
|
12064 |
+
"grad_norm": 0.8793495297431946,
|
12065 |
+
"learning_rate": 0.00014754519044015405,
|
12066 |
+
"loss": 1.3054,
|
12067 |
+
"step": 1718
|
12068 |
+
},
|
12069 |
+
{
|
12070 |
+
"epoch": 3.938197424892704,
|
12071 |
+
"grad_norm": 0.9664410948753357,
|
12072 |
+
"learning_rate": 0.00014748979444573378,
|
12073 |
+
"loss": 1.2806,
|
12074 |
+
"step": 1719
|
12075 |
+
},
|
12076 |
+
{
|
12077 |
+
"epoch": 3.940486409155937,
|
12078 |
+
"grad_norm": 0.9473050236701965,
|
12079 |
+
"learning_rate": 0.0001474343796278763,
|
12080 |
+
"loss": 1.406,
|
12081 |
+
"step": 1720
|
12082 |
+
},
|
12083 |
+
{
|
12084 |
+
"epoch": 3.9427753934191703,
|
12085 |
+
"grad_norm": 0.9971415996551514,
|
12086 |
+
"learning_rate": 0.00014737894600854628,
|
12087 |
+
"loss": 1.3588,
|
12088 |
+
"step": 1721
|
12089 |
+
},
|
12090 |
+
{
|
12091 |
+
"epoch": 3.9450643776824035,
|
12092 |
+
"grad_norm": 1.0359668731689453,
|
12093 |
+
"learning_rate": 0.00014732349360971586,
|
12094 |
+
"loss": 1.226,
|
12095 |
+
"step": 1722
|
12096 |
+
},
|
12097 |
+
{
|
12098 |
+
"epoch": 3.9473533619456367,
|
12099 |
+
"grad_norm": 1.0823783874511719,
|
12100 |
+
"learning_rate": 0.0001472680224533645,
|
12101 |
+
"loss": 1.2084,
|
12102 |
+
"step": 1723
|
12103 |
+
},
|
12104 |
+
{
|
12105 |
+
"epoch": 3.94964234620887,
|
12106 |
+
"grad_norm": 0.9512320756912231,
|
12107 |
+
"learning_rate": 0.00014721253256147934,
|
12108 |
+
"loss": 1.0636,
|
12109 |
+
"step": 1724
|
12110 |
+
},
|
12111 |
+
{
|
12112 |
+
"epoch": 3.951931330472103,
|
12113 |
+
"grad_norm": 1.0435305833816528,
|
12114 |
+
"learning_rate": 0.0001471570239560547,
|
12115 |
+
"loss": 1.0408,
|
12116 |
+
"step": 1725
|
12117 |
+
},
|
12118 |
+
{
|
12119 |
+
"epoch": 3.9542203147353363,
|
12120 |
+
"grad_norm": 0.9986626505851746,
|
12121 |
+
"learning_rate": 0.00014710149665909242,
|
12122 |
+
"loss": 0.9198,
|
12123 |
+
"step": 1726
|
12124 |
+
},
|
12125 |
+
{
|
12126 |
+
"epoch": 3.9565092989985695,
|
12127 |
+
"grad_norm": 1.0072948932647705,
|
12128 |
+
"learning_rate": 0.00014704595069260182,
|
12129 |
+
"loss": 0.8214,
|
12130 |
+
"step": 1727
|
12131 |
+
},
|
12132 |
+
{
|
12133 |
+
"epoch": 3.9587982832618027,
|
12134 |
+
"grad_norm": 1.0602082014083862,
|
12135 |
+
"learning_rate": 0.00014699038607859946,
|
12136 |
+
"loss": 0.8308,
|
12137 |
+
"step": 1728
|
12138 |
+
},
|
12139 |
+
{
|
12140 |
+
"epoch": 3.961087267525036,
|
12141 |
+
"grad_norm": 1.0133410692214966,
|
12142 |
+
"learning_rate": 0.00014693480283910941,
|
12143 |
+
"loss": 0.8048,
|
12144 |
+
"step": 1729
|
12145 |
+
},
|
12146 |
+
{
|
12147 |
+
"epoch": 3.9633762517882687,
|
12148 |
+
"grad_norm": 1.1373006105422974,
|
12149 |
+
"learning_rate": 0.00014687920099616311,
|
12150 |
+
"loss": 0.984,
|
12151 |
+
"step": 1730
|
12152 |
+
},
|
12153 |
+
{
|
12154 |
+
"epoch": 3.9656652360515023,
|
12155 |
+
"grad_norm": 1.101943016052246,
|
12156 |
+
"learning_rate": 0.00014682358057179932,
|
12157 |
+
"loss": 0.8476,
|
12158 |
+
"step": 1731
|
12159 |
+
},
|
12160 |
+
{
|
12161 |
+
"epoch": 3.967954220314735,
|
12162 |
+
"grad_norm": 1.243362307548523,
|
12163 |
+
"learning_rate": 0.00014676794158806423,
|
12164 |
+
"loss": 0.8338,
|
12165 |
+
"step": 1732
|
12166 |
+
},
|
12167 |
+
{
|
12168 |
+
"epoch": 3.9702432045779688,
|
12169 |
+
"grad_norm": 1.2640669345855713,
|
12170 |
+
"learning_rate": 0.00014671228406701133,
|
12171 |
+
"loss": 0.8639,
|
12172 |
+
"step": 1733
|
12173 |
+
},
|
12174 |
+
{
|
12175 |
+
"epoch": 3.9725321888412015,
|
12176 |
+
"grad_norm": 1.2642742395401,
|
12177 |
+
"learning_rate": 0.00014665660803070153,
|
12178 |
+
"loss": 0.7593,
|
12179 |
+
"step": 1734
|
12180 |
+
},
|
12181 |
+
{
|
12182 |
+
"epoch": 3.974821173104435,
|
12183 |
+
"grad_norm": 1.2393301725387573,
|
12184 |
+
"learning_rate": 0.00014660091350120297,
|
12185 |
+
"loss": 0.532,
|
12186 |
+
"step": 1735
|
12187 |
+
},
|
12188 |
+
{
|
12189 |
+
"epoch": 3.977110157367668,
|
12190 |
+
"grad_norm": 1.4510412216186523,
|
12191 |
+
"learning_rate": 0.00014654520050059121,
|
12192 |
+
"loss": 0.6041,
|
12193 |
+
"step": 1736
|
12194 |
+
},
|
12195 |
+
{
|
12196 |
+
"epoch": 3.979399141630901,
|
12197 |
+
"grad_norm": 1.30900239944458,
|
12198 |
+
"learning_rate": 0.00014648946905094912,
|
12199 |
+
"loss": 0.3425,
|
12200 |
+
"step": 1737
|
12201 |
+
},
|
12202 |
+
{
|
12203 |
+
"epoch": 3.9816881258941343,
|
12204 |
+
"grad_norm": 1.4096959829330444,
|
12205 |
+
"learning_rate": 0.00014643371917436683,
|
12206 |
+
"loss": 0.4687,
|
12207 |
+
"step": 1738
|
12208 |
+
},
|
12209 |
+
{
|
12210 |
+
"epoch": 3.9839771101573676,
|
12211 |
+
"grad_norm": 1.5130640268325806,
|
12212 |
+
"learning_rate": 0.00014637795089294186,
|
12213 |
+
"loss": 0.4595,
|
12214 |
+
"step": 1739
|
12215 |
+
},
|
12216 |
+
{
|
12217 |
+
"epoch": 3.9862660944206008,
|
12218 |
+
"grad_norm": 1.5498608350753784,
|
12219 |
+
"learning_rate": 0.0001463221642287789,
|
12220 |
+
"loss": 0.4417,
|
12221 |
+
"step": 1740
|
12222 |
+
},
|
12223 |
+
{
|
12224 |
+
"epoch": 3.988555078683834,
|
12225 |
+
"grad_norm": 1.3768302202224731,
|
12226 |
+
"learning_rate": 0.00014626635920399008,
|
12227 |
+
"loss": 0.3324,
|
12228 |
+
"step": 1741
|
12229 |
+
},
|
12230 |
+
{
|
12231 |
+
"epoch": 3.990844062947067,
|
12232 |
+
"grad_norm": 1.9338836669921875,
|
12233 |
+
"learning_rate": 0.00014621053584069468,
|
12234 |
+
"loss": 0.3389,
|
12235 |
+
"step": 1742
|
12236 |
+
},
|
12237 |
+
{
|
12238 |
+
"epoch": 3.9931330472103004,
|
12239 |
+
"grad_norm": 1.5647732019424438,
|
12240 |
+
"learning_rate": 0.0001461546941610193,
|
12241 |
+
"loss": 0.2612,
|
12242 |
+
"step": 1743
|
12243 |
+
},
|
12244 |
+
{
|
12245 |
+
"epoch": 3.9954220314735336,
|
12246 |
+
"grad_norm": 2.102107524871826,
|
12247 |
+
"learning_rate": 0.00014609883418709784,
|
12248 |
+
"loss": 0.3016,
|
12249 |
+
"step": 1744
|
12250 |
+
},
|
12251 |
+
{
|
12252 |
+
"epoch": 3.997711015736767,
|
12253 |
+
"grad_norm": 1.862336277961731,
|
12254 |
+
"learning_rate": 0.00014604295594107137,
|
12255 |
+
"loss": 0.1563,
|
12256 |
+
"step": 1745
|
12257 |
+
},
|
12258 |
+
{
|
12259 |
+
"epoch": 4.001144492131616,
|
12260 |
+
"grad_norm": 5.682592391967773,
|
12261 |
+
"learning_rate": 0.00014598705944508824,
|
12262 |
+
"loss": 1.076,
|
12263 |
+
"step": 1746
|
12264 |
+
},
|
12265 |
+
{
|
12266 |
+
"epoch": 4.00343347639485,
|
12267 |
+
"grad_norm": 0.44566667079925537,
|
12268 |
+
"learning_rate": 0.00014593114472130406,
|
12269 |
+
"loss": 1.6827,
|
12270 |
+
"step": 1747
|
12271 |
+
},
|
12272 |
+
{
|
12273 |
+
"epoch": 4.005722460658083,
|
12274 |
+
"grad_norm": 0.4679805040359497,
|
12275 |
+
"learning_rate": 0.0001458752117918816,
|
12276 |
+
"loss": 1.851,
|
12277 |
+
"step": 1748
|
12278 |
+
},
|
12279 |
+
{
|
12280 |
+
"epoch": 4.0080114449213164,
|
12281 |
+
"grad_norm": 0.5022954344749451,
|
12282 |
+
"learning_rate": 0.0001458192606789909,
|
12283 |
+
"loss": 1.567,
|
12284 |
+
"step": 1749
|
12285 |
+
},
|
12286 |
+
{
|
12287 |
+
"epoch": 4.010300429184549,
|
12288 |
+
"grad_norm": 0.5355679392814636,
|
12289 |
+
"learning_rate": 0.00014576329140480925,
|
12290 |
+
"loss": 1.2447,
|
12291 |
+
"step": 1750
|
12292 |
+
},
|
12293 |
+
{
|
12294 |
+
"epoch": 4.012589413447783,
|
12295 |
+
"grad_norm": 0.6063524484634399,
|
12296 |
+
"learning_rate": 0.000145707303991521,
|
12297 |
+
"loss": 1.5429,
|
12298 |
+
"step": 1751
|
12299 |
+
},
|
12300 |
+
{
|
12301 |
+
"epoch": 4.014878397711016,
|
12302 |
+
"grad_norm": 0.6033669710159302,
|
12303 |
+
"learning_rate": 0.00014565129846131784,
|
12304 |
+
"loss": 1.2655,
|
12305 |
+
"step": 1752
|
12306 |
+
},
|
12307 |
+
{
|
12308 |
+
"epoch": 4.017167381974249,
|
12309 |
+
"grad_norm": 0.6651263236999512,
|
12310 |
+
"learning_rate": 0.0001455952748363985,
|
12311 |
+
"loss": 1.3036,
|
12312 |
+
"step": 1753
|
12313 |
+
},
|
12314 |
+
{
|
12315 |
+
"epoch": 4.019456366237482,
|
12316 |
+
"grad_norm": 0.6865150332450867,
|
12317 |
+
"learning_rate": 0.00014553923313896902,
|
12318 |
+
"loss": 1.2273,
|
12319 |
+
"step": 1754
|
12320 |
+
},
|
12321 |
+
{
|
12322 |
+
"epoch": 4.021745350500716,
|
12323 |
+
"grad_norm": 0.8032099604606628,
|
12324 |
+
"learning_rate": 0.00014548317339124252,
|
12325 |
+
"loss": 1.3473,
|
12326 |
+
"step": 1755
|
12327 |
+
},
|
12328 |
+
{
|
12329 |
+
"epoch": 4.0240343347639485,
|
12330 |
+
"grad_norm": 0.7824806571006775,
|
12331 |
+
"learning_rate": 0.0001454270956154393,
|
12332 |
+
"loss": 1.1447,
|
12333 |
+
"step": 1756
|
12334 |
+
},
|
12335 |
+
{
|
12336 |
+
"epoch": 4.026323319027182,
|
12337 |
+
"grad_norm": 0.816189169883728,
|
12338 |
+
"learning_rate": 0.00014537099983378675,
|
12339 |
+
"loss": 1.1298,
|
12340 |
+
"step": 1757
|
12341 |
+
},
|
12342 |
+
{
|
12343 |
+
"epoch": 4.028612303290415,
|
12344 |
+
"grad_norm": 0.8846980333328247,
|
12345 |
+
"learning_rate": 0.0001453148860685195,
|
12346 |
+
"loss": 1.1162,
|
12347 |
+
"step": 1758
|
12348 |
+
},
|
12349 |
+
{
|
12350 |
+
"epoch": 4.030901287553648,
|
12351 |
+
"grad_norm": 0.9195663332939148,
|
12352 |
+
"learning_rate": 0.00014525875434187924,
|
12353 |
+
"loss": 1.1653,
|
12354 |
+
"step": 1759
|
12355 |
+
},
|
12356 |
+
{
|
12357 |
+
"epoch": 4.033190271816881,
|
12358 |
+
"grad_norm": 0.9265051484107971,
|
12359 |
+
"learning_rate": 0.0001452026046761148,
|
12360 |
+
"loss": 1.0801,
|
12361 |
+
"step": 1760
|
12362 |
+
},
|
12363 |
+
{
|
12364 |
+
"epoch": 4.035479256080114,
|
12365 |
+
"grad_norm": 1.041785717010498,
|
12366 |
+
"learning_rate": 0.0001451464370934821,
|
12367 |
+
"loss": 1.0668,
|
12368 |
+
"step": 1761
|
12369 |
+
},
|
12370 |
+
{
|
12371 |
+
"epoch": 4.037768240343348,
|
12372 |
+
"grad_norm": 0.9964609742164612,
|
12373 |
+
"learning_rate": 0.00014509025161624412,
|
12374 |
+
"loss": 0.9903,
|
12375 |
+
"step": 1762
|
12376 |
+
},
|
12377 |
+
{
|
12378 |
+
"epoch": 4.0400572246065805,
|
12379 |
+
"grad_norm": 1.0553271770477295,
|
12380 |
+
"learning_rate": 0.0001450340482666711,
|
12381 |
+
"loss": 0.8965,
|
12382 |
+
"step": 1763
|
12383 |
+
},
|
12384 |
+
{
|
12385 |
+
"epoch": 4.042346208869814,
|
12386 |
+
"grad_norm": 1.1435829401016235,
|
12387 |
+
"learning_rate": 0.00014497782706704017,
|
12388 |
+
"loss": 0.8686,
|
12389 |
+
"step": 1764
|
12390 |
+
},
|
12391 |
+
{
|
12392 |
+
"epoch": 4.044635193133047,
|
12393 |
+
"grad_norm": 1.0699126720428467,
|
12394 |
+
"learning_rate": 0.00014492158803963568,
|
12395 |
+
"loss": 0.7944,
|
12396 |
+
"step": 1765
|
12397 |
+
},
|
12398 |
+
{
|
12399 |
+
"epoch": 4.0469241773962805,
|
12400 |
+
"grad_norm": 1.1480063199996948,
|
12401 |
+
"learning_rate": 0.00014486533120674892,
|
12402 |
+
"loss": 0.8461,
|
12403 |
+
"step": 1766
|
12404 |
+
},
|
12405 |
+
{
|
12406 |
+
"epoch": 4.049213161659513,
|
12407 |
+
"grad_norm": 1.246055006980896,
|
12408 |
+
"learning_rate": 0.00014480905659067833,
|
12409 |
+
"loss": 0.776,
|
12410 |
+
"step": 1767
|
12411 |
+
},
|
12412 |
+
{
|
12413 |
+
"epoch": 4.051502145922747,
|
12414 |
+
"grad_norm": 1.2290637493133545,
|
12415 |
+
"learning_rate": 0.00014475276421372935,
|
12416 |
+
"loss": 0.7211,
|
12417 |
+
"step": 1768
|
12418 |
+
},
|
12419 |
+
{
|
12420 |
+
"epoch": 4.05379113018598,
|
12421 |
+
"grad_norm": 1.1152641773223877,
|
12422 |
+
"learning_rate": 0.00014469645409821455,
|
12423 |
+
"loss": 0.5442,
|
12424 |
+
"step": 1769
|
12425 |
+
},
|
12426 |
+
{
|
12427 |
+
"epoch": 4.056080114449213,
|
12428 |
+
"grad_norm": 1.152429461479187,
|
12429 |
+
"learning_rate": 0.00014464012626645335,
|
12430 |
+
"loss": 0.5681,
|
12431 |
+
"step": 1770
|
12432 |
+
},
|
12433 |
+
{
|
12434 |
+
"epoch": 4.058369098712446,
|
12435 |
+
"grad_norm": 1.2199641466140747,
|
12436 |
+
"learning_rate": 0.00014458378074077243,
|
12437 |
+
"loss": 0.5907,
|
12438 |
+
"step": 1771
|
12439 |
+
},
|
12440 |
+
{
|
12441 |
+
"epoch": 4.06065808297568,
|
12442 |
+
"grad_norm": 1.0936607122421265,
|
12443 |
+
"learning_rate": 0.00014452741754350526,
|
12444 |
+
"loss": 0.4856,
|
12445 |
+
"step": 1772
|
12446 |
+
},
|
12447 |
+
{
|
12448 |
+
"epoch": 4.0629470672389125,
|
12449 |
+
"grad_norm": 1.371535301208496,
|
12450 |
+
"learning_rate": 0.00014447103669699242,
|
12451 |
+
"loss": 0.544,
|
12452 |
+
"step": 1773
|
12453 |
+
},
|
12454 |
+
{
|
12455 |
+
"epoch": 4.065236051502146,
|
12456 |
+
"grad_norm": 1.1755516529083252,
|
12457 |
+
"learning_rate": 0.0001444146382235815,
|
12458 |
+
"loss": 0.4203,
|
12459 |
+
"step": 1774
|
12460 |
+
},
|
12461 |
+
{
|
12462 |
+
"epoch": 4.067525035765379,
|
12463 |
+
"grad_norm": 1.295109748840332,
|
12464 |
+
"learning_rate": 0.00014435822214562704,
|
12465 |
+
"loss": 0.4168,
|
12466 |
+
"step": 1775
|
12467 |
+
},
|
12468 |
+
{
|
12469 |
+
"epoch": 4.069814020028613,
|
12470 |
+
"grad_norm": 1.1494113206863403,
|
12471 |
+
"learning_rate": 0.0001443017884854906,
|
12472 |
+
"loss": 0.3503,
|
12473 |
+
"step": 1776
|
12474 |
+
},
|
12475 |
+
{
|
12476 |
+
"epoch": 4.072103004291845,
|
12477 |
+
"grad_norm": 1.2138291597366333,
|
12478 |
+
"learning_rate": 0.00014424533726554065,
|
12479 |
+
"loss": 0.3473,
|
12480 |
+
"step": 1777
|
12481 |
+
},
|
12482 |
+
{
|
12483 |
+
"epoch": 4.074391988555079,
|
12484 |
+
"grad_norm": 1.2661527395248413,
|
12485 |
+
"learning_rate": 0.00014418886850815267,
|
12486 |
+
"loss": 0.3394,
|
12487 |
+
"step": 1778
|
12488 |
+
},
|
12489 |
+
{
|
12490 |
+
"epoch": 4.076680972818312,
|
12491 |
+
"grad_norm": 1.399715781211853,
|
12492 |
+
"learning_rate": 0.000144132382235709,
|
12493 |
+
"loss": 0.3488,
|
12494 |
+
"step": 1779
|
12495 |
+
},
|
12496 |
+
{
|
12497 |
+
"epoch": 4.078969957081545,
|
12498 |
+
"grad_norm": 1.4522619247436523,
|
12499 |
+
"learning_rate": 0.00014407587847059913,
|
12500 |
+
"loss": 0.2779,
|
12501 |
+
"step": 1780
|
12502 |
+
},
|
12503 |
+
{
|
12504 |
+
"epoch": 4.081258941344778,
|
12505 |
+
"grad_norm": 1.2140947580337524,
|
12506 |
+
"learning_rate": 0.00014401935723521928,
|
12507 |
+
"loss": 0.2534,
|
12508 |
+
"step": 1781
|
12509 |
+
},
|
12510 |
+
{
|
12511 |
+
"epoch": 4.083547925608012,
|
12512 |
+
"grad_norm": 1.366463541984558,
|
12513 |
+
"learning_rate": 0.0001439628185519726,
|
12514 |
+
"loss": 0.2425,
|
12515 |
+
"step": 1782
|
12516 |
+
},
|
12517 |
+
{
|
12518 |
+
"epoch": 4.085836909871245,
|
12519 |
+
"grad_norm": 1.3253388404846191,
|
12520 |
+
"learning_rate": 0.00014390626244326925,
|
12521 |
+
"loss": 0.2125,
|
12522 |
+
"step": 1783
|
12523 |
+
},
|
12524 |
+
{
|
12525 |
+
"epoch": 4.088125894134478,
|
12526 |
+
"grad_norm": 1.2972759008407593,
|
12527 |
+
"learning_rate": 0.00014384968893152634,
|
12528 |
+
"loss": 0.1956,
|
12529 |
+
"step": 1784
|
12530 |
+
},
|
12531 |
+
{
|
12532 |
+
"epoch": 4.090414878397711,
|
12533 |
+
"grad_norm": 1.161211609840393,
|
12534 |
+
"learning_rate": 0.0001437930980391677,
|
12535 |
+
"loss": 0.1329,
|
12536 |
+
"step": 1785
|
12537 |
+
},
|
12538 |
+
{
|
12539 |
+
"epoch": 4.092703862660944,
|
12540 |
+
"grad_norm": 1.3612005710601807,
|
12541 |
+
"learning_rate": 0.00014373648978862426,
|
12542 |
+
"loss": 0.1395,
|
12543 |
+
"step": 1786
|
12544 |
+
},
|
12545 |
+
{
|
12546 |
+
"epoch": 4.094992846924177,
|
12547 |
+
"grad_norm": 1.3977785110473633,
|
12548 |
+
"learning_rate": 0.0001436798642023336,
|
12549 |
+
"loss": 0.1925,
|
12550 |
+
"step": 1787
|
12551 |
+
},
|
12552 |
+
{
|
12553 |
+
"epoch": 4.09728183118741,
|
12554 |
+
"grad_norm": 1.3967665433883667,
|
12555 |
+
"learning_rate": 0.0001436232213027404,
|
12556 |
+
"loss": 0.1068,
|
12557 |
+
"step": 1788
|
12558 |
+
},
|
12559 |
+
{
|
12560 |
+
"epoch": 4.099570815450644,
|
12561 |
+
"grad_norm": 2.064885139465332,
|
12562 |
+
"learning_rate": 0.00014356656111229603,
|
12563 |
+
"loss": 0.1559,
|
12564 |
+
"step": 1789
|
12565 |
+
},
|
12566 |
+
{
|
12567 |
+
"epoch": 4.101859799713877,
|
12568 |
+
"grad_norm": 1.9638558626174927,
|
12569 |
+
"learning_rate": 0.00014350988365345878,
|
12570 |
+
"loss": 0.1913,
|
12571 |
+
"step": 1790
|
12572 |
+
},
|
12573 |
+
{
|
12574 |
+
"epoch": 4.10414878397711,
|
12575 |
+
"grad_norm": 2.244676113128662,
|
12576 |
+
"learning_rate": 0.00014345318894869383,
|
12577 |
+
"loss": 0.1793,
|
12578 |
+
"step": 1791
|
12579 |
+
},
|
12580 |
+
{
|
12581 |
+
"epoch": 4.106437768240343,
|
12582 |
+
"grad_norm": 2.107057809829712,
|
12583 |
+
"learning_rate": 0.0001433964770204731,
|
12584 |
+
"loss": 0.1607,
|
12585 |
+
"step": 1792
|
12586 |
+
},
|
12587 |
+
{
|
12588 |
+
"epoch": 4.108726752503577,
|
12589 |
+
"grad_norm": 2.0315301418304443,
|
12590 |
+
"learning_rate": 0.00014333974789127542,
|
12591 |
+
"loss": 0.0995,
|
12592 |
+
"step": 1793
|
12593 |
+
},
|
12594 |
+
{
|
12595 |
+
"epoch": 4.111015736766809,
|
12596 |
+
"grad_norm": 4.222057819366455,
|
12597 |
+
"learning_rate": 0.0001432830015835864,
|
12598 |
+
"loss": 0.2596,
|
12599 |
+
"step": 1794
|
12600 |
+
},
|
12601 |
+
{
|
12602 |
+
"epoch": 4.113304721030043,
|
12603 |
+
"grad_norm": 2.639320135116577,
|
12604 |
+
"learning_rate": 0.00014322623811989843,
|
12605 |
+
"loss": 0.2111,
|
12606 |
+
"step": 1795
|
12607 |
+
},
|
12608 |
+
{
|
12609 |
+
"epoch": 4.115593705293276,
|
12610 |
+
"grad_norm": 0.8525104522705078,
|
12611 |
+
"learning_rate": 0.0001431694575227108,
|
12612 |
+
"loss": 0.8591,
|
12613 |
+
"step": 1796
|
12614 |
+
},
|
12615 |
+
{
|
12616 |
+
"epoch": 4.1178826895565095,
|
12617 |
+
"grad_norm": 0.6069846153259277,
|
12618 |
+
"learning_rate": 0.00014311265981452946,
|
12619 |
+
"loss": 1.7719,
|
12620 |
+
"step": 1797
|
12621 |
+
},
|
12622 |
+
{
|
12623 |
+
"epoch": 4.120171673819742,
|
12624 |
+
"grad_norm": 0.6406371593475342,
|
12625 |
+
"learning_rate": 0.0001430558450178672,
|
12626 |
+
"loss": 1.6625,
|
12627 |
+
"step": 1798
|
12628 |
+
},
|
12629 |
+
{
|
12630 |
+
"epoch": 4.122460658082976,
|
12631 |
+
"grad_norm": 0.7228275537490845,
|
12632 |
+
"learning_rate": 0.00014299901315524368,
|
12633 |
+
"loss": 1.5061,
|
12634 |
+
"step": 1799
|
12635 |
+
},
|
12636 |
+
{
|
12637 |
+
"epoch": 4.124749642346209,
|
12638 |
+
"grad_norm": 0.7601426243782043,
|
12639 |
+
"learning_rate": 0.00014294216424918515,
|
12640 |
+
"loss": 1.4428,
|
12641 |
+
"step": 1800
|
12642 |
+
},
|
12643 |
+
{
|
12644 |
+
"epoch": 4.127038626609442,
|
12645 |
+
"grad_norm": 0.7522489428520203,
|
12646 |
+
"learning_rate": 0.00014288529832222474,
|
12647 |
+
"loss": 1.3279,
|
12648 |
+
"step": 1801
|
12649 |
+
},
|
12650 |
+
{
|
12651 |
+
"epoch": 4.129327610872675,
|
12652 |
+
"grad_norm": 0.7654016613960266,
|
12653 |
+
"learning_rate": 0.00014282841539690221,
|
12654 |
+
"loss": 1.2668,
|
12655 |
+
"step": 1802
|
12656 |
+
},
|
12657 |
+
{
|
12658 |
+
"epoch": 4.131616595135909,
|
12659 |
+
"grad_norm": 0.7934605479240417,
|
12660 |
+
"learning_rate": 0.00014277151549576427,
|
12661 |
+
"loss": 1.331,
|
12662 |
+
"step": 1803
|
12663 |
+
},
|
12664 |
+
{
|
12665 |
+
"epoch": 4.1339055793991415,
|
12666 |
+
"grad_norm": 0.8227596879005432,
|
12667 |
+
"learning_rate": 0.0001427145986413641,
|
12668 |
+
"loss": 1.2089,
|
12669 |
+
"step": 1804
|
12670 |
+
},
|
12671 |
+
{
|
12672 |
+
"epoch": 4.136194563662375,
|
12673 |
+
"grad_norm": 0.7974002361297607,
|
12674 |
+
"learning_rate": 0.0001426576648562618,
|
12675 |
+
"loss": 1.2156,
|
12676 |
+
"step": 1805
|
12677 |
+
},
|
12678 |
+
{
|
12679 |
+
"epoch": 4.138483547925608,
|
12680 |
+
"grad_norm": 0.8188871741294861,
|
12681 |
+
"learning_rate": 0.00014260071416302407,
|
12682 |
+
"loss": 1.1281,
|
12683 |
+
"step": 1806
|
12684 |
+
},
|
12685 |
+
{
|
12686 |
+
"epoch": 4.140772532188842,
|
12687 |
+
"grad_norm": 0.8775198459625244,
|
12688 |
+
"learning_rate": 0.00014254374658422434,
|
12689 |
+
"loss": 1.1846,
|
12690 |
+
"step": 1807
|
12691 |
+
},
|
12692 |
+
{
|
12693 |
+
"epoch": 4.143061516452074,
|
12694 |
+
"grad_norm": 0.8636072278022766,
|
12695 |
+
"learning_rate": 0.00014248676214244273,
|
12696 |
+
"loss": 0.969,
|
12697 |
+
"step": 1808
|
12698 |
+
},
|
12699 |
+
{
|
12700 |
+
"epoch": 4.145350500715308,
|
12701 |
+
"grad_norm": 0.9492164850234985,
|
12702 |
+
"learning_rate": 0.0001424297608602661,
|
12703 |
+
"loss": 1.0989,
|
12704 |
+
"step": 1809
|
12705 |
+
},
|
12706 |
+
{
|
12707 |
+
"epoch": 4.147639484978541,
|
12708 |
+
"grad_norm": 0.9339204430580139,
|
12709 |
+
"learning_rate": 0.0001423727427602879,
|
12710 |
+
"loss": 1.0414,
|
12711 |
+
"step": 1810
|
12712 |
+
},
|
12713 |
+
{
|
12714 |
+
"epoch": 4.149928469241774,
|
12715 |
+
"grad_norm": 0.9688881039619446,
|
12716 |
+
"learning_rate": 0.0001423157078651083,
|
12717 |
+
"loss": 1.0163,
|
12718 |
+
"step": 1811
|
12719 |
+
},
|
12720 |
+
{
|
12721 |
+
"epoch": 4.152217453505007,
|
12722 |
+
"grad_norm": 0.9714307188987732,
|
12723 |
+
"learning_rate": 0.0001422586561973341,
|
12724 |
+
"loss": 0.9926,
|
12725 |
+
"step": 1812
|
12726 |
+
},
|
12727 |
+
{
|
12728 |
+
"epoch": 4.154506437768241,
|
12729 |
+
"grad_norm": 1.0081877708435059,
|
12730 |
+
"learning_rate": 0.00014220158777957876,
|
12731 |
+
"loss": 0.8907,
|
12732 |
+
"step": 1813
|
12733 |
+
},
|
12734 |
+
{
|
12735 |
+
"epoch": 4.156795422031474,
|
12736 |
+
"grad_norm": 1.0111851692199707,
|
12737 |
+
"learning_rate": 0.00014214450263446234,
|
12738 |
+
"loss": 0.8518,
|
12739 |
+
"step": 1814
|
12740 |
+
},
|
12741 |
+
{
|
12742 |
+
"epoch": 4.159084406294706,
|
12743 |
+
"grad_norm": 1.0751018524169922,
|
12744 |
+
"learning_rate": 0.00014208740078461166,
|
12745 |
+
"loss": 0.8703,
|
12746 |
+
"step": 1815
|
12747 |
+
},
|
12748 |
+
{
|
12749 |
+
"epoch": 4.16137339055794,
|
12750 |
+
"grad_norm": 1.1573535203933716,
|
12751 |
+
"learning_rate": 0.00014203028225265999,
|
12752 |
+
"loss": 0.8156,
|
12753 |
+
"step": 1816
|
12754 |
+
},
|
12755 |
+
{
|
12756 |
+
"epoch": 4.163662374821173,
|
12757 |
+
"grad_norm": 1.0764294862747192,
|
12758 |
+
"learning_rate": 0.00014197314706124729,
|
12759 |
+
"loss": 0.8363,
|
12760 |
+
"step": 1817
|
12761 |
+
},
|
12762 |
+
{
|
12763 |
+
"epoch": 4.165951359084406,
|
12764 |
+
"grad_norm": 1.1808897256851196,
|
12765 |
+
"learning_rate": 0.00014191599523302018,
|
12766 |
+
"loss": 0.7065,
|
12767 |
+
"step": 1818
|
12768 |
+
},
|
12769 |
+
{
|
12770 |
+
"epoch": 4.168240343347639,
|
12771 |
+
"grad_norm": 1.208436369895935,
|
12772 |
+
"learning_rate": 0.00014185882679063175,
|
12773 |
+
"loss": 0.6565,
|
12774 |
+
"step": 1819
|
12775 |
+
},
|
12776 |
+
{
|
12777 |
+
"epoch": 4.170529327610873,
|
12778 |
+
"grad_norm": 1.145291805267334,
|
12779 |
+
"learning_rate": 0.00014180164175674184,
|
12780 |
+
"loss": 0.6326,
|
12781 |
+
"step": 1820
|
12782 |
+
},
|
12783 |
+
{
|
12784 |
+
"epoch": 4.172818311874106,
|
12785 |
+
"grad_norm": 1.1896421909332275,
|
12786 |
+
"learning_rate": 0.00014174444015401666,
|
12787 |
+
"loss": 0.6041,
|
12788 |
+
"step": 1821
|
12789 |
+
},
|
12790 |
+
{
|
12791 |
+
"epoch": 4.175107296137339,
|
12792 |
+
"grad_norm": 1.1539980173110962,
|
12793 |
+
"learning_rate": 0.00014168722200512917,
|
12794 |
+
"loss": 0.5126,
|
12795 |
+
"step": 1822
|
12796 |
+
},
|
12797 |
+
{
|
12798 |
+
"epoch": 4.177396280400572,
|
12799 |
+
"grad_norm": 1.2064073085784912,
|
12800 |
+
"learning_rate": 0.00014162998733275876,
|
12801 |
+
"loss": 0.547,
|
12802 |
+
"step": 1823
|
12803 |
+
},
|
12804 |
+
{
|
12805 |
+
"epoch": 4.179685264663806,
|
12806 |
+
"grad_norm": 1.160438895225525,
|
12807 |
+
"learning_rate": 0.0001415727361595915,
|
12808 |
+
"loss": 0.4952,
|
12809 |
+
"step": 1824
|
12810 |
+
},
|
12811 |
+
{
|
12812 |
+
"epoch": 4.181974248927038,
|
12813 |
+
"grad_norm": 1.2637494802474976,
|
12814 |
+
"learning_rate": 0.00014151546850831988,
|
12815 |
+
"loss": 0.5077,
|
12816 |
+
"step": 1825
|
12817 |
+
},
|
12818 |
+
{
|
12819 |
+
"epoch": 4.184263233190272,
|
12820 |
+
"grad_norm": 1.291296362876892,
|
12821 |
+
"learning_rate": 0.00014145818440164298,
|
12822 |
+
"loss": 0.4651,
|
12823 |
+
"step": 1826
|
12824 |
+
},
|
12825 |
+
{
|
12826 |
+
"epoch": 4.186552217453505,
|
12827 |
+
"grad_norm": 1.1155532598495483,
|
12828 |
+
"learning_rate": 0.00014140088386226638,
|
12829 |
+
"loss": 0.3349,
|
12830 |
+
"step": 1827
|
12831 |
+
},
|
12832 |
+
{
|
12833 |
+
"epoch": 4.1888412017167385,
|
12834 |
+
"grad_norm": 1.3315963745117188,
|
12835 |
+
"learning_rate": 0.00014134356691290217,
|
12836 |
+
"loss": 0.3803,
|
12837 |
+
"step": 1828
|
12838 |
+
},
|
12839 |
+
{
|
12840 |
+
"epoch": 4.191130185979971,
|
12841 |
+
"grad_norm": 1.1778143644332886,
|
12842 |
+
"learning_rate": 0.00014128623357626897,
|
12843 |
+
"loss": 0.3056,
|
12844 |
+
"step": 1829
|
12845 |
+
},
|
12846 |
+
{
|
12847 |
+
"epoch": 4.193419170243205,
|
12848 |
+
"grad_norm": 1.4100370407104492,
|
12849 |
+
"learning_rate": 0.00014122888387509193,
|
12850 |
+
"loss": 0.2322,
|
12851 |
+
"step": 1830
|
12852 |
+
},
|
12853 |
+
{
|
12854 |
+
"epoch": 4.195708154506438,
|
12855 |
+
"grad_norm": 1.3123832941055298,
|
12856 |
+
"learning_rate": 0.00014117151783210255,
|
12857 |
+
"loss": 0.2406,
|
12858 |
+
"step": 1831
|
12859 |
+
},
|
12860 |
+
{
|
12861 |
+
"epoch": 4.197997138769671,
|
12862 |
+
"grad_norm": 1.4539953470230103,
|
12863 |
+
"learning_rate": 0.00014111413547003896,
|
12864 |
+
"loss": 0.2775,
|
12865 |
+
"step": 1832
|
12866 |
+
},
|
12867 |
+
{
|
12868 |
+
"epoch": 4.200286123032904,
|
12869 |
+
"grad_norm": 1.5130772590637207,
|
12870 |
+
"learning_rate": 0.00014105673681164569,
|
12871 |
+
"loss": 0.2328,
|
12872 |
+
"step": 1833
|
12873 |
+
},
|
12874 |
+
{
|
12875 |
+
"epoch": 4.202575107296138,
|
12876 |
+
"grad_norm": 1.4674046039581299,
|
12877 |
+
"learning_rate": 0.00014099932187967368,
|
12878 |
+
"loss": 0.1935,
|
12879 |
+
"step": 1834
|
12880 |
+
},
|
12881 |
+
{
|
12882 |
+
"epoch": 4.2048640915593705,
|
12883 |
+
"grad_norm": 1.2991887331008911,
|
12884 |
+
"learning_rate": 0.00014094189069688046,
|
12885 |
+
"loss": 0.1563,
|
12886 |
+
"step": 1835
|
12887 |
+
},
|
12888 |
+
{
|
12889 |
+
"epoch": 4.207153075822604,
|
12890 |
+
"grad_norm": 1.2331081628799438,
|
12891 |
+
"learning_rate": 0.00014088444328602985,
|
12892 |
+
"loss": 0.1224,
|
12893 |
+
"step": 1836
|
12894 |
+
},
|
12895 |
+
{
|
12896 |
+
"epoch": 4.209442060085837,
|
12897 |
+
"grad_norm": 1.1798337697982788,
|
12898 |
+
"learning_rate": 0.00014082697966989223,
|
12899 |
+
"loss": 0.126,
|
12900 |
+
"step": 1837
|
12901 |
+
},
|
12902 |
+
{
|
12903 |
+
"epoch": 4.2117310443490705,
|
12904 |
+
"grad_norm": 1.1834417581558228,
|
12905 |
+
"learning_rate": 0.00014076949987124423,
|
12906 |
+
"loss": 0.1049,
|
12907 |
+
"step": 1838
|
12908 |
+
},
|
12909 |
+
{
|
12910 |
+
"epoch": 4.214020028612303,
|
12911 |
+
"grad_norm": 1.417945146560669,
|
12912 |
+
"learning_rate": 0.00014071200391286914,
|
12913 |
+
"loss": 0.1426,
|
12914 |
+
"step": 1839
|
12915 |
+
},
|
12916 |
+
{
|
12917 |
+
"epoch": 4.216309012875536,
|
12918 |
+
"grad_norm": 1.5263046026229858,
|
12919 |
+
"learning_rate": 0.00014065449181755642,
|
12920 |
+
"loss": 0.1735,
|
12921 |
+
"step": 1840
|
12922 |
+
},
|
12923 |
+
{
|
12924 |
+
"epoch": 4.21859799713877,
|
12925 |
+
"grad_norm": 2.0970356464385986,
|
12926 |
+
"learning_rate": 0.00014059696360810208,
|
12927 |
+
"loss": 0.1309,
|
12928 |
+
"step": 1841
|
12929 |
+
},
|
12930 |
+
{
|
12931 |
+
"epoch": 4.2208869814020025,
|
12932 |
+
"grad_norm": 2.2149500846862793,
|
12933 |
+
"learning_rate": 0.00014053941930730846,
|
12934 |
+
"loss": 0.1119,
|
12935 |
+
"step": 1842
|
12936 |
+
},
|
12937 |
+
{
|
12938 |
+
"epoch": 4.223175965665236,
|
12939 |
+
"grad_norm": 2.0250422954559326,
|
12940 |
+
"learning_rate": 0.00014048185893798424,
|
12941 |
+
"loss": 0.122,
|
12942 |
+
"step": 1843
|
12943 |
+
},
|
12944 |
+
{
|
12945 |
+
"epoch": 4.225464949928469,
|
12946 |
+
"grad_norm": 3.0951108932495117,
|
12947 |
+
"learning_rate": 0.00014042428252294457,
|
12948 |
+
"loss": 0.1005,
|
12949 |
+
"step": 1844
|
12950 |
+
},
|
12951 |
+
{
|
12952 |
+
"epoch": 4.2277539341917025,
|
12953 |
+
"grad_norm": 2.9070775508880615,
|
12954 |
+
"learning_rate": 0.0001403666900850109,
|
12955 |
+
"loss": 0.2827,
|
12956 |
+
"step": 1845
|
12957 |
+
},
|
12958 |
+
{
|
12959 |
+
"epoch": 4.230042918454935,
|
12960 |
+
"grad_norm": 0.9913147687911987,
|
12961 |
+
"learning_rate": 0.00014030908164701098,
|
12962 |
+
"loss": 0.9888,
|
12963 |
+
"step": 1846
|
12964 |
+
},
|
12965 |
+
{
|
12966 |
+
"epoch": 4.232331902718169,
|
12967 |
+
"grad_norm": 0.5928571224212646,
|
12968 |
+
"learning_rate": 0.00014025145723177895,
|
12969 |
+
"loss": 1.6881,
|
12970 |
+
"step": 1847
|
12971 |
+
},
|
12972 |
+
{
|
12973 |
+
"epoch": 4.234620886981402,
|
12974 |
+
"grad_norm": 0.6608178019523621,
|
12975 |
+
"learning_rate": 0.00014019381686215535,
|
12976 |
+
"loss": 1.7316,
|
12977 |
+
"step": 1848
|
12978 |
+
},
|
12979 |
+
{
|
12980 |
+
"epoch": 4.236909871244635,
|
12981 |
+
"grad_norm": 0.6736155152320862,
|
12982 |
+
"learning_rate": 0.00014013616056098694,
|
12983 |
+
"loss": 1.452,
|
12984 |
+
"step": 1849
|
12985 |
+
},
|
12986 |
+
{
|
12987 |
+
"epoch": 4.239198855507868,
|
12988 |
+
"grad_norm": 0.7292165160179138,
|
12989 |
+
"learning_rate": 0.00014007848835112687,
|
12990 |
+
"loss": 1.583,
|
12991 |
+
"step": 1850
|
12992 |
+
},
|
12993 |
+
{
|
12994 |
+
"epoch": 4.241487839771102,
|
12995 |
+
"grad_norm": 0.7763010263442993,
|
12996 |
+
"learning_rate": 0.00014002080025543451,
|
12997 |
+
"loss": 1.4831,
|
12998 |
+
"step": 1851
|
12999 |
+
},
|
13000 |
+
{
|
13001 |
+
"epoch": 4.2437768240343345,
|
13002 |
+
"grad_norm": 0.827728271484375,
|
13003 |
+
"learning_rate": 0.00013996309629677562,
|
13004 |
+
"loss": 1.52,
|
13005 |
+
"step": 1852
|
13006 |
+
},
|
13007 |
+
{
|
13008 |
+
"epoch": 4.246065808297568,
|
13009 |
+
"grad_norm": 0.8173840641975403,
|
13010 |
+
"learning_rate": 0.00013990537649802217,
|
13011 |
+
"loss": 1.31,
|
13012 |
+
"step": 1853
|
13013 |
+
},
|
13014 |
+
{
|
13015 |
+
"epoch": 4.248354792560801,
|
13016 |
+
"grad_norm": 0.8371086120605469,
|
13017 |
+
"learning_rate": 0.00013984764088205247,
|
13018 |
+
"loss": 1.259,
|
13019 |
+
"step": 1854
|
13020 |
+
},
|
13021 |
+
{
|
13022 |
+
"epoch": 4.250643776824035,
|
13023 |
+
"grad_norm": 0.8675817251205444,
|
13024 |
+
"learning_rate": 0.00013978988947175108,
|
13025 |
+
"loss": 1.2215,
|
13026 |
+
"step": 1855
|
13027 |
+
},
|
13028 |
+
{
|
13029 |
+
"epoch": 4.252932761087267,
|
13030 |
+
"grad_norm": 0.9176662564277649,
|
13031 |
+
"learning_rate": 0.0001397321222900088,
|
13032 |
+
"loss": 1.3494,
|
13033 |
+
"step": 1856
|
13034 |
+
},
|
13035 |
+
{
|
13036 |
+
"epoch": 4.255221745350501,
|
13037 |
+
"grad_norm": 0.8640986084938049,
|
13038 |
+
"learning_rate": 0.00013967433935972267,
|
13039 |
+
"loss": 0.9971,
|
13040 |
+
"step": 1857
|
13041 |
+
},
|
13042 |
+
{
|
13043 |
+
"epoch": 4.257510729613734,
|
13044 |
+
"grad_norm": 0.9197400212287903,
|
13045 |
+
"learning_rate": 0.00013961654070379604,
|
13046 |
+
"loss": 1.3061,
|
13047 |
+
"step": 1858
|
13048 |
+
},
|
13049 |
+
{
|
13050 |
+
"epoch": 4.259799713876967,
|
13051 |
+
"grad_norm": 0.9120035767555237,
|
13052 |
+
"learning_rate": 0.00013955872634513844,
|
13053 |
+
"loss": 0.9622,
|
13054 |
+
"step": 1859
|
13055 |
+
},
|
13056 |
+
{
|
13057 |
+
"epoch": 4.2620886981402,
|
13058 |
+
"grad_norm": 0.977082371711731,
|
13059 |
+
"learning_rate": 0.00013950089630666564,
|
13060 |
+
"loss": 1.1379,
|
13061 |
+
"step": 1860
|
13062 |
+
},
|
13063 |
+
{
|
13064 |
+
"epoch": 4.264377682403434,
|
13065 |
+
"grad_norm": 1.0064245462417603,
|
13066 |
+
"learning_rate": 0.00013944305061129957,
|
13067 |
+
"loss": 1.1149,
|
13068 |
+
"step": 1861
|
13069 |
+
},
|
13070 |
+
{
|
13071 |
+
"epoch": 4.266666666666667,
|
13072 |
+
"grad_norm": 1.0343989133834839,
|
13073 |
+
"learning_rate": 0.0001393851892819685,
|
13074 |
+
"loss": 1.0638,
|
13075 |
+
"step": 1862
|
13076 |
+
},
|
13077 |
+
{
|
13078 |
+
"epoch": 4.2689556509299,
|
13079 |
+
"grad_norm": 0.9780799746513367,
|
13080 |
+
"learning_rate": 0.00013932731234160675,
|
13081 |
+
"loss": 0.8629,
|
13082 |
+
"step": 1863
|
13083 |
+
},
|
13084 |
+
{
|
13085 |
+
"epoch": 4.271244635193133,
|
13086 |
+
"grad_norm": 1.014587163925171,
|
13087 |
+
"learning_rate": 0.00013926941981315486,
|
13088 |
+
"loss": 0.9844,
|
13089 |
+
"step": 1864
|
13090 |
+
},
|
13091 |
+
{
|
13092 |
+
"epoch": 4.273533619456366,
|
13093 |
+
"grad_norm": 1.0625803470611572,
|
13094 |
+
"learning_rate": 0.0001392115117195597,
|
13095 |
+
"loss": 0.8582,
|
13096 |
+
"step": 1865
|
13097 |
+
},
|
13098 |
+
{
|
13099 |
+
"epoch": 4.275822603719599,
|
13100 |
+
"grad_norm": 1.1477220058441162,
|
13101 |
+
"learning_rate": 0.00013915358808377405,
|
13102 |
+
"loss": 0.8825,
|
13103 |
+
"step": 1866
|
13104 |
+
},
|
13105 |
+
{
|
13106 |
+
"epoch": 4.278111587982833,
|
13107 |
+
"grad_norm": 1.1152499914169312,
|
13108 |
+
"learning_rate": 0.0001390956489287571,
|
13109 |
+
"loss": 0.6892,
|
13110 |
+
"step": 1867
|
13111 |
+
},
|
13112 |
+
{
|
13113 |
+
"epoch": 4.280400572246066,
|
13114 |
+
"grad_norm": 1.157660961151123,
|
13115 |
+
"learning_rate": 0.000139037694277474,
|
13116 |
+
"loss": 0.7486,
|
13117 |
+
"step": 1868
|
13118 |
+
},
|
13119 |
+
{
|
13120 |
+
"epoch": 4.282689556509299,
|
13121 |
+
"grad_norm": 1.1074392795562744,
|
13122 |
+
"learning_rate": 0.00013897972415289616,
|
13123 |
+
"loss": 0.6215,
|
13124 |
+
"step": 1869
|
13125 |
+
},
|
13126 |
+
{
|
13127 |
+
"epoch": 4.284978540772532,
|
13128 |
+
"grad_norm": 1.1611192226409912,
|
13129 |
+
"learning_rate": 0.00013892173857800108,
|
13130 |
+
"loss": 0.6177,
|
13131 |
+
"step": 1870
|
13132 |
+
},
|
13133 |
+
{
|
13134 |
+
"epoch": 4.287267525035765,
|
13135 |
+
"grad_norm": 1.1261109113693237,
|
13136 |
+
"learning_rate": 0.00013886373757577238,
|
13137 |
+
"loss": 0.5579,
|
13138 |
+
"step": 1871
|
13139 |
+
},
|
13140 |
+
{
|
13141 |
+
"epoch": 4.289556509298999,
|
13142 |
+
"grad_norm": 1.1944984197616577,
|
13143 |
+
"learning_rate": 0.00013880572116919983,
|
13144 |
+
"loss": 0.5132,
|
13145 |
+
"step": 1872
|
13146 |
+
},
|
13147 |
+
{
|
13148 |
+
"epoch": 4.291845493562231,
|
13149 |
+
"grad_norm": 1.183237075805664,
|
13150 |
+
"learning_rate": 0.00013874768938127924,
|
13151 |
+
"loss": 0.474,
|
13152 |
+
"step": 1873
|
13153 |
+
},
|
13154 |
+
{
|
13155 |
+
"epoch": 4.294134477825465,
|
13156 |
+
"grad_norm": 1.2248847484588623,
|
13157 |
+
"learning_rate": 0.00013868964223501262,
|
13158 |
+
"loss": 0.437,
|
13159 |
+
"step": 1874
|
13160 |
+
},
|
13161 |
+
{
|
13162 |
+
"epoch": 4.296423462088698,
|
13163 |
+
"grad_norm": 1.3775705099105835,
|
13164 |
+
"learning_rate": 0.00013863157975340791,
|
13165 |
+
"loss": 0.4138,
|
13166 |
+
"step": 1875
|
13167 |
+
},
|
13168 |
+
{
|
13169 |
+
"epoch": 4.2987124463519315,
|
13170 |
+
"grad_norm": 1.3187086582183838,
|
13171 |
+
"learning_rate": 0.00013857350195947932,
|
13172 |
+
"loss": 0.3899,
|
13173 |
+
"step": 1876
|
13174 |
+
},
|
13175 |
+
{
|
13176 |
+
"epoch": 4.301001430615164,
|
13177 |
+
"grad_norm": 1.2714444398880005,
|
13178 |
+
"learning_rate": 0.00013851540887624693,
|
13179 |
+
"loss": 0.3745,
|
13180 |
+
"step": 1877
|
13181 |
+
},
|
13182 |
+
{
|
13183 |
+
"epoch": 4.303290414878398,
|
13184 |
+
"grad_norm": 1.565042495727539,
|
13185 |
+
"learning_rate": 0.0001384573005267371,
|
13186 |
+
"loss": 0.3452,
|
13187 |
+
"step": 1878
|
13188 |
+
},
|
13189 |
+
{
|
13190 |
+
"epoch": 4.305579399141631,
|
13191 |
+
"grad_norm": 1.5059938430786133,
|
13192 |
+
"learning_rate": 0.00013839917693398202,
|
13193 |
+
"loss": 0.3695,
|
13194 |
+
"step": 1879
|
13195 |
+
},
|
13196 |
+
{
|
13197 |
+
"epoch": 4.307868383404864,
|
13198 |
+
"grad_norm": 1.5211122035980225,
|
13199 |
+
"learning_rate": 0.00013834103812102012,
|
13200 |
+
"loss": 0.3363,
|
13201 |
+
"step": 1880
|
13202 |
+
},
|
13203 |
+
{
|
13204 |
+
"epoch": 4.310157367668097,
|
13205 |
+
"grad_norm": 1.726873755455017,
|
13206 |
+
"learning_rate": 0.00013828288411089565,
|
13207 |
+
"loss": 0.2977,
|
13208 |
+
"step": 1881
|
13209 |
+
},
|
13210 |
+
{
|
13211 |
+
"epoch": 4.312446351931331,
|
13212 |
+
"grad_norm": 1.762004017829895,
|
13213 |
+
"learning_rate": 0.0001382247149266591,
|
13214 |
+
"loss": 0.289,
|
13215 |
+
"step": 1882
|
13216 |
+
},
|
13217 |
+
{
|
13218 |
+
"epoch": 4.3147353361945635,
|
13219 |
+
"grad_norm": 1.7233407497406006,
|
13220 |
+
"learning_rate": 0.00013816653059136683,
|
13221 |
+
"loss": 0.2772,
|
13222 |
+
"step": 1883
|
13223 |
+
},
|
13224 |
+
{
|
13225 |
+
"epoch": 4.317024320457797,
|
13226 |
+
"grad_norm": 1.2020918130874634,
|
13227 |
+
"learning_rate": 0.0001381083311280813,
|
13228 |
+
"loss": 0.1716,
|
13229 |
+
"step": 1884
|
13230 |
+
},
|
13231 |
+
{
|
13232 |
+
"epoch": 4.31931330472103,
|
13233 |
+
"grad_norm": 1.3072203397750854,
|
13234 |
+
"learning_rate": 0.00013805011655987083,
|
13235 |
+
"loss": 0.1881,
|
13236 |
+
"step": 1885
|
13237 |
+
},
|
13238 |
+
{
|
13239 |
+
"epoch": 4.321602288984264,
|
13240 |
+
"grad_norm": 1.6461601257324219,
|
13241 |
+
"learning_rate": 0.00013799188690980988,
|
13242 |
+
"loss": 0.2338,
|
13243 |
+
"step": 1886
|
13244 |
+
},
|
13245 |
+
{
|
13246 |
+
"epoch": 4.323891273247496,
|
13247 |
+
"grad_norm": 1.901731252670288,
|
13248 |
+
"learning_rate": 0.00013793364220097883,
|
13249 |
+
"loss": 0.2204,
|
13250 |
+
"step": 1887
|
13251 |
+
},
|
13252 |
+
{
|
13253 |
+
"epoch": 4.32618025751073,
|
13254 |
+
"grad_norm": 1.323927402496338,
|
13255 |
+
"learning_rate": 0.000137875382456464,
|
13256 |
+
"loss": 0.1716,
|
13257 |
+
"step": 1888
|
13258 |
+
},
|
13259 |
+
{
|
13260 |
+
"epoch": 4.328469241773963,
|
13261 |
+
"grad_norm": 1.318861722946167,
|
13262 |
+
"learning_rate": 0.00013781710769935772,
|
13263 |
+
"loss": 0.1424,
|
13264 |
+
"step": 1889
|
13265 |
+
},
|
13266 |
+
{
|
13267 |
+
"epoch": 4.330758226037196,
|
13268 |
+
"grad_norm": 1.673195719718933,
|
13269 |
+
"learning_rate": 0.00013775881795275816,
|
13270 |
+
"loss": 0.1442,
|
13271 |
+
"step": 1890
|
13272 |
+
},
|
13273 |
+
{
|
13274 |
+
"epoch": 4.333047210300429,
|
13275 |
+
"grad_norm": 1.337529182434082,
|
13276 |
+
"learning_rate": 0.0001377005132397696,
|
13277 |
+
"loss": 0.1543,
|
13278 |
+
"step": 1891
|
13279 |
+
},
|
13280 |
+
{
|
13281 |
+
"epoch": 4.335336194563663,
|
13282 |
+
"grad_norm": 2.0561656951904297,
|
13283 |
+
"learning_rate": 0.00013764219358350211,
|
13284 |
+
"loss": 0.1759,
|
13285 |
+
"step": 1892
|
13286 |
+
},
|
13287 |
+
{
|
13288 |
+
"epoch": 4.337625178826896,
|
13289 |
+
"grad_norm": 1.5654786825180054,
|
13290 |
+
"learning_rate": 0.0001375838590070718,
|
13291 |
+
"loss": 0.0971,
|
13292 |
+
"step": 1893
|
13293 |
+
},
|
13294 |
+
{
|
13295 |
+
"epoch": 4.339914163090128,
|
13296 |
+
"grad_norm": 3.0831820964813232,
|
13297 |
+
"learning_rate": 0.00013752550953360056,
|
13298 |
+
"loss": 0.0735,
|
13299 |
+
"step": 1894
|
13300 |
+
},
|
13301 |
+
{
|
13302 |
+
"epoch": 4.342203147353362,
|
13303 |
+
"grad_norm": 2.8486666679382324,
|
13304 |
+
"learning_rate": 0.0001374671451862163,
|
13305 |
+
"loss": 0.1149,
|
13306 |
+
"step": 1895
|
13307 |
+
},
|
13308 |
+
{
|
13309 |
+
"epoch": 4.344492131616595,
|
13310 |
+
"grad_norm": 1.0494810342788696,
|
13311 |
+
"learning_rate": 0.00013740876598805277,
|
13312 |
+
"loss": 0.9351,
|
13313 |
+
"step": 1896
|
13314 |
+
},
|
13315 |
+
{
|
13316 |
+
"epoch": 4.346781115879828,
|
13317 |
+
"grad_norm": 0.6020961999893188,
|
13318 |
+
"learning_rate": 0.00013735037196224963,
|
13319 |
+
"loss": 1.7943,
|
13320 |
+
"step": 1897
|
13321 |
+
},
|
13322 |
+
{
|
13323 |
+
"epoch": 4.349070100143061,
|
13324 |
+
"grad_norm": 0.6643849015235901,
|
13325 |
+
"learning_rate": 0.0001372919631319524,
|
13326 |
+
"loss": 1.7295,
|
13327 |
+
"step": 1898
|
13328 |
+
},
|
13329 |
+
{
|
13330 |
+
"epoch": 4.351359084406295,
|
13331 |
+
"grad_norm": 0.7122296094894409,
|
13332 |
+
"learning_rate": 0.00013723353952031246,
|
13333 |
+
"loss": 1.4286,
|
13334 |
+
"step": 1899
|
13335 |
+
},
|
13336 |
+
{
|
13337 |
+
"epoch": 4.353648068669528,
|
13338 |
+
"grad_norm": 0.7596356868743896,
|
13339 |
+
"learning_rate": 0.0001371751011504871,
|
13340 |
+
"loss": 1.4306,
|
13341 |
+
"step": 1900
|
13342 |
+
},
|
13343 |
+
{
|
13344 |
+
"epoch": 4.355937052932761,
|
13345 |
+
"grad_norm": 0.7785231471061707,
|
13346 |
+
"learning_rate": 0.00013711664804563935,
|
13347 |
+
"loss": 1.4756,
|
13348 |
+
"step": 1901
|
13349 |
+
},
|
13350 |
+
{
|
13351 |
+
"epoch": 4.358226037195994,
|
13352 |
+
"grad_norm": 0.7578595280647278,
|
13353 |
+
"learning_rate": 0.00013705818022893826,
|
13354 |
+
"loss": 1.2556,
|
13355 |
+
"step": 1902
|
13356 |
+
},
|
13357 |
+
{
|
13358 |
+
"epoch": 4.360515021459228,
|
13359 |
+
"grad_norm": 0.8627766966819763,
|
13360 |
+
"learning_rate": 0.00013699969772355852,
|
13361 |
+
"loss": 1.4254,
|
13362 |
+
"step": 1903
|
13363 |
+
},
|
13364 |
+
{
|
13365 |
+
"epoch": 4.36280400572246,
|
13366 |
+
"grad_norm": 0.8474234938621521,
|
13367 |
+
"learning_rate": 0.00013694120055268076,
|
13368 |
+
"loss": 1.2509,
|
13369 |
+
"step": 1904
|
13370 |
+
},
|
13371 |
+
{
|
13372 |
+
"epoch": 4.365092989985694,
|
13373 |
+
"grad_norm": 0.8625220656394958,
|
13374 |
+
"learning_rate": 0.00013688268873949134,
|
13375 |
+
"loss": 1.0913,
|
13376 |
+
"step": 1905
|
13377 |
+
},
|
13378 |
+
{
|
13379 |
+
"epoch": 4.367381974248927,
|
13380 |
+
"grad_norm": 0.9123669862747192,
|
13381 |
+
"learning_rate": 0.00013682416230718258,
|
13382 |
+
"loss": 1.085,
|
13383 |
+
"step": 1906
|
13384 |
+
},
|
13385 |
+
{
|
13386 |
+
"epoch": 4.3696709585121605,
|
13387 |
+
"grad_norm": 0.8739493489265442,
|
13388 |
+
"learning_rate": 0.00013676562127895237,
|
13389 |
+
"loss": 1.0619,
|
13390 |
+
"step": 1907
|
13391 |
+
},
|
13392 |
+
{
|
13393 |
+
"epoch": 4.371959942775393,
|
13394 |
+
"grad_norm": 0.9685913920402527,
|
13395 |
+
"learning_rate": 0.0001367070656780046,
|
13396 |
+
"loss": 1.2055,
|
13397 |
+
"step": 1908
|
13398 |
+
},
|
13399 |
+
{
|
13400 |
+
"epoch": 4.374248927038627,
|
13401 |
+
"grad_norm": 0.9431620836257935,
|
13402 |
+
"learning_rate": 0.00013664849552754877,
|
13403 |
+
"loss": 1.1056,
|
13404 |
+
"step": 1909
|
13405 |
+
},
|
13406 |
+
{
|
13407 |
+
"epoch": 4.37653791130186,
|
13408 |
+
"grad_norm": 0.9369290471076965,
|
13409 |
+
"learning_rate": 0.00013658991085080025,
|
13410 |
+
"loss": 1.0644,
|
13411 |
+
"step": 1910
|
13412 |
+
},
|
13413 |
+
{
|
13414 |
+
"epoch": 4.378826895565093,
|
13415 |
+
"grad_norm": 0.9952932000160217,
|
13416 |
+
"learning_rate": 0.00013653131167098013,
|
13417 |
+
"loss": 0.9128,
|
13418 |
+
"step": 1911
|
13419 |
+
},
|
13420 |
+
{
|
13421 |
+
"epoch": 4.381115879828326,
|
13422 |
+
"grad_norm": 1.0159112215042114,
|
13423 |
+
"learning_rate": 0.00013647269801131523,
|
13424 |
+
"loss": 0.9172,
|
13425 |
+
"step": 1912
|
13426 |
+
},
|
13427 |
+
{
|
13428 |
+
"epoch": 4.38340486409156,
|
13429 |
+
"grad_norm": 1.0917634963989258,
|
13430 |
+
"learning_rate": 0.00013641406989503815,
|
13431 |
+
"loss": 0.9761,
|
13432 |
+
"step": 1913
|
13433 |
+
},
|
13434 |
+
{
|
13435 |
+
"epoch": 4.3856938483547925,
|
13436 |
+
"grad_norm": 1.0909875631332397,
|
13437 |
+
"learning_rate": 0.00013635542734538725,
|
13438 |
+
"loss": 1.0508,
|
13439 |
+
"step": 1914
|
13440 |
+
},
|
13441 |
+
{
|
13442 |
+
"epoch": 4.387982832618026,
|
13443 |
+
"grad_norm": 1.0644196271896362,
|
13444 |
+
"learning_rate": 0.0001362967703856065,
|
13445 |
+
"loss": 0.8636,
|
13446 |
+
"step": 1915
|
13447 |
+
},
|
13448 |
+
{
|
13449 |
+
"epoch": 4.390271816881259,
|
13450 |
+
"grad_norm": 1.0008410215377808,
|
13451 |
+
"learning_rate": 0.00013623809903894562,
|
13452 |
+
"loss": 0.7,
|
13453 |
+
"step": 1916
|
13454 |
+
},
|
13455 |
+
{
|
13456 |
+
"epoch": 4.3925608011444925,
|
13457 |
+
"grad_norm": 1.0553101301193237,
|
13458 |
+
"learning_rate": 0.00013617941332866011,
|
13459 |
+
"loss": 0.903,
|
13460 |
+
"step": 1917
|
13461 |
+
},
|
13462 |
+
{
|
13463 |
+
"epoch": 4.394849785407725,
|
13464 |
+
"grad_norm": 1.1645140647888184,
|
13465 |
+
"learning_rate": 0.00013612071327801114,
|
13466 |
+
"loss": 0.8311,
|
13467 |
+
"step": 1918
|
13468 |
+
},
|
13469 |
+
{
|
13470 |
+
"epoch": 4.397138769670958,
|
13471 |
+
"grad_norm": 1.2693705558776855,
|
13472 |
+
"learning_rate": 0.00013606199891026544,
|
13473 |
+
"loss": 0.8169,
|
13474 |
+
"step": 1919
|
13475 |
+
},
|
13476 |
+
{
|
13477 |
+
"epoch": 4.399427753934192,
|
13478 |
+
"grad_norm": 1.233024001121521,
|
13479 |
+
"learning_rate": 0.0001360032702486956,
|
13480 |
+
"loss": 0.6266,
|
13481 |
+
"step": 1920
|
13482 |
+
},
|
13483 |
+
{
|
13484 |
+
"epoch": 4.4017167381974245,
|
13485 |
+
"grad_norm": 1.209771752357483,
|
13486 |
+
"learning_rate": 0.0001359445273165797,
|
13487 |
+
"loss": 0.5964,
|
13488 |
+
"step": 1921
|
13489 |
+
},
|
13490 |
+
{
|
13491 |
+
"epoch": 4.404005722460658,
|
13492 |
+
"grad_norm": 1.2486642599105835,
|
13493 |
+
"learning_rate": 0.00013588577013720166,
|
13494 |
+
"loss": 0.5071,
|
13495 |
+
"step": 1922
|
13496 |
+
},
|
13497 |
+
{
|
13498 |
+
"epoch": 4.406294706723891,
|
13499 |
+
"grad_norm": 1.131710171699524,
|
13500 |
+
"learning_rate": 0.0001358269987338509,
|
13501 |
+
"loss": 0.4773,
|
13502 |
+
"step": 1923
|
13503 |
+
},
|
13504 |
+
{
|
13505 |
+
"epoch": 4.4085836909871245,
|
13506 |
+
"grad_norm": 1.1338353157043457,
|
13507 |
+
"learning_rate": 0.0001357682131298225,
|
13508 |
+
"loss": 0.4969,
|
13509 |
+
"step": 1924
|
13510 |
+
},
|
13511 |
+
{
|
13512 |
+
"epoch": 4.410872675250357,
|
13513 |
+
"grad_norm": 1.1553444862365723,
|
13514 |
+
"learning_rate": 0.00013570941334841723,
|
13515 |
+
"loss": 0.5181,
|
13516 |
+
"step": 1925
|
13517 |
+
},
|
13518 | +     {
13519 | +       "epoch": 4.413161659513591,
13520 | +       "grad_norm": 1.2361962795257568,
13521 | +       "learning_rate": 0.00013565059941294142,
13522 | +       "loss": 0.4904,
13523 | +       "step": 1926
13524 | +     },
13525 | +     {
13526 | +       "epoch": 4.415450643776824,
13527 | +       "grad_norm": 1.3251562118530273,
13528 | +       "learning_rate": 0.00013559177134670706,
13529 | +       "loss": 0.4725,
13530 | +       "step": 1927
13531 | +     },
13532 | +     {
13533 | +       "epoch": 4.417739628040057,
13534 | +       "grad_norm": 1.3303446769714355,
13535 | +       "learning_rate": 0.0001355329291730317,
13536 | +       "loss": 0.363,
13537 | +       "step": 1928
13538 | +     },
13539 | +     {
13540 | +       "epoch": 4.42002861230329,
13541 | +       "grad_norm": 1.414873719215393,
13542 | +       "learning_rate": 0.00013547407291523853,
13543 | +       "loss": 0.3727,
13544 | +       "step": 1929
13545 | +     },
13546 | +     {
13547 | +       "epoch": 4.422317596566524,
13548 | +       "grad_norm": 1.3383206129074097,
13549 | +       "learning_rate": 0.0001354152025966562,
13550 | +       "loss": 0.3201,
13551 | +       "step": 1930
13552 | +     },
13553 | +     {
13554 | +       "epoch": 4.4246065808297566,
13555 | +       "grad_norm": 1.3916726112365723,
13556 | +       "learning_rate": 0.00013535631824061913,
13557 | +       "loss": 0.2309,
13558 | +       "step": 1931
13559 | +     },
13560 | +     {
13561 | +       "epoch": 4.42689556509299,
13562 | +       "grad_norm": 1.3709685802459717,
13563 | +       "learning_rate": 0.00013529741987046716,
13564 | +       "loss": 0.2759,
13565 | +       "step": 1932
13566 | +     },
13567 | +     {
13568 | +       "epoch": 4.429184549356223,
13569 | +       "grad_norm": 1.3704681396484375,
13570 | +       "learning_rate": 0.0001352385075095457,
13571 | +       "loss": 0.2658,
13572 | +       "step": 1933
13573 | +     },
13574 | +     {
13575 | +       "epoch": 4.431473533619457,
13576 | +       "grad_norm": 1.756007432937622,
13577 | +       "learning_rate": 0.00013517958118120576,
13578 | +       "loss": 0.2528,
13579 | +       "step": 1934
13580 | +     },
13581 | +     {
13582 | +       "epoch": 4.433762517882689,
13583 | +       "grad_norm": 1.0347654819488525,
13584 | +       "learning_rate": 0.0001351206409088038,
13585 | +       "loss": 0.1138,
13586 | +       "step": 1935
13587 | +     },
13588 | +     {
13589 | +       "epoch": 4.436051502145923,
13590 | +       "grad_norm": 1.4181766510009766,
13591 | +       "learning_rate": 0.00013506168671570192,
13592 | +       "loss": 0.1408,
13593 | +       "step": 1936
13594 | +     },
13595 | +     {
13596 | +       "epoch": 4.438340486409156,
13597 | +       "grad_norm": 1.846653699874878,
13598 | +       "learning_rate": 0.00013500271862526764,
13599 | +       "loss": 0.2026,
13600 | +       "step": 1937
13601 | +     },
13602 | +     {
13603 | +       "epoch": 4.4406294706723894,
13604 | +       "grad_norm": 1.4231027364730835,
13605 | +       "learning_rate": 0.0001349437366608741,
13606 | +       "loss": 0.1554,
13607 | +       "step": 1938
13608 | +     },
13609 | +     {
13610 | +       "epoch": 4.442918454935622,
13611 | +       "grad_norm": 1.0651813745498657,
13612 | +       "learning_rate": 0.00013488474084589976,
13613 | +       "loss": 0.114,
13614 | +       "step": 1939
13615 | +     },
13616 | +     {
13617 | +       "epoch": 4.445207439198856,
13618 | +       "grad_norm": 1.6582481861114502,
13619 | +       "learning_rate": 0.00013482573120372873,
13620 | +       "loss": 0.1301,
13621 | +       "step": 1940
13622 | +     },
13623 | +     {
13624 | +       "epoch": 4.447496423462089,
13625 | +       "grad_norm": 1.7687350511550903,
13626 | +       "learning_rate": 0.00013476670775775052,
13627 | +       "loss": 0.1559,
13628 | +       "step": 1941
13629 | +     },
13630 | +     {
13631 | +       "epoch": 4.449785407725322,
13632 | +       "grad_norm": 1.3473821878433228,
13633 | +       "learning_rate": 0.0001347076705313602,
13634 | +       "loss": 0.0956,
13635 | +       "step": 1942
13636 | +     },
13637 | +     {
13638 | +       "epoch": 4.452074391988555,
13639 | +       "grad_norm": 1.5209802389144897,
13640 | +       "learning_rate": 0.0001346486195479582,
13641 | +       "loss": 0.0777,
13642 | +       "step": 1943
13643 | +     },
13644 | +     {
13645 | +       "epoch": 4.454363376251788,
13646 | +       "grad_norm": 2.565901279449463,
13647 | +       "learning_rate": 0.00013458955483095042,
13648 | +       "loss": 0.0962,
13649 | +       "step": 1944
13650 | +     },
13651 | +     {
13652 | +       "epoch": 4.4566523605150214,
13653 | +       "grad_norm": 1.7184423208236694,
13654 | +       "learning_rate": 0.00013453047640374827,
13655 | +       "loss": 0.1249,
13656 | +       "step": 1945
13657 | +     },
13658 | +     {
13659 | +       "epoch": 4.458941344778255,
13660 | +       "grad_norm": 1.5781924724578857,
13661 | +       "learning_rate": 0.00013447138428976854,
13662 | +       "loss": 1.0313,
13663 | +       "step": 1946
13664 | +     },
13665 | +     {
13666 | +       "epoch": 4.461230329041488,
13667 | +       "grad_norm": 0.6325271129608154,
13668 | +       "learning_rate": 0.0001344122785124334,
13669 | +       "loss": 1.7381,
13670 | +       "step": 1947
13671 | +     },
13672 | +     {
13673 | +       "epoch": 4.463519313304721,
13674 | +       "grad_norm": 0.6897770762443542,
13675 | +       "learning_rate": 0.00013435315909517058,
13676 | +       "loss": 1.7595,
13677 | +       "step": 1948
13678 | +     },
13679 | +     {
13680 | +       "epoch": 4.465808297567954,
13681 | +       "grad_norm": 0.7247394323348999,
13682 | +       "learning_rate": 0.0001342940260614131,
13683 | +       "loss": 1.5707,
13684 | +       "step": 1949
13685 | +     },
13686 | +     {
13687 | +       "epoch": 4.468097281831187,
13688 | +       "grad_norm": 0.7909899353981018,
13689 | +       "learning_rate": 0.00013423487943459934,
13690 | +       "loss": 1.4698,
13691 | +       "step": 1950
13692 | +     },
13693 | +     {
13694 | +       "epoch": 4.470386266094421,
13695 | +       "grad_norm": 0.8112832903862,
13696 | +       "learning_rate": 0.00013417571923817322,
13697 | +       "loss": 1.4059,
13698 | +       "step": 1951
13699 | +     },
13700 | +     {
13701 | +       "epoch": 4.4726752503576535,
13702 | +       "grad_norm": 0.8589115142822266,
13703 | +       "learning_rate": 0.0001341165454955839,
13704 | +       "loss": 1.5253,
13705 | +       "step": 1952
13706 | +     },
13707 | +     {
13708 | +       "epoch": 4.474964234620887,
13709 | +       "grad_norm": 0.8326034545898438,
13710 | +       "learning_rate": 0.00013405735823028602,
13711 | +       "loss": 1.2412,
13712 | +       "step": 1953
13713 | +     },
13714 | +     {
13715 | +       "epoch": 4.47725321888412,
13716 | +       "grad_norm": 0.8572644591331482,
13717 | +       "learning_rate": 0.00013399815746573945,
13718 | +       "loss": 1.3859,
13719 | +       "step": 1954
13720 | +     },
13721 | +     {
13722 | +       "epoch": 4.4795422031473535,
13723 | +       "grad_norm": 0.899202287197113,
13724 | +       "learning_rate": 0.0001339389432254095,
13725 | +       "loss": 1.3033,
13726 | +       "step": 1955
13727 | +     },
13728 | +     {
13729 | +       "epoch": 4.481831187410586,
13730 | +       "grad_norm": 0.9051918983459473,
13731 | +       "learning_rate": 0.00013387971553276682,
13732 | +       "loss": 1.2522,
13733 | +       "step": 1956
13734 | +     },
13735 | +     {
13736 | +       "epoch": 4.48412017167382,
13737 | +       "grad_norm": 0.8995559811592102,
13738 | +       "learning_rate": 0.0001338204744112874,
13739 | +       "loss": 1.1044,
13740 | +       "step": 1957
13741 | +     },
13742 | +     {
13743 | +       "epoch": 4.486409155937053,
13744 | +       "grad_norm": 0.9207310676574707,
13745 | +       "learning_rate": 0.00013376121988445248,
13746 | +       "loss": 1.0933,
13747 | +       "step": 1958
13748 | +     },
13749 | +     {
13750 | +       "epoch": 4.488698140200286,
13751 | +       "grad_norm": 0.9572367072105408,
13752 | +       "learning_rate": 0.00013370195197574865,
13753 | +       "loss": 1.0125,
13754 | +       "step": 1959
13755 | +     },
13756 | +     {
13757 | +       "epoch": 4.490987124463519,
13758 | +       "grad_norm": 0.9588605761528015,
13759 | +       "learning_rate": 0.00013364267070866786,
13760 | +       "loss": 1.0196,
13761 | +       "step": 1960
13762 | +     },
13763 | +     {
13764 | +       "epoch": 4.493276108726753,
13765 | +       "grad_norm": 1.030645489692688,
13766 | +       "learning_rate": 0.0001335833761067073,
13767 | +       "loss": 1.1733,
13768 | +       "step": 1961
13769 | +     },
13770 | +     {
13771 | +       "epoch": 4.4955650929899855,
13772 | +       "grad_norm": 1.0392740964889526,
13773 | +       "learning_rate": 0.00013352406819336945,
13774 | +       "loss": 1.0174,
13775 | +       "step": 1962
13776 | +     },
13777 | +     {
13778 | +       "epoch": 4.497854077253219,
13779 | +       "grad_norm": 1.0212891101837158,
13780 | +       "learning_rate": 0.00013346474699216208,
13781 | +       "loss": 0.9613,
13782 | +       "step": 1963
13783 | +     },
13784 | +     {
13785 | +       "epoch": 4.500143061516452,
13786 | +       "grad_norm": 1.131136417388916,
13787 | +       "learning_rate": 0.00013340541252659818,
13788 | +       "loss": 1.0567,
13789 | +       "step": 1964
13790 | +     },
13791 | +     {
13792 | +       "epoch": 4.502432045779686,
13793 | +       "grad_norm": 1.0588047504425049,
13794 | +       "learning_rate": 0.00013334606482019607,
13795 | +       "loss": 0.7574,
13796 | +       "step": 1965
13797 | +     },
13798 | +     {
13799 | +       "epoch": 4.504721030042918,
13800 | +       "grad_norm": 1.5936955213546753,
13801 | +       "learning_rate": 0.0001332867038964793,
13802 | +       "loss": 0.7497,
13803 | +       "step": 1966
13804 | +     },
13805 | +     {
13806 | +       "epoch": 4.507010014306152,
13807 | +       "grad_norm": 1.227865219116211,
13808 | +       "learning_rate": 0.00013322732977897662,
13809 | +       "loss": 0.946,
13810 | +       "step": 1967
13811 | +     },
13812 | +     {
13813 | +       "epoch": 4.509298998569385,
13814 | +       "grad_norm": 1.226654291152954,
13815 | +       "learning_rate": 0.00013316794249122206,
13816 | +       "loss": 0.7259,
13817 | +       "step": 1968
13818 | +     },
13819 | +     {
13820 | +       "epoch": 4.5115879828326175,
13821 | +       "grad_norm": 1.1799983978271484,
13822 | +       "learning_rate": 0.0001331085420567548,
13823 | +       "loss": 0.6394,
13824 | +       "step": 1969
13825 | +     },
13826 | +     {
13827 | +       "epoch": 4.513876967095851,
13828 | +       "grad_norm": 1.2049132585525513,
13829 | +       "learning_rate": 0.00013304912849911927,
13830 | +       "loss": 0.6035,
13831 | +       "step": 1970
13832 | +     },
13833 | +     {
13834 | +       "epoch": 4.516165951359085,
13835 | +       "grad_norm": 1.347949743270874,
13836 | +       "learning_rate": 0.00013298970184186514,
13837 | +       "loss": 0.6276,
13838 | +       "step": 1971
13839 | +     },
13840 | +     {
13841 | +       "epoch": 4.518454935622318,
13842 | +       "grad_norm": 1.2323335409164429,
13843 | +       "learning_rate": 0.00013293026210854726,
13844 | +       "loss": 0.5482,
13845 | +       "step": 1972
13846 | +     },
13847 | +     {
13848 | +       "epoch": 4.52074391988555,
13849 | +       "grad_norm": 1.2629224061965942,
13850 | +       "learning_rate": 0.0001328708093227256,
13851 | +       "loss": 0.587,
13852 | +       "step": 1973
13853 | +     },
13854 | +     {
13855 | +       "epoch": 4.523032904148784,
13856 | +       "grad_norm": 1.4115405082702637,
13857 | +       "learning_rate": 0.00013281134350796539,
13858 | +       "loss": 0.5042,
13859 | +       "step": 1974
13860 | +     },
13861 | +     {
13862 | +       "epoch": 4.525321888412018,
13863 | +       "grad_norm": 1.3867058753967285,
13864 | +       "learning_rate": 0.00013275186468783687,
13865 | +       "loss": 0.5705,
13866 | +       "step": 1975
13867 | +     },
13868 | +     {
13869 | +       "epoch": 4.52761087267525,
13870 | +       "grad_norm": 1.5213968753814697,
13871 | +       "learning_rate": 0.00013269237288591565,
13872 | +       "loss": 0.5916,
13873 | +       "step": 1976
13874 | +     },
13875 | +     {
13876 | +       "epoch": 4.529899856938483,
13877 | +       "grad_norm": 1.5112426280975342,
13878 | +       "learning_rate": 0.00013263286812578235,
13879 | +       "loss": 0.4245,
13880 | +       "step": 1977
13881 | +     },
13882 | +     {
13883 | +       "epoch": 4.532188841201717,
13884 | +       "grad_norm": 1.2434872388839722,
13885 | +       "learning_rate": 0.00013257335043102277,
13886 | +       "loss": 0.3977,
13887 | +       "step": 1978
13888 | +     },
13889 | +     {
13890 | +       "epoch": 4.53447782546495,
13891 | +       "grad_norm": 1.2847955226898193,
13892 | +       "learning_rate": 0.0001325138198252278,
13893 | +       "loss": 0.3967,
13894 | +       "step": 1979
13895 | +     },
13896 | +     {
13897 | +       "epoch": 4.536766809728183,
13898 | +       "grad_norm": 1.3733184337615967,
13899 | +       "learning_rate": 0.00013245427633199345,
13900 | +       "loss": 0.3502,
13901 | +       "step": 1980
13902 | +     },
13903 | +     {
13904 | +       "epoch": 4.539055793991416,
13905 | +       "grad_norm": 1.3960249423980713,
13906 | +       "learning_rate": 0.00013239471997492092,
13907 | +       "loss": 0.3305,
13908 | +       "step": 1981
13909 | +     },
13910 | +     {
13911 | +       "epoch": 4.54134477825465,
13912 | +       "grad_norm": 1.3462896347045898,
13913 | +       "learning_rate": 0.0001323351507776164,
13914 | +       "loss": 0.2457,
13915 | +       "step": 1982
13916 | +     },
13917 | +     {
13918 | +       "epoch": 4.543633762517882,
13919 | +       "grad_norm": 1.4513142108917236,
13920 | +       "learning_rate": 0.00013227556876369123,
13921 | +       "loss": 0.2284,
13922 | +       "step": 1983
13923 | +     },
13924 | +     {
13925 | +       "epoch": 4.545922746781116,
13926 | +       "grad_norm": 1.4674392938613892,
13927 | +       "learning_rate": 0.0001322159739567618,
13928 | +       "loss": 0.2691,
13929 | +       "step": 1984
13930 | +     },
13931 | +     {
13932 | +       "epoch": 4.548211731044349,
13933 | +       "grad_norm": 1.4074273109436035,
13934 | +       "learning_rate": 0.0001321563663804496,
13935 | +       "loss": 0.1861,
13936 | +       "step": 1985
13937 | +     },
13938 | +     {
13939 | +       "epoch": 4.5505007153075825,
13940 | +       "grad_norm": 1.080726146697998,
13941 | +       "learning_rate": 0.00013209674605838117,
13942 | +       "loss": 0.0974,
13943 | +       "step": 1986
13944 | +     },
13945 | +     {
13946 | +       "epoch": 4.552789699570815,
13947 | +       "grad_norm": 1.5537816286087036,
13948 | +       "learning_rate": 0.00013203711301418815,
13949 | +       "loss": 0.1991,
13950 | +       "step": 1987
13951 | +     },
13952 | +     {
13953 | +       "epoch": 4.555078683834049,
13954 | +       "grad_norm": 1.4420602321624756,
13955 | +       "learning_rate": 0.00013197746727150708,
13956 | +       "loss": 0.1775,
13957 | +       "step": 1988
13958 | +     },
13959 | +     {
13960 | +       "epoch": 4.557367668097282,
13961 | +       "grad_norm": 1.2284140586853027,
13962 | +       "learning_rate": 0.0001319178088539797,
13963 | +       "loss": 0.1238,
13964 | +       "step": 1989
13965 | +     },
13966 | +     {
13967 | +       "epoch": 4.559656652360515,
13968 | +       "grad_norm": 1.7686671018600464,
13969 | +       "learning_rate": 0.00013185813778525263,
13970 | +       "loss": 0.1987,
13971 | +       "step": 1990
13972 | +     },
13973 | +     {
13974 | +       "epoch": 4.561945636623748,
13975 | +       "grad_norm": 2.3643531799316406,
13976 | +       "learning_rate": 0.00013179845408897766,
13977 | +       "loss": 0.1214,
13978 | +       "step": 1991
13979 | +     },
13980 | +     {
13981 | +       "epoch": 4.564234620886982,
13982 | +       "grad_norm": 1.6342504024505615,
13983 | +       "learning_rate": 0.00013173875778881147,
13984 | +       "loss": 0.0951,
13985 | +       "step": 1992
13986 | +     },
13987 | +     {
13988 | +       "epoch": 4.5665236051502145,
13989 | +       "grad_norm": 1.2456626892089844,
13990 | +       "learning_rate": 0.0001316790489084157,
13991 | +       "loss": 0.0993,
13992 | +       "step": 1993
13993 | +     },
13994 | +     {
13995 | +       "epoch": 4.568812589413448,
13996 | +       "grad_norm": 3.6577072143554688,
13997 | +       "learning_rate": 0.00013161932747145712,
13998 | +       "loss": 0.1036,
13999 | +       "step": 1994
14000 | +     },
14001 | +     {
14002 | +       "epoch": 4.571101573676681,
14003 | +       "grad_norm": 2.6007823944091797,
14004 | +       "learning_rate": 0.00013155959350160738,
14005 | +       "loss": 0.1888,
14006 | +       "step": 1995
14007 | +     },
14008 | +     {
14009 | +       "epoch": 4.573390557939915,
14010 | +       "grad_norm": 4.163236618041992,
14011 | +       "learning_rate": 0.00013149984702254312,
14012 | +       "loss": 0.9818,
14013 | +       "step": 1996
14014 | +     },
14015 | +     {
14016 | +       "epoch": 4.575679542203147,
14017 | +       "grad_norm": 0.6608015298843384,
14018 | +       "learning_rate": 0.00013144008805794592,
14019 | +       "loss": 1.9279,
14020 | +       "step": 1997
14021 | +     },
14022 | +     {
14023 | +       "epoch": 4.57796852646638,
14024 | +       "grad_norm": 0.6357399225234985,
14025 | +       "learning_rate": 0.00013138031663150235,
14026 | +       "loss": 1.601,
14027 | +       "step": 1998
14028 | +     },
14029 | +     {
14030 | +       "epoch": 4.580257510729614,
14031 | +       "grad_norm": 0.7199594974517822,
14032 | +       "learning_rate": 0.00013132053276690385,
14033 | +       "loss": 1.8058,
14034 | +       "step": 1999
14035 | +     },
14036 | +     {
14037 | +       "epoch": 4.582546494992847,
14038 | +       "grad_norm": 0.774132490158081,
14039 | +       "learning_rate": 0.00013126073648784688,
14040 | +       "loss": 1.5383,
14041 | +       "step": 2000
14042 | +     },
14043 | +     {
14044 | +       "epoch": 4.582546494992847,
14045 | +       "eval_loss": 2.1012461185455322,
14046 | +       "eval_runtime": 24.8502,
14047 | +       "eval_samples_per_second": 29.617,
14048 | +       "eval_steps_per_second": 14.809,
14049 | +       "step": 2000
14050 |       }
14051 |     ],
14052 |     "logging_steps": 1,
|
14061 |           "early_stopping_threshold": 0.0
14062 |         },
14063 |         "attributes": {
14064 | +         "early_stopping_patience_counter": 3
14065 |         }
14066 |       },
14067 |       "TrainerControl": {
|
14070 |           "should_evaluate": false,
14071 |           "should_log": false,
14072 |           "should_save": true,
14073 | +         "should_training_stop": true
14074 |         },
14075 |         "attributes": {}
14076 |       }
14077 |     },
14078 | +   "total_flos": 1.4327953570953953e+18,
14079 |     "train_batch_size": 4,
14080 |     "trial_name": null,
14081 |     "trial_params": null
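
Note: the snippet below is a minimal sketch, not part of this commit, for inspecting the updated last-checkpoint/trainer_state.json after downloading the checkpoint. It assumes the standard Hugging Face Trainer serialization that the diff above follows: the per-step records sit in a "log_history" list, and callback state sits under "stateful_callbacks" keyed by class name (the "EarlyStoppingCallback" and "args" key names are assumptions inferred from the "early_stopping_*" fields shown above; "TrainerControl" appears in the diff itself).

import json

# Load the checkpoint's trainer state (path mirrors this repository's layout).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Training records carry "loss"; evaluation records carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"train records: {len(train_logs)}, last step: {train_logs[-1]['step']}")
print(f"last eval_loss: {eval_logs[-1]['eval_loss']} at step {eval_logs[-1]['step']}")

# Early-stopping bookkeeping saved with this checkpoint; "EarlyStoppingCallback"
# is the assumed key name under which transformers serializes the callback.
cb = state["stateful_callbacks"]
patience = cb["EarlyStoppingCallback"]["attributes"]["early_stopping_patience_counter"]
stop = cb["TrainerControl"]["args"]["should_training_stop"]
print(f"patience counter: {patience}, should_training_stop: {stop}")

Run against the state above, this would report an eval_loss of 2.1012461185455322 at step 2000, a patience counter of 3, and should_training_stop True, consistent with the run halting at this checkpoint.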