Training in progress, step 1673, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8ca3d99d44e2aa12d0df225ceafc0f988cf405d316b0e7d3d11535fa17bb13b9
 size 2437475520
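Files like this are stored through Git LFS, so the diffs on this page only show the pointer file changing: the spec version and byte size stay the same while the sha256 object ID is replaced. A minimal sketch for verifying a downloaded blob against the new pointer (the local path is an assumption; `hashlib` is the Python standard library):

```python
import hashlib

# Hypothetical local copy of the adapter weights from this checkpoint.
path = "last-checkpoint/adapter_model.safetensors"

# Stream in 1 MiB chunks so a multi-GB checkpoint never has to fit in memory.
sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

# The oid recorded in the updated LFS pointer above.
expected = "8ca3d99d44e2aa12d0df225ceafc0f988cf405d316b0e7d3d11535fa17bb13b9"
print(sha.hexdigest() == expected)
```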
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fb9f0073d2c90e0198c91f3e64e457e0cd375b33b2195f432c45cfbc7ee494c3
 size 4875220300
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ae26b9b26d2873ace8605093e3ce9c56a6013bc29bbe33012a09fe36a5336895
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0aaf497771dc3480aaa5f5633a14e6b4842ef72750502263affb259a2c07f8fc
 size 1064
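optimizer.pt, scheduler.pt, rng_state.pth, and the trainer_state.json below are the files `transformers.Trainer` reads back when a run resumes: the optimizer moments, learning-rate schedule position, RNG states, and step counters. A sketch of resuming from this commit, assuming the checkpoint directory has been downloaded locally and that `model`/`train_dataset` are rebuilt to match the original run (both are placeholders here, not part of this repo):

```python
from transformers import Trainer, TrainingArguments

# `model` and `train_dataset` are assumed to be reconstructed exactly as in
# the original run (same base model, adapter config, and data ordering).
trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="out",
        per_device_train_batch_size=4,  # matches "train_batch_size": 4 below
    ),
    train_dataset=train_dataset,
)

# Picks up at global_step 1673 using the optimizer/scheduler/RNG states above.
trainer.train(resume_from_checkpoint="last-checkpoint")
```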
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9101047191622467,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1673,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -10045,6 +10045,1679 @@
       "learning_rate": 1.2750172138226618e-05,
       "loss": 0.1006,
       "step": 1434
+    },
+    { "epoch": 0.7806337549299606, "grad_norm": 2.297020196914673, "learning_rate": 1.268994370182719e-05, "loss": 0.1081, "step": 1435 },
+    { "epoch": 0.7811777505779953, "grad_norm": 2.2114057540893555, "learning_rate": 1.2629837171682052e-05, "loss": 0.0814, "step": 1436 },
+    { "epoch": 0.7817217462260302, "grad_norm": 1.7250666618347168, "learning_rate": 1.2569852744182243e-05, "loss": 0.1275, "step": 1437 },
+    { "epoch": 0.7822657418740651, "grad_norm": 2.8865597248077393, "learning_rate": 1.2509990615319944e-05, "loss": 0.189, "step": 1438 },
+    { "epoch": 0.7828097375220998, "grad_norm": 2.3319942951202393, "learning_rate": 1.245025098068764e-05, "loss": 0.1519, "step": 1439 },
+    { "epoch": 0.7833537331701347, "grad_norm": 2.9569590091705322, "learning_rate": 1.23906340354776e-05, "loss": 0.205, "step": 1440 },
+    { "epoch": 0.7838977288181694, "grad_norm": 1.9512279033660889, "learning_rate": 1.2331139974481248e-05, "loss": 0.133, "step": 1441 },
+    { "epoch": 0.7844417244662043, "grad_norm": 2.3612194061279297, "learning_rate": 1.2271768992088489e-05, "loss": 0.165, "step": 1442 },
+    { "epoch": 0.7849857201142391, "grad_norm": 2.2667036056518555, "learning_rate": 1.2212521282287092e-05, "loss": 0.1499, "step": 1443 },
+    { "epoch": 0.7855297157622739, "grad_norm": 2.3945088386535645, "learning_rate": 1.2153397038662012e-05, "loss": 0.1495, "step": 1444 },
+    { "epoch": 0.7860737114103087, "grad_norm": 2.76117205619812, "learning_rate": 1.2094396454394797e-05, "loss": 0.2922, "step": 1445 },
+    { "epoch": 0.7866177070583436, "grad_norm": 2.3595540523529053, "learning_rate": 1.2035519722263023e-05, "loss": 0.1476, "step": 1446 },
+    { "epoch": 0.7871617027063783, "grad_norm": 4.047318935394287, "learning_rate": 1.1976767034639502e-05, "loss": 0.1823, "step": 1447 },
+    { "epoch": 0.7877056983544132, "grad_norm": 2.4783074855804443, "learning_rate": 1.1918138583491784e-05, "loss": 0.1299, "step": 1448 },
+    { "epoch": 0.7882496940024479, "grad_norm": 2.551173686981201, "learning_rate": 1.1859634560381494e-05, "loss": 0.1214, "step": 1449 },
+    { "epoch": 0.7887936896504828, "grad_norm": 1.925065040588379, "learning_rate": 1.1801255156463703e-05, "loss": 0.073, "step": 1450 },
+    { "epoch": 0.7893376852985177, "grad_norm": 0.8589705228805542, "learning_rate": 1.1743000562486317e-05, "loss": 0.2774, "step": 1451 },
+    { "epoch": 0.7898816809465524, "grad_norm": 1.296121597290039, "learning_rate": 1.1684870968789402e-05, "loss": 0.1509, "step": 1452 },
+    { "epoch": 0.7904256765945873, "grad_norm": 1.252631425857544, "learning_rate": 1.1626866565304596e-05, "loss": 0.2223, "step": 1453 },
+    { "epoch": 0.790969672242622, "grad_norm": 0.7784968018531799, "learning_rate": 1.1568987541554577e-05, "loss": 0.0695, "step": 1454 },
+    { "epoch": 0.7915136678906569, "grad_norm": 1.6878989934921265, "learning_rate": 1.1511234086652262e-05, "loss": 0.1985, "step": 1455 },
+    { "epoch": 0.7920576635386917, "grad_norm": 2.069828748703003, "learning_rate": 1.145360638930032e-05, "loss": 0.2687, "step": 1456 },
+    { "epoch": 0.7926016591867265, "grad_norm": 2.9908523559570312, "learning_rate": 1.1396104637790534e-05, "loss": 0.3874, "step": 1457 },
+    { "epoch": 0.7931456548347613, "grad_norm": 2.0268144607543945, "learning_rate": 1.1338729020003169e-05, "loss": 0.2378, "step": 1458 },
+    { "epoch": 0.7936896504827962, "grad_norm": 1.7170679569244385, "learning_rate": 1.1281479723406374e-05, "loss": 0.2915, "step": 1459 },
+    { "epoch": 0.7942336461308309, "grad_norm": 1.7947033643722534, "learning_rate": 1.1224356935055524e-05, "loss": 0.2425, "step": 1460 },
+    { "epoch": 0.7947776417788658, "grad_norm": 1.5672327280044556, "learning_rate": 1.1167360841592644e-05, "loss": 0.1897, "step": 1461 },
+    { "epoch": 0.7953216374269005, "grad_norm": 1.682944655418396, "learning_rate": 1.1110491629245862e-05, "loss": 0.1473, "step": 1462 },
+    { "epoch": 0.7958656330749354, "grad_norm": 1.9672317504882812, "learning_rate": 1.1053749483828673e-05, "loss": 0.223, "step": 1463 },
+    { "epoch": 0.7964096287229702, "grad_norm": 1.5830997228622437, "learning_rate": 1.09971345907394e-05, "loss": 0.1368, "step": 1464 },
+    { "epoch": 0.796953624371005, "grad_norm": 1.4714478254318237, "learning_rate": 1.0940647134960607e-05, "loss": 0.1093, "step": 1465 },
+    { "epoch": 0.7974976200190399, "grad_norm": 1.4242087602615356, "learning_rate": 1.0884287301058465e-05, "loss": 0.1026, "step": 1466 },
+    { "epoch": 0.7980416156670747, "grad_norm": 1.1947529315948486, "learning_rate": 1.082805527318217e-05, "loss": 0.0618, "step": 1467 },
+    { "epoch": 0.7985856113151095, "grad_norm": 1.940982460975647, "learning_rate": 1.0771951235063288e-05, "loss": 0.1179, "step": 1468 },
+    { "epoch": 0.7991296069631443, "grad_norm": 2.400984525680542, "learning_rate": 1.0715975370015196e-05, "loss": 0.0672, "step": 1469 },
+    { "epoch": 0.7996736026111791, "grad_norm": 2.2756922245025635, "learning_rate": 1.066012786093255e-05, "loss": 0.24, "step": 1470 },
+    { "epoch": 0.8002175982592139, "grad_norm": 2.2270493507385254, "learning_rate": 1.0604408890290545e-05, "loss": 0.2143, "step": 1471 },
+    { "epoch": 0.8007615939072488, "grad_norm": 1.608907699584961, "learning_rate": 1.0548818640144403e-05, "loss": 0.1132, "step": 1472 },
+    { "epoch": 0.8013055895552835, "grad_norm": 1.3964457511901855, "learning_rate": 1.0493357292128781e-05, "loss": 0.1171, "step": 1473 },
+    { "epoch": 0.8018495852033184, "grad_norm": 1.0506035089492798, "learning_rate": 1.0438025027457165e-05, "loss": 0.0346, "step": 1474 },
+    { "epoch": 0.8023935808513531, "grad_norm": 1.4303292036056519, "learning_rate": 1.038282202692129e-05, "loss": 0.0917, "step": 1475 },
+    { "epoch": 0.802937576499388, "grad_norm": 2.0131571292877197, "learning_rate": 1.0327748470890497e-05, "loss": 0.1674, "step": 1476 },
+    { "epoch": 0.8034815721474228, "grad_norm": 1.6086413860321045, "learning_rate": 1.0272804539311177e-05, "loss": 0.1131, "step": 1477 },
+    { "epoch": 0.8040255677954576, "grad_norm": 1.3215925693511963, "learning_rate": 1.0217990411706273e-05, "loss": 0.0851, "step": 1478 },
+    { "epoch": 0.8045695634434924, "grad_norm": 1.3193762302398682, "learning_rate": 1.0163306267174516e-05, "loss": 0.0616, "step": 1479 },
+    { "epoch": 0.8051135590915273, "grad_norm": 2.5981080532073975, "learning_rate": 1.0108752284389956e-05, "loss": 0.2827, "step": 1480 },
+    { "epoch": 0.8056575547395621, "grad_norm": 2.486163377761841, "learning_rate": 1.005432864160139e-05, "loss": 0.2213, "step": 1481 },
+    { "epoch": 0.8062015503875969, "grad_norm": 1.8398014307022095, "learning_rate": 1.0000035516631678e-05, "loss": 0.1026, "step": 1482 },
+    { "epoch": 0.8067455460356318, "grad_norm": 1.326554536819458, "learning_rate": 9.945873086877322e-06, "loss": 0.0598, "step": 1483 },
+    { "epoch": 0.8072895416836665, "grad_norm": 2.775106191635132, "learning_rate": 9.891841529307715e-06, "loss": 0.2738, "step": 1484 },
+    { "epoch": 0.8078335373317014, "grad_norm": 2.1927573680877686, "learning_rate": 9.837941020464648e-06, "loss": 0.0971, "step": 1485 },
+    { "epoch": 0.8083775329797361, "grad_norm": 2.488726854324341, "learning_rate": 9.784171736461762e-06, "loss": 0.1751, "step": 1486 },
+    { "epoch": 0.808921528627771, "grad_norm": 2.367971181869507, "learning_rate": 9.730533852983914e-06, "loss": 0.1714, "step": 1487 },
+    { "epoch": 0.8094655242758058, "grad_norm": 1.7886788845062256, "learning_rate": 9.677027545286638e-06, "loss": 0.0942, "step": 1488 },
+    { "epoch": 0.8100095199238406, "grad_norm": 1.8454316854476929, "learning_rate": 9.623652988195536e-06, "loss": 0.1341, "step": 1489 },
+    { "epoch": 0.8105535155718754, "grad_norm": 1.5037360191345215, "learning_rate": 9.570410356105724e-06, "loss": 0.0536, "step": 1490 },
+    { "epoch": 0.8110975112199102, "grad_norm": 2.4228768348693848, "learning_rate": 9.517299822981335e-06, "loss": 0.2055, "step": 1491 },
+    { "epoch": 0.811641506867945, "grad_norm": 1.0392967462539673, "learning_rate": 9.46432156235481e-06, "loss": 0.0619, "step": 1492 },
+    { "epoch": 0.8121855025159799, "grad_norm": 2.179161310195923, "learning_rate": 9.411475747326425e-06, "loss": 0.1245, "step": 1493 },
+    { "epoch": 0.8127294981640147, "grad_norm": 1.5810810327529907, "learning_rate": 9.358762550563722e-06, "loss": 0.0755, "step": 1494 },
+    { "epoch": 0.8132734938120495, "grad_norm": 2.773293972015381, "learning_rate": 9.306182144300917e-06, "loss": 0.0977, "step": 1495 },
+    { "epoch": 0.8138174894600844, "grad_norm": 3.5057032108306885, "learning_rate": 9.253734700338368e-06, "loss": 0.3488, "step": 1496 },
+    { "epoch": 0.8143614851081191, "grad_norm": 1.7542012929916382, "learning_rate": 9.201420390041965e-06, "loss": 0.0488, "step": 1497 },
+    { "epoch": 0.814905480756154, "grad_norm": 1.6453478336334229, "learning_rate": 9.149239384342572e-06, "loss": 0.0576, "step": 1498 },
+    { "epoch": 0.8154494764041887, "grad_norm": 0.8648439645767212, "learning_rate": 9.097191853735604e-06, "loss": 0.0116, "step": 1499 },
+    { "epoch": 0.8159934720522236, "grad_norm": 0.8709718585014343, "learning_rate": 9.045277968280259e-06, "loss": 0.0113, "step": 1500 },
+    { "epoch": 0.8165374677002584, "grad_norm": 0.7547265887260437, "learning_rate": 8.993497897599084e-06, "loss": 0.223, "step": 1501 },
+    { "epoch": 0.8170814633482932, "grad_norm": 0.992436408996582, "learning_rate": 8.941851810877428e-06, "loss": 0.1712, "step": 1502 },
+    { "epoch": 0.817625458996328, "grad_norm": 1.2754369974136353, "learning_rate": 8.890339876862858e-06, "loss": 0.2472, "step": 1503 },
+    { "epoch": 0.8181694546443629, "grad_norm": 0.8094856142997742, "learning_rate": 8.838962263864614e-06, "loss": 0.1188, "step": 1504 },
+    { "epoch": 0.8187134502923976, "grad_norm": 1.2168220281600952, "learning_rate": 8.787719139753038e-06, "loss": 0.1599, "step": 1505 },
+    { "epoch": 0.8192574459404325, "grad_norm": 1.2538522481918335, "learning_rate": 8.736610671959027e-06, "loss": 0.1046, "step": 1506 },
+    { "epoch": 0.8198014415884672, "grad_norm": 1.0030944347381592, "learning_rate": 8.685637027473598e-06, "loss": 0.0683, "step": 1507 },
+    { "epoch": 0.8203454372365021, "grad_norm": 1.0258569717407227, "learning_rate": 8.634798372847148e-06, "loss": 0.0789, "step": 1508 },
+    { "epoch": 0.820889432884537, "grad_norm": 1.493883490562439, "learning_rate": 8.584094874189042e-06, "loss": 0.1712, "step": 1509 },
+    { "epoch": 0.8214334285325717, "grad_norm": 0.9724941849708557, "learning_rate": 8.533526697167049e-06, "loss": 0.0996, "step": 1510 },
+    { "epoch": 0.8219774241806066, "grad_norm": 1.256732702255249, "learning_rate": 8.483094007006787e-06, "loss": 0.1367, "step": 1511 },
+    { "epoch": 0.8225214198286414, "grad_norm": 1.1733742952346802, "learning_rate": 8.432796968491208e-06, "loss": 0.1159, "step": 1512 },
+    { "epoch": 0.8230654154766762, "grad_norm": 1.3118342161178589, "learning_rate": 8.382635745959988e-06, "loss": 0.1014, "step": 1513 },
+    { "epoch": 0.823609411124711, "grad_norm": 1.6488131284713745, "learning_rate": 8.332610503309047e-06, "loss": 0.1935, "step": 1514 },
+    { "epoch": 0.8241534067727458, "grad_norm": 1.5959988832473755, "learning_rate": 8.282721403990084e-06, "loss": 0.11, "step": 1515 },
+    { "epoch": 0.8246974024207806, "grad_norm": 1.6436313390731812, "learning_rate": 8.232968611009873e-06, "loss": 0.1112, "step": 1516 },
+    { "epoch": 0.8252413980688155, "grad_norm": 1.7652490139007568, "learning_rate": 8.183352286929847e-06, "loss": 0.1492, "step": 1517 },
+    { "epoch": 0.8257853937168502, "grad_norm": 1.386525273323059, "learning_rate": 8.133872593865572e-06, "loss": 0.0786, "step": 1518 },
+    { "epoch": 0.8263293893648851, "grad_norm": 1.4787259101867676, "learning_rate": 8.08452969348617e-06, "loss": 0.0764, "step": 1519 },
+    { "epoch": 0.8268733850129198, "grad_norm": 1.336531639099121, "learning_rate": 8.035323747013812e-06, "loss": 0.0872, "step": 1520 },
+    { "epoch": 0.8274173806609547, "grad_norm": 1.3021833896636963, "learning_rate": 7.986254915223185e-06, "loss": 0.0629, "step": 1521 },
+    { "epoch": 0.8279613763089896, "grad_norm": 1.1365309953689575, "learning_rate": 7.937323358440935e-06, "loss": 0.0637, "step": 1522 },
+    { "epoch": 0.8285053719570243, "grad_norm": 2.3605737686157227, "learning_rate": 7.888529236545267e-06, "loss": 0.1593, "step": 1523 },
+    { "epoch": 0.8290493676050592, "grad_norm": 1.8843390941619873, "learning_rate": 7.839872708965257e-06, "loss": 0.0917, "step": 1524 },
+    { "epoch": 0.829593363253094, "grad_norm": 1.4369595050811768, "learning_rate": 7.791353934680413e-06, "loss": 0.0753, "step": 1525 },
+    { "epoch": 0.8301373589011288, "grad_norm": 2.1893367767333984, "learning_rate": 7.742973072220177e-06, "loss": 0.1629, "step": 1526 },
+    { "epoch": 0.8306813545491636, "grad_norm": 1.5978200435638428, "learning_rate": 7.694730279663375e-06, "loss": 0.0499, "step": 1527 },
+    { "epoch": 0.8312253501971985, "grad_norm": 0.9856419563293457, "learning_rate": 7.646625714637712e-06, "loss": 0.0342, "step": 1528 },
+    { "epoch": 0.8317693458452332, "grad_norm": 1.908694863319397, "learning_rate": 7.59865953431923e-06, "loss": 0.1142, "step": 1529 },
+    { "epoch": 0.8323133414932681, "grad_norm": 2.624952793121338, "learning_rate": 7.550831895431798e-06, "loss": 0.3445, "step": 1530 },
+    { "epoch": 0.8328573371413028, "grad_norm": 1.5002707242965698, "learning_rate": 7.503142954246695e-06, "loss": 0.0832, "step": 1531 },
+    { "epoch": 0.8334013327893377, "grad_norm": 0.8018018007278442, "learning_rate": 7.455592866581929e-06, "loss": 0.0316, "step": 1532 },
+    { "epoch": 0.8339453284373725, "grad_norm": 1.603079080581665, "learning_rate": 7.4081817878018825e-06, "loss": 0.122, "step": 1533 },
+    { "epoch": 0.8344893240854073, "grad_norm": 4.352491855621338, "learning_rate": 7.360909872816724e-06, "loss": 0.1148, "step": 1534 },
+    { "epoch": 0.8350333197334421, "grad_norm": 1.8911892175674438, "learning_rate": 7.313777276081879e-06, "loss": 0.1193, "step": 1535 },
+    { "epoch": 0.835577315381477, "grad_norm": 1.1270118951797485, "learning_rate": 7.266784151597667e-06, "loss": 0.0457, "step": 1536 },
+    { "epoch": 0.8361213110295118, "grad_norm": 2.0327908992767334, "learning_rate": 7.219930652908602e-06, "loss": 0.0786, "step": 1537 },
+    { "epoch": 0.8366653066775466, "grad_norm": 2.7913618087768555, "learning_rate": 7.173216933103022e-06, "loss": 0.2529, "step": 1538 },
+    { "epoch": 0.8372093023255814, "grad_norm": 1.7808500528335571, "learning_rate": 7.126643144812556e-06, "loss": 0.1105, "step": 1539 },
+    { "epoch": 0.8377532979736162, "grad_norm": 2.997976303100586, "learning_rate": 7.080209440211627e-06, "loss": 0.2288, "step": 1540 },
+    { "epoch": 0.8382972936216511, "grad_norm": 1.3418859243392944, "learning_rate": 7.033915971016952e-06, "loss": 0.0556, "step": 1541 },
+    { "epoch": 0.8388412892696858, "grad_norm": 2.080580234527588, "learning_rate": 6.9877628884870315e-06, "loss": 0.1315, "step": 1542 },
+    { "epoch": 0.8393852849177207, "grad_norm": 2.3703489303588867, "learning_rate": 6.941750343421655e-06, "loss": 0.162, "step": 1543 },
+    { "epoch": 0.8399292805657554, "grad_norm": 1.6766456365585327, "learning_rate": 6.895878486161483e-06, "loss": 0.086, "step": 1544 },
+    { "epoch": 0.8404732762137903, "grad_norm": 3.4833881855010986, "learning_rate": 6.850147466587437e-06, "loss": 0.3433, "step": 1545 },
+    { "epoch": 0.8410172718618251, "grad_norm": 3.487314462661743, "learning_rate": 6.804557434120268e-06, "loss": 0.1398, "step": 1546 },
+    { "epoch": 0.8415612675098599, "grad_norm": 2.505648612976074, "learning_rate": 6.759108537720104e-06, "loss": 0.1135, "step": 1547 },
+    { "epoch": 0.8421052631578947, "grad_norm": 0.9417368769645691, "learning_rate": 6.713800925885905e-06, "loss": 0.0266, "step": 1548 },
+    { "epoch": 0.8426492588059296, "grad_norm": 2.6039652824401855, "learning_rate": 6.668634746655023e-06, "loss": 0.1447, "step": 1549 },
+    { "epoch": 0.8431932544539643, "grad_norm": 0.7121729254722595, "learning_rate": 6.623610147602655e-06, "loss": 0.0089, "step": 1550 },
+    { "epoch": 0.8437372501019992, "grad_norm": 0.923882246017456, "learning_rate": 6.578727275841412e-06, "loss": 0.2194, "step": 1551 },
+    { "epoch": 0.844281245750034, "grad_norm": 0.7369918823242188, "learning_rate": 6.533986278020876e-06, "loss": 0.1086, "step": 1552 },
+    { "epoch": 0.8448252413980688, "grad_norm": 1.1742249727249146, "learning_rate": 6.489387300327016e-06, "loss": 0.1254, "step": 1553 },
+    { "epoch": 0.8453692370461037, "grad_norm": 1.4827868938446045, "learning_rate": 6.444930488481771e-06, "loss": 0.1951, "step": 1554 },
+    { "epoch": 0.8459132326941384, "grad_norm": 0.9186825752258301, "learning_rate": 6.400615987742603e-06, "loss": 0.0496, "step": 1555 },
+    { "epoch": 0.8464572283421733, "grad_norm": 1.2773072719573975, "learning_rate": 6.356443942901968e-06, "loss": 0.1205, "step": 1556 },
+    { "epoch": 0.847001223990208, "grad_norm": 1.5452393293380737, "learning_rate": 6.312414498286878e-06, "loss": 0.1829, "step": 1557 },
+    { "epoch": 0.8475452196382429, "grad_norm": 1.5889948606491089, "learning_rate": 6.2685277977583885e-06, "loss": 0.1293, "step": 1558 },
+    { "epoch": 0.8480892152862777, "grad_norm": 0.6459242105484009, "learning_rate": 6.2247839847111575e-06, "loss": 0.0508, "step": 1559 },
+    { "epoch": 0.8486332109343125, "grad_norm": 1.140173316001892, "learning_rate": 6.181183202073026e-06, "loss": 0.0899, "step": 1560 },
+    { "epoch": 0.8491772065823473, "grad_norm": 1.5784512758255005, "learning_rate": 6.137725592304444e-06, "loss": 0.1523, "step": 1561 },
+    { "epoch": 0.8497212022303822, "grad_norm": 2.3454713821411133, "learning_rate": 6.094411297398073e-06, "loss": 0.1878, "step": 1562 },
+    { "epoch": 0.8502651978784169, "grad_norm": 1.3627198934555054, "learning_rate": 6.051240458878315e-06, "loss": 0.1295, "step": 1563 },
+    { "epoch": 0.8508091935264518, "grad_norm": 2.2635772228240967, "learning_rate": 6.008213217800851e-06, "loss": 0.1888, "step": 1564 },
+    { "epoch": 0.8513531891744867, "grad_norm": 1.515532374382019, "learning_rate": 5.9653297147521884e-06, "loss": 0.0859, "step": 1565 },
+    { "epoch": 0.8518971848225214, "grad_norm": 2.1630115509033203, "learning_rate": 5.922590089849145e-06, "loss": 0.2151, "step": 1566 },
+    { "epoch": 0.8524411804705563, "grad_norm": 1.3857431411743164, "learning_rate": 5.879994482738443e-06, "loss": 0.1154, "step": 1567 },
+    { "epoch": 0.852985176118591, "grad_norm": 1.0906001329421997, "learning_rate": 5.837543032596293e-06, "loss": 0.0681, "step": 1568 },
+    { "epoch": 0.8535291717666259, "grad_norm": 1.3181507587432861, "learning_rate": 5.795235878127842e-06, "loss": 0.0724, "step": 1569 },
+    { "epoch": 0.8540731674146607, "grad_norm": 2.039085865020752, "learning_rate": 5.753073157566763e-06, "loss": 0.1496, "step": 1570 },
+    { "epoch": 0.8546171630626955, "grad_norm": 1.201068639755249, "learning_rate": 5.711055008674837e-06, "loss": 0.0706, "step": 1571 },
+    { "epoch": 0.8551611587107303, "grad_norm": 2.1874308586120605, "learning_rate": 5.66918156874146e-06, "loss": 0.2013, "step": 1572 },
+    { "epoch": 0.8557051543587652, "grad_norm": 2.9349591732025146, "learning_rate": 5.627452974583219e-06, "loss": 0.2482, "step": 1573 },
+    { "epoch": 0.8562491500067999, "grad_norm": 1.8207327127456665, "learning_rate": 5.585869362543416e-06, "loss": 0.107, "step": 1574 },
+    { "epoch": 0.8567931456548348, "grad_norm": 4.597315311431885, "learning_rate": 5.544430868491629e-06, "loss": 0.1733, "step": 1575 },
+    { "epoch": 0.8573371413028695, "grad_norm": 1.7532278299331665, "learning_rate": 5.503137627823341e-06, "loss": 0.1138, "step": 1576 },
+    { "epoch": 0.8578811369509044, "grad_norm": 1.958823561668396, "learning_rate": 5.461989775459381e-06, "loss": 0.1315, "step": 1577 },
+    { "epoch": 0.8584251325989392, "grad_norm": 1.6528518199920654, "learning_rate": 5.420987445845532e-06, "loss": 0.1118, "step": 1578 },
+    { "epoch": 0.858969128246974, "grad_norm": 1.382939338684082, "learning_rate": 5.380130772952147e-06, "loss": 0.0782, "step": 1579 },
+    { "epoch": 0.8595131238950089, "grad_norm": 3.080315113067627, "learning_rate": 5.339419890273622e-06, "loss": 0.3002, "step": 1580 },
+    { "epoch": 0.8600571195430436, "grad_norm": 2.677290678024292, "learning_rate": 5.298854930828029e-06, "loss": 0.1951, "step": 1581 },
+    { "epoch": 0.8606011151910785, "grad_norm": 2.089850664138794, "learning_rate": 5.258436027156632e-06, "loss": 0.1763, "step": 1582 },
+    { "epoch": 0.8611451108391133, "grad_norm": 1.5543582439422607, "learning_rate": 5.218163311323471e-06, "loss": 0.0863, "step": 1583 },
+    { "epoch": 0.8616891064871481, "grad_norm": 3.3499083518981934, "learning_rate": 5.1780369149149464e-06, "loss": 0.1678, "step": 1584 },
+    { "epoch": 0.8622331021351829, "grad_norm": 1.8637248277664185, "learning_rate": 5.1380569690393846e-06, "loss": 0.093, "step": 1585 },
+    { "epoch": 0.8627770977832178, "grad_norm": 3.096461296081543, "learning_rate": 5.098223604326597e-06, "loss": 0.1397, "step": 1586 },
+    { "epoch": 0.8633210934312525, "grad_norm": 3.825195074081421, "learning_rate": 5.058536950927445e-06, "loss": 0.3308, "step": 1587 },
+    { "epoch": 0.8638650890792874, "grad_norm": 1.1091595888137817, "learning_rate": 5.018997138513421e-06, "loss": 0.0389, "step": 1588 },
+    { "epoch": 0.8644090847273221, "grad_norm": 1.737465262413025, "learning_rate": 4.979604296276274e-06, "loss": 0.0799, "step": 1589 },
+    { "epoch": 0.864953080375357, "grad_norm": 2.6226816177368164, "learning_rate": 4.940358552927515e-06, "loss": 0.0915, "step": 1590 },
+    { "epoch": 0.8654970760233918, "grad_norm": 2.10581111907959, "learning_rate": 4.901260036698008e-06, "loss": 0.1097, "step": 1591 },
+    { "epoch": 0.8660410716714266, "grad_norm": 2.790656328201294, "learning_rate": 4.862308875337606e-06, "loss": 0.1321, "step": 1592 },
+    { "epoch": 0.8665850673194615, "grad_norm": 1.890804409980774, "learning_rate": 4.823505196114686e-06, "loss": 0.1058, "step": 1593 },
+    { "epoch": 0.8671290629674963, "grad_norm": 2.415065050125122, "learning_rate": 4.784849125815743e-06, "loss": 0.1026, "step": 1594 },
+    { "epoch": 0.8676730586155311, "grad_norm": 2.2980716228485107, "learning_rate": 4.746340790744969e-06, "loss": 0.0767, "step": 1595 },
+    { "epoch": 0.8682170542635659, "grad_norm": 2.5799341201782227, "learning_rate": 4.707980316723837e-06, "loss": 0.1155, "step": 1596 },
+    { "epoch": 0.8687610499116007, "grad_norm": 2.50065016746521, "learning_rate": 4.669767829090748e-06, "loss": 0.144, "step": 1597 },
+    { "epoch": 0.8693050455596355, "grad_norm": 1.8984285593032837, "learning_rate": 4.631703452700542e-06, "loss": 0.0851, "step": 1598 },
+    { "epoch": 0.8698490412076704, "grad_norm": 1.9078527688980103, "learning_rate": 4.593787311924103e-06, "loss": 0.0474, "step": 1599 },
+    { "epoch": 0.8703930368557051, "grad_norm": 2.077728271484375, "learning_rate": 4.556019530648009e-06, "loss": 0.0833, "step": 1600 },
+    { "epoch": 0.87093703250374, "grad_norm": 0.9673436880111694, "learning_rate": 4.5184002322740785e-06, "loss": 0.337, "step": 1601 },
+    { "epoch": 0.8714810281517748, "grad_norm": 0.8802148103713989, "learning_rate": 4.480929539718986e-06, "loss": 0.2077, "step": 1602 },
+    { "epoch": 0.8720250237998096, "grad_norm": 1.0483946800231934, "learning_rate": 4.4436075754138384e-06, "loss": 0.2234, "step": 1603 },
+    { "epoch": 0.8725690194478444, "grad_norm": 1.032644271850586, "learning_rate": 4.406434461303782e-06, "loss": 0.1506, "step": 1604 },
+    { "epoch": 0.8731130150958792, "grad_norm": 0.8552057147026062, "learning_rate": 4.369410318847661e-06, "loss": 0.0722, "step": 1605 },
+    { "epoch": 0.873657010743914, "grad_norm": 0.9704299569129944, "learning_rate": 4.332535269017518e-06, "loss": 0.064, "step": 1606 },
+    { "epoch": 0.8742010063919489, "grad_norm": 0.8319167494773865, "learning_rate": 4.29580943229827e-06, "loss": 0.0512, "step": 1607 },
+    { "epoch": 0.8747450020399837, "grad_norm": 0.9854416847229004, "learning_rate": 4.259232928687318e-06, "loss": 0.0557, "step": 1608 },
+    { "epoch": 0.8752889976880185, "grad_norm": 1.2387385368347168, "learning_rate": 4.2228058776941025e-06, "loss": 0.1301, "step": 1609 },
+    { "epoch": 0.8758329933360534, "grad_norm": 1.2359501123428345, "learning_rate": 4.186528398339784e-06, "loss": 0.1337, "step": 1610 },
+    { "epoch": 0.8763769889840881, "grad_norm": 0.8162487745285034, "learning_rate": 4.150400609156774e-06, "loss": 0.0688, "step": 1611 },
+    { "epoch": 0.876920984632123, "grad_norm": 2.2920849323272705, "learning_rate": 4.1144226281883965e-06, "loss": 0.2257, "step": 1612 },
+    { "epoch": 0.8774649802801577, "grad_norm": 0.9135187864303589, "learning_rate": 4.078594572988537e-06, "loss": 0.0623, "step": 1613 },
+    { "epoch": 0.8780089759281926, "grad_norm": 1.0081349611282349, "learning_rate": 4.042916560621163e-06, "loss": 0.0411, "step": 1614 },
+    { "epoch": 0.8785529715762274, "grad_norm": 2.3004424571990967, "learning_rate": 4.007388707660015e-06, "loss": 0.231, "step": 1615 },
+    { "epoch": 0.8790969672242622, "grad_norm": 1.449098825454712, "learning_rate": 3.972011130188208e-06, "loss": 0.0681, "step": 1616 },
+    { "epoch": 0.879640962872297, "grad_norm": 1.9416675567626953, "learning_rate": 3.93678394379785e-06, "loss": 0.0734, "step": 1617 },
+    { "epoch": 0.8801849585203319, "grad_norm": 2.1252365112304688, "learning_rate": 3.901707263589671e-06, "loss": 0.1672, "step": 1618 },
+    { "epoch": 0.8807289541683666, "grad_norm": 1.9904075860977173, "learning_rate": 3.866781204172615e-06, "loss": 0.1518, "step": 1619 },
+    { "epoch": 0.8812729498164015, "grad_norm": 1.4934519529342651, "learning_rate": 3.832005879663492e-06, "loss": 0.1005, "step": 1620 },
+    { "epoch": 0.8818169454644362, "grad_norm": 0.9808096885681152, "learning_rate": 3.797381403686656e-06, "loss": 0.0542, "step": 1621 },
+    { "epoch": 0.8823609411124711, "grad_norm": 1.6409112215042114, "learning_rate": 3.7629078893735124e-06, "loss": 0.1114, "step": 1622 },
+    { "epoch": 0.882904936760506, "grad_norm": 1.5050103664398193, "learning_rate": 3.7285854493622428e-06, "loss": 0.061, "step": 1623 },
+    { "epoch": 0.8834489324085407, "grad_norm": 1.97392737865448, "learning_rate": 3.6944141957974244e-06, "loss": 0.1131, "step": 1624 },
+    { "epoch": 0.8839929280565756, "grad_norm": 1.9159128665924072, "learning_rate": 3.6603942403296343e-06, "loss": 0.1297, "step": 1625 },
+    { "epoch": 0.8845369237046103, "grad_norm": 1.322691798210144, "learning_rate": 3.626525694115124e-06, "loss": 0.0346, "step": 1626 },
+    { "epoch": 0.8850809193526452, "grad_norm": 0.9318520426750183, "learning_rate": 3.592808667815395e-06, "loss": 0.044, "step": 1627 },
+    { "epoch": 0.88562491500068, "grad_norm": 2.2436294555664062, "learning_rate": 3.5592432715968902e-06, "loss": 0.141, "step": 1628 },
+    { "epoch": 0.8861689106487148, "grad_norm": 4.926000118255615, "learning_rate": 3.525829615130649e-06, "loss": 0.1275, "step": 1629 },
+    { "epoch": 0.8867129062967496, "grad_norm": 2.1689817905426025, "learning_rate": 3.4925678075918787e-06, "loss": 0.186, "step": 1630 },
+    { "epoch": 0.8872569019447845, "grad_norm": 1.034318447113037, "learning_rate": 3.459457957659651e-06, "loss": 0.0281, "step": 1631 },
+    { "epoch": 0.8878008975928192, "grad_norm": 1.549639105796814, "learning_rate": 3.426500173516539e-06, "loss": 0.0889, "step": 1632 },
+    { "epoch": 0.8883448932408541, "grad_norm": 1.6429660320281982, "learning_rate": 3.393694562848254e-06, "loss": 0.1069, "step": 1633 },
+    { "epoch": 0.8888888888888888, "grad_norm": 1.2664613723754883, "learning_rate": 3.361041232843315e-06, "loss": 0.054, "step": 1634 },
+    { "epoch": 0.8894328845369237, "grad_norm": 2.6217501163482666, "learning_rate": 3.32854029019265e-06, "loss": 0.1795, "step": 1635 },
+    { "epoch": 0.8899768801849586, "grad_norm": 2.5693042278289795, "learning_rate": 3.2961918410892966e-06, "loss": 0.1816, "step": 1636 },
+    { "epoch": 0.8905208758329933, "grad_norm": 1.3367880582809448, "learning_rate": 3.263995991228036e-06, "loss": 0.0832, "step": 1637 },
+    { "epoch": 0.8910648714810282, "grad_norm": 1.7241603136062622, "learning_rate": 3.2319528458050587e-06, "loss": 0.1019, "step": 1638 },
+    { "epoch": 0.891608867129063, "grad_norm": 2.629528760910034, "learning_rate": 3.200062509517604e-06, "loss": 0.2019, "step": 1639 },
+    { "epoch": 0.8921528627770978, "grad_norm": 1.4212032556533813, "learning_rate": 3.1683250865636114e-06, "loss": 0.077, "step": 1640 },
+    { "epoch": 0.8926968584251326, "grad_norm": 2.028712749481201, "learning_rate": 3.1367406806414036e-06, "loss": 0.091, "step": 1641 },
+    { "epoch": 0.8932408540731674, "grad_norm": 2.4595437049865723, "learning_rate": 3.1053093949493627e-06, "loss": 0.2446, "step": 1642 },
+    { "epoch": 0.8937848497212022, "grad_norm": 1.6820522546768188, "learning_rate": 3.074031332185534e-06, "loss": 0.0772, "step": 1643 },
+    { "epoch": 0.8943288453692371, "grad_norm": 3.932166814804077, "learning_rate": 3.042906594547329e-06, "loss": 0.3023, "step": 1644 },
+    { "epoch": 0.8948728410172718, "grad_norm": 2.2325611114501953, "learning_rate": 3.0119352837311988e-06, "loss": 0.1679, "step": 1645 },
+    { "epoch": 0.8954168366653067, "grad_norm": 1.5536819696426392, "learning_rate": 2.98111750093229e-06, "loss": 0.1157, "step": 1646 },
+    { "epoch": 0.8959608323133414, "grad_norm": 1.3292300701141357, "learning_rate": 2.9504533468441174e-06, "loss": 0.071, "step": 1647 },
+    { "epoch": 0.8965048279613763, "grad_norm": 1.8766157627105713, "learning_rate": 2.9199429216582195e-06, "loss": 0.1024, "step": 1648 },
+    { "epoch": 0.8970488236094111, "grad_norm": 1.938150405883789, "learning_rate": 2.8895863250638367e-06, "loss": 0.0493, "step": 1649 },
+    { "epoch": 0.8975928192574459, "grad_norm": 1.9663032293319702, "learning_rate": 2.8593836562476272e-06, "loss": 0.0837, "step": 1650 },
+    { "epoch": 0.8981368149054808, "grad_norm": 0.7369717359542847, "learning_rate": 2.8293350138932805e-06, "loss": 0.1826, "step": 1651 },
+    { "epoch": 0.8986808105535156, "grad_norm": 1.0037497282028198, "learning_rate": 2.799440496181216e-06, "loss": 0.2505, "step": 1652 },
+    { "epoch": 0.8992248062015504, "grad_norm": 1.3233697414398193, "learning_rate": 2.769700200788289e-06, "loss": 0.2648, "step": 1653 },
+    { "epoch": 0.8997688018495852, "grad_norm": 1.4565203189849854, "learning_rate": 2.7401142248874412e-06, "loss": 0.2416, "step": 1654 },
+    { "epoch": 0.90031279749762, "grad_norm": 1.293020248413086, "learning_rate": 2.7106826651474073e-06, "loss": 0.1664, "step": 1655 },
+    { "epoch": 0.9008567931456548, "grad_norm": 1.6501991748809814, "learning_rate": 2.681405617732363e-06, "loss": 0.157, "step": 1656 },
+    { "epoch": 0.9014007887936897, "grad_norm": 1.224089503288269, "learning_rate": 2.6522831783016345e-06, "loss": 0.0838, "step": 1657 },
+    { "epoch": 0.9019447844417244, "grad_norm": 1.2141624689102173, "learning_rate": 2.623315442009422e-06, "loss": 0.1234, "step": 1658 },
+    { "epoch": 0.9024887800897593, "grad_norm": 1.0604627132415771, "learning_rate": 2.594502503504398e-06, "loss": 0.1039, "step": 1659 },
+    { "epoch": 0.9030327757377941, "grad_norm": 1.0698707103729248, "learning_rate": 2.565844456929478e-06, "loss": 0.0927, "step": 1660 },
+    { "epoch": 0.9035767713858289, "grad_norm": 1.2410540580749512, "learning_rate": 2.537341395921494e-06, "loss": 0.094, "step": 1661 },
+    { "epoch": 0.9041207670338637, "grad_norm": 0.8609293103218079, "learning_rate": 2.5089934136108664e-06, "loss": 0.0744, "step": 1662 },
+    { "epoch": 0.9046647626818985, "grad_norm": 1.7044366598129272, "learning_rate": 2.480800602621325e-06, "loss": 0.1855, "step": 1663 },
+    { "epoch": 0.9052087583299333, "grad_norm": 1.3923779726028442, "learning_rate": 2.452763055069579e-06, "loss": 0.0876, "step": 1664 },
+    { "epoch": 0.9057527539779682, "grad_norm": 1.5099945068359375, "learning_rate": 2.4248808625650376e-06, "loss": 0.112, "step": 1665 },
+    { "epoch": 0.906296749626003, "grad_norm": 2.0889129638671875, "learning_rate": 2.3971541162095323e-06, "loss": 0.15, "step": 1666 },
+    { "epoch": 0.9068407452740378, "grad_norm": 1.6550538539886475, "learning_rate": 2.3695829065969623e-06, "loss": 0.1151, "step": 1667 },
+    { "epoch": 0.9073847409220727, "grad_norm": 1.2602660655975342, "learning_rate": 2.3421673238130215e-06, "loss": 0.0667, "step": 1668 },
+    { "epoch": 0.9079287365701074, "grad_norm": 2.6563076972961426, "learning_rate": 2.3149074574349395e-06, "loss": 0.2377, "step": 1669 },
+    { "epoch": 0.9084727322181423, "grad_norm": 1.3382247686386108, "learning_rate": 2.287803396531152e-06, "loss": 0.0886, "step": 1670 },
+    { "epoch": 0.909016727866177, "grad_norm": 1.7384536266326904, "learning_rate": 2.2608552296610075e-06, "loss": 0.0763, "step": 1671 },
+    { "epoch": 0.9095607235142119, "grad_norm": 1.6171133518218994, "learning_rate": 2.2340630448745015e-06, "loss": 0.1036, "step": 1672 },
+    { "epoch": 0.9101047191622467, "grad_norm": 1.374774694442749, "learning_rate": 2.2074269297119587e-06, "loss": 0.0765, "step": 1673
     }
   ],
   "logging_steps": 1,
@@ -10064,7 +11737,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.6714266653551493e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
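The bulk of this diff is the 239 new log_history entries appended between step 1434 and step 1673, one per optimizer step (`"logging_steps": 1`). A quick way to inspect a checkpoint like this is to load trainer_state.json and summarize the tail of the loss curve; a minimal standard-library sketch (the local path is an assumption):

```python
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 1673 0.9101047191622467

# Mean training loss over the last 50 logged steps.
losses = [e["loss"] for e in state["log_history"] if "loss" in e]
print(sum(losses[-50:]) / 50)
```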