Training in progress, step 2600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201361312
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a4bcf338cd5bcfdfca322442e0437d0756fba523cd3c6e969d1ab63c60649d6
|
3 |
size 201361312
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102537812
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d2c3f69d1b1d65ff82b5361a177d936c38774c67f7e7c9e80fbae95019ee61a
|
3 |
size 102537812
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f17f98bb0e89af780ffffb5792ad56e5907144164a75d1f2d972da513c20656
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2bee69a6ac763ec64bcb9204c4b05b515ace2ec25288895042da824c3899c85a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 1.602339506149292,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2200",
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -16911,6 +16911,1414 @@
|
|
16911 |
"eval_samples_per_second": 8.736,
|
16912 |
"eval_steps_per_second": 4.373,
|
16913 |
"step": 2400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16914 |
}
|
16915 |
],
|
16916 |
"logging_steps": 1,
|
@@ -16925,7 +18333,7 @@
|
|
16925 |
"early_stopping_threshold": 0.0
|
16926 |
},
|
16927 |
"attributes": {
|
16928 |
-
"early_stopping_patience_counter":
|
16929 |
}
|
16930 |
},
|
16931 |
"TrainerControl": {
|
@@ -16939,7 +18347,7 @@
|
|
16939 |
"attributes": {}
|
16940 |
}
|
16941 |
},
|
16942 |
-
"total_flos": 2.
|
16943 |
"train_batch_size": 2,
|
16944 |
"trial_name": null,
|
16945 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": 1.602339506149292,
|
3 |
"best_model_checkpoint": "miner_id_24/checkpoint-2200",
|
4 |
+
"epoch": 0.12408872343725764,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 2600,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
16911 |
"eval_samples_per_second": 8.736,
|
16912 |
"eval_steps_per_second": 4.373,
|
16913 |
"step": 2400
|
16914 |
+
},
|
16915 |
+
{
|
16916 |
+
"epoch": 0.11459116345109831,
|
16917 |
+
"grad_norm": 6.055756092071533,
|
16918 |
+
"learning_rate": 0.00019929798684826006,
|
16919 |
+
"loss": 7.0865,
|
16920 |
+
"step": 2401
|
16921 |
+
},
|
16922 |
+
{
|
16923 |
+
"epoch": 0.11463888988318956,
|
16924 |
+
"grad_norm": 7.1241044998168945,
|
16925 |
+
"learning_rate": 0.00019929739525163437,
|
16926 |
+
"loss": 6.9193,
|
16927 |
+
"step": 2402
|
16928 |
+
},
|
16929 |
+
{
|
16930 |
+
"epoch": 0.1146866163152808,
|
16931 |
+
"grad_norm": 5.732908725738525,
|
16932 |
+
"learning_rate": 0.00019929680340671896,
|
16933 |
+
"loss": 6.8239,
|
16934 |
+
"step": 2403
|
16935 |
+
},
|
16936 |
+
{
|
16937 |
+
"epoch": 0.11473434274737207,
|
16938 |
+
"grad_norm": 6.3990278244018555,
|
16939 |
+
"learning_rate": 0.00019929621131351525,
|
16940 |
+
"loss": 7.5857,
|
16941 |
+
"step": 2404
|
16942 |
+
},
|
16943 |
+
{
|
16944 |
+
"epoch": 0.11478206917946332,
|
16945 |
+
"grad_norm": 6.51461124420166,
|
16946 |
+
"learning_rate": 0.00019929561897202478,
|
16947 |
+
"loss": 7.5126,
|
16948 |
+
"step": 2405
|
16949 |
+
},
|
16950 |
+
{
|
16951 |
+
"epoch": 0.11482979561155457,
|
16952 |
+
"grad_norm": 4.333196640014648,
|
16953 |
+
"learning_rate": 0.000199295026382249,
|
16954 |
+
"loss": 5.407,
|
16955 |
+
"step": 2406
|
16956 |
+
},
|
16957 |
+
{
|
16958 |
+
"epoch": 0.11487752204364582,
|
16959 |
+
"grad_norm": 8.812541007995605,
|
16960 |
+
"learning_rate": 0.0001992944335441894,
|
16961 |
+
"loss": 7.8813,
|
16962 |
+
"step": 2407
|
16963 |
+
},
|
16964 |
+
{
|
16965 |
+
"epoch": 0.11492524847573707,
|
16966 |
+
"grad_norm": 6.706300735473633,
|
16967 |
+
"learning_rate": 0.00019929384045784746,
|
16968 |
+
"loss": 7.3434,
|
16969 |
+
"step": 2408
|
16970 |
+
},
|
16971 |
+
{
|
16972 |
+
"epoch": 0.11497297490782833,
|
16973 |
+
"grad_norm": 5.9792375564575195,
|
16974 |
+
"learning_rate": 0.00019929324712322465,
|
16975 |
+
"loss": 6.588,
|
16976 |
+
"step": 2409
|
16977 |
+
},
|
16978 |
+
{
|
16979 |
+
"epoch": 0.11502070133991958,
|
16980 |
+
"grad_norm": 5.693054676055908,
|
16981 |
+
"learning_rate": 0.00019929265354032247,
|
16982 |
+
"loss": 6.9219,
|
16983 |
+
"step": 2410
|
16984 |
+
},
|
16985 |
+
{
|
16986 |
+
"epoch": 0.11506842777201083,
|
16987 |
+
"grad_norm": 6.320680141448975,
|
16988 |
+
"learning_rate": 0.00019929205970914242,
|
16989 |
+
"loss": 6.0497,
|
16990 |
+
"step": 2411
|
16991 |
+
},
|
16992 |
+
{
|
16993 |
+
"epoch": 0.11511615420410208,
|
16994 |
+
"grad_norm": 7.449224948883057,
|
16995 |
+
"learning_rate": 0.00019929146562968594,
|
16996 |
+
"loss": 6.0061,
|
16997 |
+
"step": 2412
|
16998 |
+
},
|
16999 |
+
{
|
17000 |
+
"epoch": 0.11516388063619334,
|
17001 |
+
"grad_norm": 5.945936679840088,
|
17002 |
+
"learning_rate": 0.00019929087130195457,
|
17003 |
+
"loss": 6.7282,
|
17004 |
+
"step": 2413
|
17005 |
+
},
|
17006 |
+
{
|
17007 |
+
"epoch": 0.1152116070682846,
|
17008 |
+
"grad_norm": 5.9501752853393555,
|
17009 |
+
"learning_rate": 0.00019929027672594977,
|
17010 |
+
"loss": 6.9589,
|
17011 |
+
"step": 2414
|
17012 |
+
},
|
17013 |
+
{
|
17014 |
+
"epoch": 0.11525933350037584,
|
17015 |
+
"grad_norm": 5.984325408935547,
|
17016 |
+
"learning_rate": 0.00019928968190167302,
|
17017 |
+
"loss": 6.0555,
|
17018 |
+
"step": 2415
|
17019 |
+
},
|
17020 |
+
{
|
17021 |
+
"epoch": 0.1153070599324671,
|
17022 |
+
"grad_norm": 5.344329833984375,
|
17023 |
+
"learning_rate": 0.00019928908682912582,
|
17024 |
+
"loss": 6.0593,
|
17025 |
+
"step": 2416
|
17026 |
+
},
|
17027 |
+
{
|
17028 |
+
"epoch": 0.11535478636455836,
|
17029 |
+
"grad_norm": 7.7345709800720215,
|
17030 |
+
"learning_rate": 0.0001992884915083096,
|
17031 |
+
"loss": 7.2564,
|
17032 |
+
"step": 2417
|
17033 |
+
},
|
17034 |
+
{
|
17035 |
+
"epoch": 0.1154025127966496,
|
17036 |
+
"grad_norm": 6.209690570831299,
|
17037 |
+
"learning_rate": 0.00019928789593922597,
|
17038 |
+
"loss": 6.8254,
|
17039 |
+
"step": 2418
|
17040 |
+
},
|
17041 |
+
{
|
17042 |
+
"epoch": 0.11545023922874086,
|
17043 |
+
"grad_norm": 7.814291477203369,
|
17044 |
+
"learning_rate": 0.00019928730012187628,
|
17045 |
+
"loss": 6.2167,
|
17046 |
+
"step": 2419
|
17047 |
+
},
|
17048 |
+
{
|
17049 |
+
"epoch": 0.1154979656608321,
|
17050 |
+
"grad_norm": 7.086947917938232,
|
17051 |
+
"learning_rate": 0.00019928670405626212,
|
17052 |
+
"loss": 6.982,
|
17053 |
+
"step": 2420
|
17054 |
+
},
|
17055 |
+
{
|
17056 |
+
"epoch": 0.11554569209292337,
|
17057 |
+
"grad_norm": 5.73114538192749,
|
17058 |
+
"learning_rate": 0.00019928610774238496,
|
17059 |
+
"loss": 6.7713,
|
17060 |
+
"step": 2421
|
17061 |
+
},
|
17062 |
+
{
|
17063 |
+
"epoch": 0.11559341852501462,
|
17064 |
+
"grad_norm": 6.70130729675293,
|
17065 |
+
"learning_rate": 0.00019928551118024627,
|
17066 |
+
"loss": 6.346,
|
17067 |
+
"step": 2422
|
17068 |
+
},
|
17069 |
+
{
|
17070 |
+
"epoch": 0.11564114495710587,
|
17071 |
+
"grad_norm": 6.0923662185668945,
|
17072 |
+
"learning_rate": 0.00019928491436984756,
|
17073 |
+
"loss": 7.3247,
|
17074 |
+
"step": 2423
|
17075 |
+
},
|
17076 |
+
{
|
17077 |
+
"epoch": 0.11568887138919712,
|
17078 |
+
"grad_norm": 5.1853861808776855,
|
17079 |
+
"learning_rate": 0.0001992843173111903,
|
17080 |
+
"loss": 5.7999,
|
17081 |
+
"step": 2424
|
17082 |
+
},
|
17083 |
+
{
|
17084 |
+
"epoch": 0.11573659782128838,
|
17085 |
+
"grad_norm": 6.79941463470459,
|
17086 |
+
"learning_rate": 0.000199283720004276,
|
17087 |
+
"loss": 5.7225,
|
17088 |
+
"step": 2425
|
17089 |
+
},
|
17090 |
+
{
|
17091 |
+
"epoch": 0.11578432425337963,
|
17092 |
+
"grad_norm": 6.806683540344238,
|
17093 |
+
"learning_rate": 0.00019928312244910616,
|
17094 |
+
"loss": 8.1717,
|
17095 |
+
"step": 2426
|
17096 |
+
},
|
17097 |
+
{
|
17098 |
+
"epoch": 0.11583205068547088,
|
17099 |
+
"grad_norm": 6.361385822296143,
|
17100 |
+
"learning_rate": 0.00019928252464568224,
|
17101 |
+
"loss": 7.508,
|
17102 |
+
"step": 2427
|
17103 |
+
},
|
17104 |
+
{
|
17105 |
+
"epoch": 0.11587977711756213,
|
17106 |
+
"grad_norm": 7.511900424957275,
|
17107 |
+
"learning_rate": 0.0001992819265940058,
|
17108 |
+
"loss": 7.6279,
|
17109 |
+
"step": 2428
|
17110 |
+
},
|
17111 |
+
{
|
17112 |
+
"epoch": 0.1159275035496534,
|
17113 |
+
"grad_norm": 6.408151149749756,
|
17114 |
+
"learning_rate": 0.00019928132829407825,
|
17115 |
+
"loss": 6.464,
|
17116 |
+
"step": 2429
|
17117 |
+
},
|
17118 |
+
{
|
17119 |
+
"epoch": 0.11597522998174464,
|
17120 |
+
"grad_norm": 6.161413192749023,
|
17121 |
+
"learning_rate": 0.00019928072974590116,
|
17122 |
+
"loss": 7.1698,
|
17123 |
+
"step": 2430
|
17124 |
+
},
|
17125 |
+
{
|
17126 |
+
"epoch": 0.11602295641383589,
|
17127 |
+
"grad_norm": 6.958288669586182,
|
17128 |
+
"learning_rate": 0.00019928013094947598,
|
17129 |
+
"loss": 8.821,
|
17130 |
+
"step": 2431
|
17131 |
+
},
|
17132 |
+
{
|
17133 |
+
"epoch": 0.11607068284592714,
|
17134 |
+
"grad_norm": 4.9188127517700195,
|
17135 |
+
"learning_rate": 0.00019927953190480423,
|
17136 |
+
"loss": 5.397,
|
17137 |
+
"step": 2432
|
17138 |
+
},
|
17139 |
+
{
|
17140 |
+
"epoch": 0.11611840927801839,
|
17141 |
+
"grad_norm": 5.682304382324219,
|
17142 |
+
"learning_rate": 0.0001992789326118874,
|
17143 |
+
"loss": 7.3442,
|
17144 |
+
"step": 2433
|
17145 |
+
},
|
17146 |
+
{
|
17147 |
+
"epoch": 0.11616613571010966,
|
17148 |
+
"grad_norm": 5.912992000579834,
|
17149 |
+
"learning_rate": 0.00019927833307072698,
|
17150 |
+
"loss": 6.825,
|
17151 |
+
"step": 2434
|
17152 |
+
},
|
17153 |
+
{
|
17154 |
+
"epoch": 0.1162138621422009,
|
17155 |
+
"grad_norm": 5.315921783447266,
|
17156 |
+
"learning_rate": 0.00019927773328132448,
|
17157 |
+
"loss": 4.3194,
|
17158 |
+
"step": 2435
|
17159 |
+
},
|
17160 |
+
{
|
17161 |
+
"epoch": 0.11626158857429215,
|
17162 |
+
"grad_norm": 6.76012659072876,
|
17163 |
+
"learning_rate": 0.00019927713324368145,
|
17164 |
+
"loss": 6.5639,
|
17165 |
+
"step": 2436
|
17166 |
+
},
|
17167 |
+
{
|
17168 |
+
"epoch": 0.1163093150063834,
|
17169 |
+
"grad_norm": 6.277029037475586,
|
17170 |
+
"learning_rate": 0.00019927653295779928,
|
17171 |
+
"loss": 6.0674,
|
17172 |
+
"step": 2437
|
17173 |
+
},
|
17174 |
+
{
|
17175 |
+
"epoch": 0.11635704143847467,
|
17176 |
+
"grad_norm": 5.538710117340088,
|
17177 |
+
"learning_rate": 0.00019927593242367956,
|
17178 |
+
"loss": 7.0892,
|
17179 |
+
"step": 2438
|
17180 |
+
},
|
17181 |
+
{
|
17182 |
+
"epoch": 0.11640476787056592,
|
17183 |
+
"grad_norm": 6.0107245445251465,
|
17184 |
+
"learning_rate": 0.00019927533164132376,
|
17185 |
+
"loss": 6.2243,
|
17186 |
+
"step": 2439
|
17187 |
+
},
|
17188 |
+
{
|
17189 |
+
"epoch": 0.11645249430265717,
|
17190 |
+
"grad_norm": 8.592703819274902,
|
17191 |
+
"learning_rate": 0.0001992747306107334,
|
17192 |
+
"loss": 5.2615,
|
17193 |
+
"step": 2440
|
17194 |
+
},
|
17195 |
+
{
|
17196 |
+
"epoch": 0.11650022073474842,
|
17197 |
+
"grad_norm": 5.739934921264648,
|
17198 |
+
"learning_rate": 0.00019927412933190992,
|
17199 |
+
"loss": 5.8984,
|
17200 |
+
"step": 2441
|
17201 |
+
},
|
17202 |
+
{
|
17203 |
+
"epoch": 0.11654794716683968,
|
17204 |
+
"grad_norm": 5.398654460906982,
|
17205 |
+
"learning_rate": 0.0001992735278048549,
|
17206 |
+
"loss": 5.6214,
|
17207 |
+
"step": 2442
|
17208 |
+
},
|
17209 |
+
{
|
17210 |
+
"epoch": 0.11659567359893093,
|
17211 |
+
"grad_norm": 6.197778224945068,
|
17212 |
+
"learning_rate": 0.00019927292602956982,
|
17213 |
+
"loss": 6.3904,
|
17214 |
+
"step": 2443
|
17215 |
+
},
|
17216 |
+
{
|
17217 |
+
"epoch": 0.11664340003102218,
|
17218 |
+
"grad_norm": 5.15541410446167,
|
17219 |
+
"learning_rate": 0.00019927232400605616,
|
17220 |
+
"loss": 5.1736,
|
17221 |
+
"step": 2444
|
17222 |
+
},
|
17223 |
+
{
|
17224 |
+
"epoch": 0.11669112646311343,
|
17225 |
+
"grad_norm": 7.815628528594971,
|
17226 |
+
"learning_rate": 0.00019927172173431546,
|
17227 |
+
"loss": 7.705,
|
17228 |
+
"step": 2445
|
17229 |
+
},
|
17230 |
+
{
|
17231 |
+
"epoch": 0.11673885289520469,
|
17232 |
+
"grad_norm": 6.1312642097473145,
|
17233 |
+
"learning_rate": 0.00019927111921434922,
|
17234 |
+
"loss": 6.9547,
|
17235 |
+
"step": 2446
|
17236 |
+
},
|
17237 |
+
{
|
17238 |
+
"epoch": 0.11678657932729594,
|
17239 |
+
"grad_norm": 5.695800304412842,
|
17240 |
+
"learning_rate": 0.00019927051644615893,
|
17241 |
+
"loss": 5.9635,
|
17242 |
+
"step": 2447
|
17243 |
+
},
|
17244 |
+
{
|
17245 |
+
"epoch": 0.11683430575938719,
|
17246 |
+
"grad_norm": 7.247018814086914,
|
17247 |
+
"learning_rate": 0.00019926991342974609,
|
17248 |
+
"loss": 6.8249,
|
17249 |
+
"step": 2448
|
17250 |
+
},
|
17251 |
+
{
|
17252 |
+
"epoch": 0.11688203219147844,
|
17253 |
+
"grad_norm": 6.522912979125977,
|
17254 |
+
"learning_rate": 0.00019926931016511223,
|
17255 |
+
"loss": 6.7136,
|
17256 |
+
"step": 2449
|
17257 |
+
},
|
17258 |
+
{
|
17259 |
+
"epoch": 0.1169297586235697,
|
17260 |
+
"grad_norm": 5.864326477050781,
|
17261 |
+
"learning_rate": 0.00019926870665225885,
|
17262 |
+
"loss": 6.6803,
|
17263 |
+
"step": 2450
|
17264 |
+
},
|
17265 |
+
{
|
17266 |
+
"epoch": 0.11697748505566095,
|
17267 |
+
"grad_norm": 6.620709419250488,
|
17268 |
+
"learning_rate": 0.0001992681028911875,
|
17269 |
+
"loss": 7.1,
|
17270 |
+
"step": 2451
|
17271 |
+
},
|
17272 |
+
{
|
17273 |
+
"epoch": 0.1170252114877522,
|
17274 |
+
"grad_norm": 6.445756435394287,
|
17275 |
+
"learning_rate": 0.0001992674988818996,
|
17276 |
+
"loss": 6.0606,
|
17277 |
+
"step": 2452
|
17278 |
+
},
|
17279 |
+
{
|
17280 |
+
"epoch": 0.11707293791984345,
|
17281 |
+
"grad_norm": 5.338548183441162,
|
17282 |
+
"learning_rate": 0.00019926689462439673,
|
17283 |
+
"loss": 4.951,
|
17284 |
+
"step": 2453
|
17285 |
+
},
|
17286 |
+
{
|
17287 |
+
"epoch": 0.11712066435193472,
|
17288 |
+
"grad_norm": 5.401038646697998,
|
17289 |
+
"learning_rate": 0.00019926629011868035,
|
17290 |
+
"loss": 5.7122,
|
17291 |
+
"step": 2454
|
17292 |
+
},
|
17293 |
+
{
|
17294 |
+
"epoch": 0.11716839078402597,
|
17295 |
+
"grad_norm": 7.513417720794678,
|
17296 |
+
"learning_rate": 0.00019926568536475205,
|
17297 |
+
"loss": 7.0187,
|
17298 |
+
"step": 2455
|
17299 |
+
},
|
17300 |
+
{
|
17301 |
+
"epoch": 0.11721611721611722,
|
17302 |
+
"grad_norm": 7.435956954956055,
|
17303 |
+
"learning_rate": 0.00019926508036261328,
|
17304 |
+
"loss": 6.212,
|
17305 |
+
"step": 2456
|
17306 |
+
},
|
17307 |
+
{
|
17308 |
+
"epoch": 0.11726384364820847,
|
17309 |
+
"grad_norm": 6.338344097137451,
|
17310 |
+
"learning_rate": 0.00019926447511226555,
|
17311 |
+
"loss": 7.8148,
|
17312 |
+
"step": 2457
|
17313 |
+
},
|
17314 |
+
{
|
17315 |
+
"epoch": 0.11731157008029972,
|
17316 |
+
"grad_norm": 6.532636642456055,
|
17317 |
+
"learning_rate": 0.00019926386961371042,
|
17318 |
+
"loss": 7.2181,
|
17319 |
+
"step": 2458
|
17320 |
+
},
|
17321 |
+
{
|
17322 |
+
"epoch": 0.11735929651239098,
|
17323 |
+
"grad_norm": 5.906259536743164,
|
17324 |
+
"learning_rate": 0.00019926326386694935,
|
17325 |
+
"loss": 7.3303,
|
17326 |
+
"step": 2459
|
17327 |
+
},
|
17328 |
+
{
|
17329 |
+
"epoch": 0.11740702294448223,
|
17330 |
+
"grad_norm": 5.578773498535156,
|
17331 |
+
"learning_rate": 0.00019926265787198388,
|
17332 |
+
"loss": 5.9464,
|
17333 |
+
"step": 2460
|
17334 |
+
},
|
17335 |
+
{
|
17336 |
+
"epoch": 0.11745474937657348,
|
17337 |
+
"grad_norm": 6.0162272453308105,
|
17338 |
+
"learning_rate": 0.00019926205162881555,
|
17339 |
+
"loss": 6.7526,
|
17340 |
+
"step": 2461
|
17341 |
+
},
|
17342 |
+
{
|
17343 |
+
"epoch": 0.11750247580866473,
|
17344 |
+
"grad_norm": 4.859315395355225,
|
17345 |
+
"learning_rate": 0.00019926144513744584,
|
17346 |
+
"loss": 5.4383,
|
17347 |
+
"step": 2462
|
17348 |
+
},
|
17349 |
+
{
|
17350 |
+
"epoch": 0.11755020224075599,
|
17351 |
+
"grad_norm": 5.332747936248779,
|
17352 |
+
"learning_rate": 0.00019926083839787628,
|
17353 |
+
"loss": 6.8147,
|
17354 |
+
"step": 2463
|
17355 |
+
},
|
17356 |
+
{
|
17357 |
+
"epoch": 0.11759792867284724,
|
17358 |
+
"grad_norm": 6.401904106140137,
|
17359 |
+
"learning_rate": 0.00019926023141010837,
|
17360 |
+
"loss": 5.5658,
|
17361 |
+
"step": 2464
|
17362 |
+
},
|
17363 |
+
{
|
17364 |
+
"epoch": 0.11764565510493849,
|
17365 |
+
"grad_norm": 8.178959846496582,
|
17366 |
+
"learning_rate": 0.00019925962417414366,
|
17367 |
+
"loss": 5.4159,
|
17368 |
+
"step": 2465
|
17369 |
+
},
|
17370 |
+
{
|
17371 |
+
"epoch": 0.11769338153702974,
|
17372 |
+
"grad_norm": 4.893832683563232,
|
17373 |
+
"learning_rate": 0.00019925901668998362,
|
17374 |
+
"loss": 6.2554,
|
17375 |
+
"step": 2466
|
17376 |
+
},
|
17377 |
+
{
|
17378 |
+
"epoch": 0.117741107969121,
|
17379 |
+
"grad_norm": 6.215792655944824,
|
17380 |
+
"learning_rate": 0.00019925840895762984,
|
17381 |
+
"loss": 5.6131,
|
17382 |
+
"step": 2467
|
17383 |
+
},
|
17384 |
+
{
|
17385 |
+
"epoch": 0.11778883440121225,
|
17386 |
+
"grad_norm": 103.55281829833984,
|
17387 |
+
"learning_rate": 0.00019925780097708378,
|
17388 |
+
"loss": 10.3719,
|
17389 |
+
"step": 2468
|
17390 |
+
},
|
17391 |
+
{
|
17392 |
+
"epoch": 0.1178365608333035,
|
17393 |
+
"grad_norm": 6.744437217712402,
|
17394 |
+
"learning_rate": 0.00019925719274834695,
|
17395 |
+
"loss": 4.8652,
|
17396 |
+
"step": 2469
|
17397 |
+
},
|
17398 |
+
{
|
17399 |
+
"epoch": 0.11788428726539475,
|
17400 |
+
"grad_norm": 6.799299240112305,
|
17401 |
+
"learning_rate": 0.00019925658427142096,
|
17402 |
+
"loss": 6.5246,
|
17403 |
+
"step": 2470
|
17404 |
+
},
|
17405 |
+
{
|
17406 |
+
"epoch": 0.11793201369748602,
|
17407 |
+
"grad_norm": 6.361636161804199,
|
17408 |
+
"learning_rate": 0.00019925597554630722,
|
17409 |
+
"loss": 7.6175,
|
17410 |
+
"step": 2471
|
17411 |
+
},
|
17412 |
+
{
|
17413 |
+
"epoch": 0.11797974012957727,
|
17414 |
+
"grad_norm": 4.605724334716797,
|
17415 |
+
"learning_rate": 0.00019925536657300734,
|
17416 |
+
"loss": 5.1498,
|
17417 |
+
"step": 2472
|
17418 |
+
},
|
17419 |
+
{
|
17420 |
+
"epoch": 0.11802746656166851,
|
17421 |
+
"grad_norm": 5.402807235717773,
|
17422 |
+
"learning_rate": 0.0001992547573515228,
|
17423 |
+
"loss": 6.0529,
|
17424 |
+
"step": 2473
|
17425 |
+
},
|
17426 |
+
{
|
17427 |
+
"epoch": 0.11807519299375976,
|
17428 |
+
"grad_norm": 8.144805908203125,
|
17429 |
+
"learning_rate": 0.00019925414788185512,
|
17430 |
+
"loss": 8.934,
|
17431 |
+
"step": 2474
|
17432 |
+
},
|
17433 |
+
{
|
17434 |
+
"epoch": 0.11812291942585103,
|
17435 |
+
"grad_norm": 10.172551155090332,
|
17436 |
+
"learning_rate": 0.00019925353816400583,
|
17437 |
+
"loss": 7.2408,
|
17438 |
+
"step": 2475
|
17439 |
+
},
|
17440 |
+
{
|
17441 |
+
"epoch": 0.11817064585794228,
|
17442 |
+
"grad_norm": 9.197928428649902,
|
17443 |
+
"learning_rate": 0.00019925292819797648,
|
17444 |
+
"loss": 6.8291,
|
17445 |
+
"step": 2476
|
17446 |
+
},
|
17447 |
+
{
|
17448 |
+
"epoch": 0.11821837229003353,
|
17449 |
+
"grad_norm": 606.15283203125,
|
17450 |
+
"learning_rate": 0.00019925231798376854,
|
17451 |
+
"loss": 8.0119,
|
17452 |
+
"step": 2477
|
17453 |
+
},
|
17454 |
+
{
|
17455 |
+
"epoch": 0.11826609872212478,
|
17456 |
+
"grad_norm": 16.890789031982422,
|
17457 |
+
"learning_rate": 0.0001992517075213836,
|
17458 |
+
"loss": 7.1259,
|
17459 |
+
"step": 2478
|
17460 |
+
},
|
17461 |
+
{
|
17462 |
+
"epoch": 0.11831382515421604,
|
17463 |
+
"grad_norm": 44.77732467651367,
|
17464 |
+
"learning_rate": 0.00019925109681082315,
|
17465 |
+
"loss": 7.1127,
|
17466 |
+
"step": 2479
|
17467 |
+
},
|
17468 |
+
{
|
17469 |
+
"epoch": 0.11836155158630729,
|
17470 |
+
"grad_norm": 278.022705078125,
|
17471 |
+
"learning_rate": 0.00019925048585208872,
|
17472 |
+
"loss": 6.3405,
|
17473 |
+
"step": 2480
|
17474 |
+
},
|
17475 |
+
{
|
17476 |
+
"epoch": 0.11840927801839854,
|
17477 |
+
"grad_norm": 9.286020278930664,
|
17478 |
+
"learning_rate": 0.0001992498746451818,
|
17479 |
+
"loss": 8.3945,
|
17480 |
+
"step": 2481
|
17481 |
+
},
|
17482 |
+
{
|
17483 |
+
"epoch": 0.11845700445048979,
|
17484 |
+
"grad_norm": 4.785390853881836,
|
17485 |
+
"learning_rate": 0.00019924926319010404,
|
17486 |
+
"loss": 5.2656,
|
17487 |
+
"step": 2482
|
17488 |
+
},
|
17489 |
+
{
|
17490 |
+
"epoch": 0.11850473088258104,
|
17491 |
+
"grad_norm": 5.759562969207764,
|
17492 |
+
"learning_rate": 0.00019924865148685683,
|
17493 |
+
"loss": 6.6575,
|
17494 |
+
"step": 2483
|
17495 |
+
},
|
17496 |
+
{
|
17497 |
+
"epoch": 0.1185524573146723,
|
17498 |
+
"grad_norm": 5.401790142059326,
|
17499 |
+
"learning_rate": 0.00019924803953544177,
|
17500 |
+
"loss": 6.0652,
|
17501 |
+
"step": 2484
|
17502 |
+
},
|
17503 |
+
{
|
17504 |
+
"epoch": 0.11860018374676355,
|
17505 |
+
"grad_norm": 6.856231212615967,
|
17506 |
+
"learning_rate": 0.00019924742733586038,
|
17507 |
+
"loss": 7.3388,
|
17508 |
+
"step": 2485
|
17509 |
+
},
|
17510 |
+
{
|
17511 |
+
"epoch": 0.1186479101788548,
|
17512 |
+
"grad_norm": 4.967160701751709,
|
17513 |
+
"learning_rate": 0.0001992468148881142,
|
17514 |
+
"loss": 6.0109,
|
17515 |
+
"step": 2486
|
17516 |
+
},
|
17517 |
+
{
|
17518 |
+
"epoch": 0.11869563661094605,
|
17519 |
+
"grad_norm": 7.75101900100708,
|
17520 |
+
"learning_rate": 0.00019924620219220472,
|
17521 |
+
"loss": 6.6262,
|
17522 |
+
"step": 2487
|
17523 |
+
},
|
17524 |
+
{
|
17525 |
+
"epoch": 0.11874336304303731,
|
17526 |
+
"grad_norm": 8.207610130310059,
|
17527 |
+
"learning_rate": 0.00019924558924813352,
|
17528 |
+
"loss": 7.5386,
|
17529 |
+
"step": 2488
|
17530 |
+
},
|
17531 |
+
{
|
17532 |
+
"epoch": 0.11879108947512856,
|
17533 |
+
"grad_norm": 6.139005661010742,
|
17534 |
+
"learning_rate": 0.00019924497605590212,
|
17535 |
+
"loss": 6.0931,
|
17536 |
+
"step": 2489
|
17537 |
+
},
|
17538 |
+
{
|
17539 |
+
"epoch": 0.11883881590721981,
|
17540 |
+
"grad_norm": 5.760986328125,
|
17541 |
+
"learning_rate": 0.00019924436261551206,
|
17542 |
+
"loss": 5.4283,
|
17543 |
+
"step": 2490
|
17544 |
+
},
|
17545 |
+
{
|
17546 |
+
"epoch": 0.11888654233931106,
|
17547 |
+
"grad_norm": 6.050114631652832,
|
17548 |
+
"learning_rate": 0.00019924374892696484,
|
17549 |
+
"loss": 6.4558,
|
17550 |
+
"step": 2491
|
17551 |
+
},
|
17552 |
+
{
|
17553 |
+
"epoch": 0.11893426877140233,
|
17554 |
+
"grad_norm": 6.406091690063477,
|
17555 |
+
"learning_rate": 0.00019924313499026201,
|
17556 |
+
"loss": 6.0646,
|
17557 |
+
"step": 2492
|
17558 |
+
},
|
17559 |
+
{
|
17560 |
+
"epoch": 0.11898199520349358,
|
17561 |
+
"grad_norm": 5.947465419769287,
|
17562 |
+
"learning_rate": 0.00019924252080540512,
|
17563 |
+
"loss": 7.7813,
|
17564 |
+
"step": 2493
|
17565 |
+
},
|
17566 |
+
{
|
17567 |
+
"epoch": 0.11902972163558483,
|
17568 |
+
"grad_norm": 6.351478576660156,
|
17569 |
+
"learning_rate": 0.00019924190637239571,
|
17570 |
+
"loss": 6.2155,
|
17571 |
+
"step": 2494
|
17572 |
+
},
|
17573 |
+
{
|
17574 |
+
"epoch": 0.11907744806767608,
|
17575 |
+
"grad_norm": 5.51107931137085,
|
17576 |
+
"learning_rate": 0.0001992412916912353,
|
17577 |
+
"loss": 5.8992,
|
17578 |
+
"step": 2495
|
17579 |
+
},
|
17580 |
+
{
|
17581 |
+
"epoch": 0.11912517449976734,
|
17582 |
+
"grad_norm": 5.350305080413818,
|
17583 |
+
"learning_rate": 0.00019924067676192544,
|
17584 |
+
"loss": 5.2331,
|
17585 |
+
"step": 2496
|
17586 |
+
},
|
17587 |
+
{
|
17588 |
+
"epoch": 0.11917290093185859,
|
17589 |
+
"grad_norm": 5.645650386810303,
|
17590 |
+
"learning_rate": 0.0001992400615844676,
|
17591 |
+
"loss": 5.9623,
|
17592 |
+
"step": 2497
|
17593 |
+
},
|
17594 |
+
{
|
17595 |
+
"epoch": 0.11922062736394984,
|
17596 |
+
"grad_norm": 5.326278209686279,
|
17597 |
+
"learning_rate": 0.00019923944615886344,
|
17598 |
+
"loss": 5.8638,
|
17599 |
+
"step": 2498
|
17600 |
+
},
|
17601 |
+
{
|
17602 |
+
"epoch": 0.11926835379604109,
|
17603 |
+
"grad_norm": 5.611948490142822,
|
17604 |
+
"learning_rate": 0.00019923883048511443,
|
17605 |
+
"loss": 6.6996,
|
17606 |
+
"step": 2499
|
17607 |
+
},
|
17608 |
+
{
|
17609 |
+
"epoch": 0.11931608022813235,
|
17610 |
+
"grad_norm": 5.947208404541016,
|
17611 |
+
"learning_rate": 0.00019923821456322208,
|
17612 |
+
"loss": 5.2053,
|
17613 |
+
"step": 2500
|
17614 |
+
},
|
17615 |
+
{
|
17616 |
+
"epoch": 0.1193638066602236,
|
17617 |
+
"grad_norm": 6.137307167053223,
|
17618 |
+
"learning_rate": 0.000199237598393188,
|
17619 |
+
"loss": 6.1276,
|
17620 |
+
"step": 2501
|
17621 |
+
},
|
17622 |
+
{
|
17623 |
+
"epoch": 0.11941153309231485,
|
17624 |
+
"grad_norm": 7.9021525382995605,
|
17625 |
+
"learning_rate": 0.0001992369819750137,
|
17626 |
+
"loss": 8.74,
|
17627 |
+
"step": 2502
|
17628 |
+
},
|
17629 |
+
{
|
17630 |
+
"epoch": 0.1194592595244061,
|
17631 |
+
"grad_norm": 8.133113861083984,
|
17632 |
+
"learning_rate": 0.00019923636530870068,
|
17633 |
+
"loss": 8.2945,
|
17634 |
+
"step": 2503
|
17635 |
+
},
|
17636 |
+
{
|
17637 |
+
"epoch": 0.11950698595649736,
|
17638 |
+
"grad_norm": 4.672719478607178,
|
17639 |
+
"learning_rate": 0.00019923574839425054,
|
17640 |
+
"loss": 5.661,
|
17641 |
+
"step": 2504
|
17642 |
+
},
|
17643 |
+
{
|
17644 |
+
"epoch": 0.11955471238858861,
|
17645 |
+
"grad_norm": 5.1231513023376465,
|
17646 |
+
"learning_rate": 0.0001992351312316648,
|
17647 |
+
"loss": 5.0149,
|
17648 |
+
"step": 2505
|
17649 |
+
},
|
17650 |
+
{
|
17651 |
+
"epoch": 0.11960243882067986,
|
17652 |
+
"grad_norm": 6.5395121574401855,
|
17653 |
+
"learning_rate": 0.00019923451382094498,
|
17654 |
+
"loss": 6.6384,
|
17655 |
+
"step": 2506
|
17656 |
+
},
|
17657 |
+
{
|
17658 |
+
"epoch": 0.11965016525277111,
|
17659 |
+
"grad_norm": 6.963812828063965,
|
17660 |
+
"learning_rate": 0.00019923389616209266,
|
17661 |
+
"loss": 7.6418,
|
17662 |
+
"step": 2507
|
17663 |
+
},
|
17664 |
+
{
|
17665 |
+
"epoch": 0.11969789168486236,
|
17666 |
+
"grad_norm": 5.314905643463135,
|
17667 |
+
"learning_rate": 0.0001992332782551094,
|
17668 |
+
"loss": 5.357,
|
17669 |
+
"step": 2508
|
17670 |
+
},
|
17671 |
+
{
|
17672 |
+
"epoch": 0.11974561811695363,
|
17673 |
+
"grad_norm": 6.953101634979248,
|
17674 |
+
"learning_rate": 0.0001992326600999967,
|
17675 |
+
"loss": 7.4247,
|
17676 |
+
"step": 2509
|
17677 |
+
},
|
17678 |
+
{
|
17679 |
+
"epoch": 0.11979334454904488,
|
17680 |
+
"grad_norm": 5.65818977355957,
|
17681 |
+
"learning_rate": 0.0001992320416967561,
|
17682 |
+
"loss": 6.9697,
|
17683 |
+
"step": 2510
|
17684 |
+
},
|
17685 |
+
{
|
17686 |
+
"epoch": 0.11984107098113612,
|
17687 |
+
"grad_norm": 5.882226467132568,
|
17688 |
+
"learning_rate": 0.0001992314230453892,
|
17689 |
+
"loss": 6.6894,
|
17690 |
+
"step": 2511
|
17691 |
+
},
|
17692 |
+
{
|
17693 |
+
"epoch": 0.11988879741322737,
|
17694 |
+
"grad_norm": 7.456608295440674,
|
17695 |
+
"learning_rate": 0.00019923080414589752,
|
17696 |
+
"loss": 6.5442,
|
17697 |
+
"step": 2512
|
17698 |
+
},
|
17699 |
+
{
|
17700 |
+
"epoch": 0.11993652384531864,
|
17701 |
+
"grad_norm": 5.068438529968262,
|
17702 |
+
"learning_rate": 0.0001992301849982826,
|
17703 |
+
"loss": 5.6924,
|
17704 |
+
"step": 2513
|
17705 |
+
},
|
17706 |
+
{
|
17707 |
+
"epoch": 0.11998425027740989,
|
17708 |
+
"grad_norm": 5.8276519775390625,
|
17709 |
+
"learning_rate": 0.00019922956560254599,
|
17710 |
+
"loss": 4.5949,
|
17711 |
+
"step": 2514
|
17712 |
+
},
|
17713 |
+
{
|
17714 |
+
"epoch": 0.12003197670950114,
|
17715 |
+
"grad_norm": 7.3889617919921875,
|
17716 |
+
"learning_rate": 0.00019922894595868923,
|
17717 |
+
"loss": 7.027,
|
17718 |
+
"step": 2515
|
17719 |
+
},
|
17720 |
+
{
|
17721 |
+
"epoch": 0.12007970314159239,
|
17722 |
+
"grad_norm": 5.222685813903809,
|
17723 |
+
"learning_rate": 0.00019922832606671386,
|
17724 |
+
"loss": 5.7551,
|
17725 |
+
"step": 2516
|
17726 |
+
},
|
17727 |
+
{
|
17728 |
+
"epoch": 0.12012742957368365,
|
17729 |
+
"grad_norm": 5.6962127685546875,
|
17730 |
+
"learning_rate": 0.0001992277059266215,
|
17731 |
+
"loss": 5.0557,
|
17732 |
+
"step": 2517
|
17733 |
+
},
|
17734 |
+
{
|
17735 |
+
"epoch": 0.1201751560057749,
|
17736 |
+
"grad_norm": 5.449974060058594,
|
17737 |
+
"learning_rate": 0.00019922708553841363,
|
17738 |
+
"loss": 6.3609,
|
17739 |
+
"step": 2518
|
17740 |
+
},
|
17741 |
+
{
|
17742 |
+
"epoch": 0.12022288243786615,
|
17743 |
+
"grad_norm": 5.272806167602539,
|
17744 |
+
"learning_rate": 0.00019922646490209183,
|
17745 |
+
"loss": 5.3785,
|
17746 |
+
"step": 2519
|
17747 |
+
},
|
17748 |
+
{
|
17749 |
+
"epoch": 0.1202706088699574,
|
17750 |
+
"grad_norm": 6.4713921546936035,
|
17751 |
+
"learning_rate": 0.00019922584401765763,
|
17752 |
+
"loss": 7.2317,
|
17753 |
+
"step": 2520
|
17754 |
+
},
|
17755 |
+
{
|
17756 |
+
"epoch": 0.12031833530204866,
|
17757 |
+
"grad_norm": 5.631217002868652,
|
17758 |
+
"learning_rate": 0.0001992252228851126,
|
17759 |
+
"loss": 5.7852,
|
17760 |
+
"step": 2521
|
17761 |
+
},
|
17762 |
+
{
|
17763 |
+
"epoch": 0.12036606173413991,
|
17764 |
+
"grad_norm": 6.146111488342285,
|
17765 |
+
"learning_rate": 0.0001992246015044583,
|
17766 |
+
"loss": 5.7136,
|
17767 |
+
"step": 2522
|
17768 |
+
},
|
17769 |
+
{
|
17770 |
+
"epoch": 0.12041378816623116,
|
17771 |
+
"grad_norm": 7.096044063568115,
|
17772 |
+
"learning_rate": 0.00019922397987569626,
|
17773 |
+
"loss": 7.2231,
|
17774 |
+
"step": 2523
|
17775 |
+
},
|
17776 |
+
{
|
17777 |
+
"epoch": 0.12046151459832241,
|
17778 |
+
"grad_norm": 7.921234607696533,
|
17779 |
+
"learning_rate": 0.00019922335799882806,
|
17780 |
+
"loss": 5.5,
|
17781 |
+
"step": 2524
|
17782 |
+
},
|
17783 |
+
{
|
17784 |
+
"epoch": 0.12050924103041367,
|
17785 |
+
"grad_norm": 5.085870265960693,
|
17786 |
+
"learning_rate": 0.00019922273587385524,
|
17787 |
+
"loss": 4.867,
|
17788 |
+
"step": 2525
|
17789 |
+
},
|
17790 |
+
{
|
17791 |
+
"epoch": 0.12055696746250492,
|
17792 |
+
"grad_norm": 6.939750671386719,
|
17793 |
+
"learning_rate": 0.00019922211350077934,
|
17794 |
+
"loss": 6.7561,
|
17795 |
+
"step": 2526
|
17796 |
+
},
|
17797 |
+
{
|
17798 |
+
"epoch": 0.12060469389459617,
|
17799 |
+
"grad_norm": 7.504082679748535,
|
17800 |
+
"learning_rate": 0.00019922149087960196,
|
17801 |
+
"loss": 7.1749,
|
17802 |
+
"step": 2527
|
17803 |
+
},
|
17804 |
+
{
|
17805 |
+
"epoch": 0.12065242032668742,
|
17806 |
+
"grad_norm": 6.798505783081055,
|
17807 |
+
"learning_rate": 0.00019922086801032462,
|
17808 |
+
"loss": 6.8441,
|
17809 |
+
"step": 2528
|
17810 |
+
},
|
17811 |
+
{
|
17812 |
+
"epoch": 0.12070014675877869,
|
17813 |
+
"grad_norm": 6.547543048858643,
|
17814 |
+
"learning_rate": 0.00019922024489294892,
|
17815 |
+
"loss": 4.8769,
|
17816 |
+
"step": 2529
|
17817 |
+
},
|
17818 |
+
{
|
17819 |
+
"epoch": 0.12074787319086994,
|
17820 |
+
"grad_norm": 8.873817443847656,
|
17821 |
+
"learning_rate": 0.00019921962152747634,
|
17822 |
+
"loss": 9.6741,
|
17823 |
+
"step": 2530
|
17824 |
+
},
|
17825 |
+
{
|
17826 |
+
"epoch": 0.12079559962296119,
|
17827 |
+
"grad_norm": 6.475502014160156,
|
17828 |
+
"learning_rate": 0.0001992189979139085,
|
17829 |
+
"loss": 6.5672,
|
17830 |
+
"step": 2531
|
17831 |
+
},
|
17832 |
+
{
|
17833 |
+
"epoch": 0.12084332605505244,
|
17834 |
+
"grad_norm": 4.202572345733643,
|
17835 |
+
"learning_rate": 0.00019921837405224694,
|
17836 |
+
"loss": 3.9452,
|
17837 |
+
"step": 2532
|
17838 |
+
},
|
17839 |
+
{
|
17840 |
+
"epoch": 0.1208910524871437,
|
17841 |
+
"grad_norm": 6.775956153869629,
|
17842 |
+
"learning_rate": 0.00019921774994249324,
|
17843 |
+
"loss": 4.9947,
|
17844 |
+
"step": 2533
|
17845 |
+
},
|
17846 |
+
{
|
17847 |
+
"epoch": 0.12093877891923495,
|
17848 |
+
"grad_norm": 7.0207695960998535,
|
17849 |
+
"learning_rate": 0.00019921712558464895,
|
17850 |
+
"loss": 7.7427,
|
17851 |
+
"step": 2534
|
17852 |
+
},
|
17853 |
+
{
|
17854 |
+
"epoch": 0.1209865053513262,
|
17855 |
+
"grad_norm": 6.175810813903809,
|
17856 |
+
"learning_rate": 0.00019921650097871562,
|
17857 |
+
"loss": 6.728,
|
17858 |
+
"step": 2535
|
17859 |
+
},
|
17860 |
+
{
|
17861 |
+
"epoch": 0.12103423178341745,
|
17862 |
+
"grad_norm": 6.027743816375732,
|
17863 |
+
"learning_rate": 0.0001992158761246948,
|
17864 |
+
"loss": 6.0176,
|
17865 |
+
"step": 2536
|
17866 |
+
},
|
17867 |
+
{
|
17868 |
+
"epoch": 0.1210819582155087,
|
17869 |
+
"grad_norm": 7.800528049468994,
|
17870 |
+
"learning_rate": 0.00019921525102258811,
|
17871 |
+
"loss": 7.3638,
|
17872 |
+
"step": 2537
|
17873 |
+
},
|
17874 |
+
{
|
17875 |
+
"epoch": 0.12112968464759996,
|
17876 |
+
"grad_norm": 6.8329267501831055,
|
17877 |
+
"learning_rate": 0.00019921462567239705,
|
17878 |
+
"loss": 7.3964,
|
17879 |
+
"step": 2538
|
17880 |
+
},
|
17881 |
+
{
|
17882 |
+
"epoch": 0.12117741107969121,
|
17883 |
+
"grad_norm": 6.654507637023926,
|
17884 |
+
"learning_rate": 0.0001992140000741232,
|
17885 |
+
"loss": 7.6877,
|
17886 |
+
"step": 2539
|
17887 |
+
},
|
17888 |
+
{
|
17889 |
+
"epoch": 0.12122513751178246,
|
17890 |
+
"grad_norm": 6.265644073486328,
|
17891 |
+
"learning_rate": 0.00019921337422776816,
|
17892 |
+
"loss": 6.2082,
|
17893 |
+
"step": 2540
|
17894 |
+
},
|
17895 |
+
{
|
17896 |
+
"epoch": 0.12127286394387371,
|
17897 |
+
"grad_norm": 5.6409196853637695,
|
17898 |
+
"learning_rate": 0.00019921274813333346,
|
17899 |
+
"loss": 6.288,
|
17900 |
+
"step": 2541
|
17901 |
+
},
|
17902 |
+
{
|
17903 |
+
"epoch": 0.12132059037596497,
|
17904 |
+
"grad_norm": 6.226003170013428,
|
17905 |
+
"learning_rate": 0.00019921212179082064,
|
17906 |
+
"loss": 6.6538,
|
17907 |
+
"step": 2542
|
17908 |
+
},
|
17909 |
+
{
|
17910 |
+
"epoch": 0.12136831680805622,
|
17911 |
+
"grad_norm": 4.8436384201049805,
|
17912 |
+
"learning_rate": 0.00019921149520023135,
|
17913 |
+
"loss": 4.5971,
|
17914 |
+
"step": 2543
|
17915 |
+
},
|
17916 |
+
{
|
17917 |
+
"epoch": 0.12141604324014747,
|
17918 |
+
"grad_norm": 6.016689777374268,
|
17919 |
+
"learning_rate": 0.00019921086836156707,
|
17920 |
+
"loss": 6.8544,
|
17921 |
+
"step": 2544
|
17922 |
+
},
|
17923 |
+
{
|
17924 |
+
"epoch": 0.12146376967223872,
|
17925 |
+
"grad_norm": 4.865900039672852,
|
17926 |
+
"learning_rate": 0.0001992102412748294,
|
17927 |
+
"loss": 4.7976,
|
17928 |
+
"step": 2545
|
17929 |
+
},
|
17930 |
+
{
|
17931 |
+
"epoch": 0.12151149610432999,
|
17932 |
+
"grad_norm": 4.419651508331299,
|
17933 |
+
"learning_rate": 0.0001992096139400199,
|
17934 |
+
"loss": 5.0167,
|
17935 |
+
"step": 2546
|
17936 |
+
},
|
17937 |
+
{
|
17938 |
+
"epoch": 0.12155922253642124,
|
17939 |
+
"grad_norm": 6.08146858215332,
|
17940 |
+
"learning_rate": 0.0001992089863571402,
|
17941 |
+
"loss": 6.2101,
|
17942 |
+
"step": 2547
|
17943 |
+
},
|
17944 |
+
{
|
17945 |
+
"epoch": 0.12160694896851248,
|
17946 |
+
"grad_norm": 6.761473178863525,
|
17947 |
+
"learning_rate": 0.00019920835852619176,
|
17948 |
+
"loss": 6.9868,
|
17949 |
+
"step": 2548
|
17950 |
+
},
|
17951 |
+
{
|
17952 |
+
"epoch": 0.12165467540060373,
|
17953 |
+
"grad_norm": 4.87887716293335,
|
17954 |
+
"learning_rate": 0.00019920773044717626,
|
17955 |
+
"loss": 5.0713,
|
17956 |
+
"step": 2549
|
17957 |
+
},
|
17958 |
+
{
|
17959 |
+
"epoch": 0.121702401832695,
|
17960 |
+
"grad_norm": 5.633859157562256,
|
17961 |
+
"learning_rate": 0.0001992071021200952,
|
17962 |
+
"loss": 6.1883,
|
17963 |
+
"step": 2550
|
17964 |
+
},
|
17965 |
+
{
|
17966 |
+
"epoch": 0.12175012826478625,
|
17967 |
+
"grad_norm": 6.28723669052124,
|
17968 |
+
"learning_rate": 0.0001992064735449502,
|
17969 |
+
"loss": 5.2651,
|
17970 |
+
"step": 2551
|
17971 |
+
},
|
17972 |
+
{
|
17973 |
+
"epoch": 0.1217978546968775,
|
17974 |
+
"grad_norm": 5.057219982147217,
|
17975 |
+
"learning_rate": 0.00019920584472174274,
|
17976 |
+
"loss": 4.7898,
|
17977 |
+
"step": 2552
|
17978 |
+
},
|
17979 |
+
{
|
17980 |
+
"epoch": 0.12184558112896875,
|
17981 |
+
"grad_norm": 7.6184821128845215,
|
17982 |
+
"learning_rate": 0.0001992052156504745,
|
17983 |
+
"loss": 7.9772,
|
17984 |
+
"step": 2553
|
17985 |
+
},
|
17986 |
+
{
|
17987 |
+
"epoch": 0.12189330756106001,
|
17988 |
+
"grad_norm": 7.5784173011779785,
|
17989 |
+
"learning_rate": 0.000199204586331147,
|
17990 |
+
"loss": 7.3766,
|
17991 |
+
"step": 2554
|
17992 |
+
},
|
17993 |
+
{
|
17994 |
+
"epoch": 0.12194103399315126,
|
17995 |
+
"grad_norm": 6.459564208984375,
|
17996 |
+
"learning_rate": 0.00019920395676376181,
|
17997 |
+
"loss": 5.3843,
|
17998 |
+
"step": 2555
|
17999 |
+
},
|
18000 |
+
{
|
18001 |
+
"epoch": 0.12198876042524251,
|
18002 |
+
"grad_norm": 8.033954620361328,
|
18003 |
+
"learning_rate": 0.00019920332694832048,
|
18004 |
+
"loss": 8.4444,
|
18005 |
+
"step": 2556
|
18006 |
+
},
|
18007 |
+
{
|
18008 |
+
"epoch": 0.12203648685733376,
|
18009 |
+
"grad_norm": 7.824962615966797,
|
18010 |
+
"learning_rate": 0.00019920269688482466,
|
18011 |
+
"loss": 7.2764,
|
18012 |
+
"step": 2557
|
18013 |
+
},
|
18014 |
+
{
|
18015 |
+
"epoch": 0.12208421328942502,
|
18016 |
+
"grad_norm": 7.024128437042236,
|
18017 |
+
"learning_rate": 0.00019920206657327588,
|
18018 |
+
"loss": 6.7798,
|
18019 |
+
"step": 2558
|
18020 |
+
},
|
18021 |
+
{
|
18022 |
+
"epoch": 0.12213193972151627,
|
18023 |
+
"grad_norm": 7.976163864135742,
|
18024 |
+
"learning_rate": 0.00019920143601367575,
|
18025 |
+
"loss": 6.9575,
|
18026 |
+
"step": 2559
|
18027 |
+
},
|
18028 |
+
{
|
18029 |
+
"epoch": 0.12217966615360752,
|
18030 |
+
"grad_norm": 6.063900470733643,
|
18031 |
+
"learning_rate": 0.0001992008052060258,
|
18032 |
+
"loss": 5.2735,
|
18033 |
+
"step": 2560
|
18034 |
+
},
|
18035 |
+
{
|
18036 |
+
"epoch": 0.12222739258569877,
|
18037 |
+
"grad_norm": 10.203490257263184,
|
18038 |
+
"learning_rate": 0.0001992001741503276,
|
18039 |
+
"loss": 6.9637,
|
18040 |
+
"step": 2561
|
18041 |
+
},
|
18042 |
+
{
|
18043 |
+
"epoch": 0.12227511901779002,
|
18044 |
+
"grad_norm": 6.814461708068848,
|
18045 |
+
"learning_rate": 0.0001991995428465828,
|
18046 |
+
"loss": 7.4683,
|
18047 |
+
"step": 2562
|
18048 |
+
},
|
18049 |
+
{
|
18050 |
+
"epoch": 0.12232284544988128,
|
18051 |
+
"grad_norm": 5.394496917724609,
|
18052 |
+
"learning_rate": 0.00019919891129479292,
|
18053 |
+
"loss": 5.9349,
|
18054 |
+
"step": 2563
|
18055 |
+
},
|
18056 |
+
{
|
18057 |
+
"epoch": 0.12237057188197253,
|
18058 |
+
"grad_norm": 7.58441162109375,
|
18059 |
+
"learning_rate": 0.00019919827949495952,
|
18060 |
+
"loss": 6.5405,
|
18061 |
+
"step": 2564
|
18062 |
+
},
|
18063 |
+
{
|
18064 |
+
"epoch": 0.12241829831406378,
|
18065 |
+
"grad_norm": 5.798213958740234,
|
18066 |
+
"learning_rate": 0.00019919764744708422,
|
18067 |
+
"loss": 6.6679,
|
18068 |
+
"step": 2565
|
18069 |
+
},
|
18070 |
+
{
|
18071 |
+
"epoch": 0.12246602474615503,
|
18072 |
+
"grad_norm": 5.101596355438232,
|
18073 |
+
"learning_rate": 0.0001991970151511686,
|
18074 |
+
"loss": 5.3157,
|
18075 |
+
"step": 2566
|
18076 |
+
},
|
18077 |
+
{
|
18078 |
+
"epoch": 0.1225137511782463,
|
18079 |
+
"grad_norm": 4.84085750579834,
|
18080 |
+
"learning_rate": 0.00019919638260721423,
|
18081 |
+
"loss": 5.0614,
|
18082 |
+
"step": 2567
|
18083 |
+
},
|
18084 |
+
{
|
18085 |
+
"epoch": 0.12256147761033755,
|
18086 |
+
"grad_norm": 6.472250461578369,
|
18087 |
+
"learning_rate": 0.00019919574981522268,
|
18088 |
+
"loss": 6.5636,
|
18089 |
+
"step": 2568
|
18090 |
+
},
|
18091 |
+
{
|
18092 |
+
"epoch": 0.1226092040424288,
|
18093 |
+
"grad_norm": 5.555691719055176,
|
18094 |
+
"learning_rate": 0.00019919511677519557,
|
18095 |
+
"loss": 5.9893,
|
18096 |
+
"step": 2569
|
18097 |
+
},
|
18098 |
+
{
|
18099 |
+
"epoch": 0.12265693047452005,
|
18100 |
+
"grad_norm": 6.802445411682129,
|
18101 |
+
"learning_rate": 0.00019919448348713445,
|
18102 |
+
"loss": 7.2901,
|
18103 |
+
"step": 2570
|
18104 |
+
},
|
18105 |
+
{
|
18106 |
+
"epoch": 0.12270465690661131,
|
18107 |
+
"grad_norm": 5.74739408493042,
|
18108 |
+
"learning_rate": 0.00019919384995104093,
|
18109 |
+
"loss": 5.9203,
|
18110 |
+
"step": 2571
|
18111 |
+
},
|
18112 |
+
{
|
18113 |
+
"epoch": 0.12275238333870256,
|
18114 |
+
"grad_norm": 6.161860942840576,
|
18115 |
+
"learning_rate": 0.00019919321616691655,
|
18116 |
+
"loss": 7.1177,
|
18117 |
+
"step": 2572
|
18118 |
+
},
|
18119 |
+
{
|
18120 |
+
"epoch": 0.12280010977079381,
|
18121 |
+
"grad_norm": 5.15494441986084,
|
18122 |
+
"learning_rate": 0.00019919258213476292,
|
18123 |
+
"loss": 5.502,
|
18124 |
+
"step": 2573
|
18125 |
+
},
|
18126 |
+
{
|
18127 |
+
"epoch": 0.12284783620288506,
|
18128 |
+
"grad_norm": 6.421548366546631,
|
18129 |
+
"learning_rate": 0.00019919194785458167,
|
18130 |
+
"loss": 5.7436,
|
18131 |
+
"step": 2574
|
18132 |
+
},
|
18133 |
+
{
|
18134 |
+
"epoch": 0.12289556263497632,
|
18135 |
+
"grad_norm": 6.586711883544922,
|
18136 |
+
"learning_rate": 0.0001991913133263743,
|
18137 |
+
"loss": 6.9608,
|
18138 |
+
"step": 2575
|
18139 |
+
},
|
18140 |
+
{
|
18141 |
+
"epoch": 0.12294328906706757,
|
18142 |
+
"grad_norm": 5.92076301574707,
|
18143 |
+
"learning_rate": 0.00019919067855014247,
|
18144 |
+
"loss": 6.092,
|
18145 |
+
"step": 2576
|
18146 |
+
},
|
18147 |
+
{
|
18148 |
+
"epoch": 0.12299101549915882,
|
18149 |
+
"grad_norm": 6.7879862785339355,
|
18150 |
+
"learning_rate": 0.00019919004352588767,
|
18151 |
+
"loss": 6.7476,
|
18152 |
+
"step": 2577
|
18153 |
+
},
|
18154 |
+
{
|
18155 |
+
"epoch": 0.12303874193125007,
|
18156 |
+
"grad_norm": 8.76463508605957,
|
18157 |
+
"learning_rate": 0.00019918940825361164,
|
18158 |
+
"loss": 8.9568,
|
18159 |
+
"step": 2578
|
18160 |
+
},
|
18161 |
+
{
|
18162 |
+
"epoch": 0.12308646836334133,
|
18163 |
+
"grad_norm": 4.8239665031433105,
|
18164 |
+
"learning_rate": 0.00019918877273331583,
|
18165 |
+
"loss": 5.2598,
|
18166 |
+
"step": 2579
|
18167 |
+
},
|
18168 |
+
{
|
18169 |
+
"epoch": 0.12313419479543258,
|
18170 |
+
"grad_norm": 6.723465919494629,
|
18171 |
+
"learning_rate": 0.00019918813696500189,
|
18172 |
+
"loss": 6.6971,
|
18173 |
+
"step": 2580
|
18174 |
+
},
|
18175 |
+
{
|
18176 |
+
"epoch": 0.12318192122752383,
|
18177 |
+
"grad_norm": 6.38710355758667,
|
18178 |
+
"learning_rate": 0.00019918750094867144,
|
18179 |
+
"loss": 5.7313,
|
18180 |
+
"step": 2581
|
18181 |
+
},
|
18182 |
+
{
|
18183 |
+
"epoch": 0.12322964765961508,
|
18184 |
+
"grad_norm": 6.35195779800415,
|
18185 |
+
"learning_rate": 0.000199186864684326,
|
18186 |
+
"loss": 7.8379,
|
18187 |
+
"step": 2582
|
18188 |
+
},
|
18189 |
+
{
|
18190 |
+
"epoch": 0.12327737409170635,
|
18191 |
+
"grad_norm": 6.694102764129639,
|
18192 |
+
"learning_rate": 0.0001991862281719672,
|
18193 |
+
"loss": 6.2678,
|
18194 |
+
"step": 2583
|
18195 |
+
},
|
18196 |
+
{
|
18197 |
+
"epoch": 0.1233251005237976,
|
18198 |
+
"grad_norm": 6.242228984832764,
|
18199 |
+
"learning_rate": 0.00019918559141159664,
|
18200 |
+
"loss": 5.813,
|
18201 |
+
"step": 2584
|
18202 |
+
},
|
18203 |
+
{
|
18204 |
+
"epoch": 0.12337282695588885,
|
18205 |
+
"grad_norm": 7.03825569152832,
|
18206 |
+
"learning_rate": 0.00019918495440321586,
|
18207 |
+
"loss": 6.2827,
|
18208 |
+
"step": 2585
|
18209 |
+
},
|
18210 |
+
{
|
18211 |
+
"epoch": 0.1234205533879801,
|
18212 |
+
"grad_norm": 4.9181389808654785,
|
18213 |
+
"learning_rate": 0.0001991843171468265,
|
18214 |
+
"loss": 4.9544,
|
18215 |
+
"step": 2586
|
18216 |
+
},
|
18217 |
+
{
|
18218 |
+
"epoch": 0.12346827982007134,
|
18219 |
+
"grad_norm": 7.315206050872803,
|
18220 |
+
"learning_rate": 0.00019918367964243014,
|
18221 |
+
"loss": 8.3353,
|
18222 |
+
"step": 2587
|
18223 |
+
},
|
18224 |
+
{
|
18225 |
+
"epoch": 0.12351600625216261,
|
18226 |
+
"grad_norm": 6.019472122192383,
|
18227 |
+
"learning_rate": 0.0001991830418900284,
|
18228 |
+
"loss": 6.734,
|
18229 |
+
"step": 2588
|
18230 |
+
},
|
18231 |
+
{
|
18232 |
+
"epoch": 0.12356373268425386,
|
18233 |
+
"grad_norm": 5.389978885650635,
|
18234 |
+
"learning_rate": 0.00019918240388962284,
|
18235 |
+
"loss": 6.5434,
|
18236 |
+
"step": 2589
|
18237 |
+
},
|
18238 |
+
{
|
18239 |
+
"epoch": 0.12361145911634511,
|
18240 |
+
"grad_norm": 6.509700298309326,
|
18241 |
+
"learning_rate": 0.00019918176564121508,
|
18242 |
+
"loss": 7.3647,
|
18243 |
+
"step": 2590
|
18244 |
+
},
|
18245 |
+
{
|
18246 |
+
"epoch": 0.12365918554843636,
|
18247 |
+
"grad_norm": 8.090117454528809,
|
18248 |
+
"learning_rate": 0.0001991811271448067,
|
18249 |
+
"loss": 7.2696,
|
18250 |
+
"step": 2591
|
18251 |
+
},
|
18252 |
+
{
|
18253 |
+
"epoch": 0.12370691198052762,
|
18254 |
+
"grad_norm": 5.416383266448975,
|
18255 |
+
"learning_rate": 0.00019918048840039928,
|
18256 |
+
"loss": 6.4276,
|
18257 |
+
"step": 2592
|
18258 |
+
},
|
18259 |
+
{
|
18260 |
+
"epoch": 0.12375463841261887,
|
18261 |
+
"grad_norm": 6.610803127288818,
|
18262 |
+
"learning_rate": 0.00019917984940799445,
|
18263 |
+
"loss": 5.4854,
|
18264 |
+
"step": 2593
|
18265 |
+
},
|
18266 |
+
{
|
18267 |
+
"epoch": 0.12380236484471012,
|
18268 |
+
"grad_norm": 4.918051242828369,
|
18269 |
+
"learning_rate": 0.0001991792101675938,
|
18270 |
+
"loss": 4.2269,
|
18271 |
+
"step": 2594
|
18272 |
+
},
|
18273 |
+
{
|
18274 |
+
"epoch": 0.12385009127680137,
|
18275 |
+
"grad_norm": 6.454699993133545,
|
18276 |
+
"learning_rate": 0.0001991785706791989,
|
18277 |
+
"loss": 6.3255,
|
18278 |
+
"step": 2595
|
18279 |
+
},
|
18280 |
+
{
|
18281 |
+
"epoch": 0.12389781770889263,
|
18282 |
+
"grad_norm": 10.754586219787598,
|
18283 |
+
"learning_rate": 0.0001991779309428114,
|
18284 |
+
"loss": 10.1455,
|
18285 |
+
"step": 2596
|
18286 |
+
},
|
18287 |
+
{
|
18288 |
+
"epoch": 0.12394554414098388,
|
18289 |
+
"grad_norm": 7.033049583435059,
|
18290 |
+
"learning_rate": 0.00019917729095843286,
|
18291 |
+
"loss": 6.9458,
|
18292 |
+
"step": 2597
|
18293 |
+
},
|
18294 |
+
{
|
18295 |
+
"epoch": 0.12399327057307513,
|
18296 |
+
"grad_norm": 5.376648426055908,
|
18297 |
+
"learning_rate": 0.0001991766507260649,
|
18298 |
+
"loss": 5.361,
|
18299 |
+
"step": 2598
|
18300 |
+
},
|
18301 |
+
{
|
18302 |
+
"epoch": 0.12404099700516638,
|
18303 |
+
"grad_norm": 7.248912811279297,
|
18304 |
+
"learning_rate": 0.0001991760102457091,
|
18305 |
+
"loss": 7.2075,
|
18306 |
+
"step": 2599
|
18307 |
+
},
|
18308 |
+
{
|
18309 |
+
"epoch": 0.12408872343725764,
|
18310 |
+
"grad_norm": 5.227941989898682,
|
18311 |
+
"learning_rate": 0.0001991753695173671,
|
18312 |
+
"loss": 5.2552,
|
18313 |
+
"step": 2600
|
18314 |
+
},
|
18315 |
+
{
|
18316 |
+
"epoch": 0.12408872343725764,
|
18317 |
+
"eval_loss": 1.616467833518982,
|
18318 |
+
"eval_runtime": 96.4878,
|
18319 |
+
"eval_samples_per_second": 8.737,
|
18320 |
+
"eval_steps_per_second": 4.374,
|
18321 |
+
"step": 2600
|
18322 |
}
|
18323 |
],
|
18324 |
"logging_steps": 1,
|
|
|
18333 |
"early_stopping_threshold": 0.0
|
18334 |
},
|
18335 |
"attributes": {
|
18336 |
+
"early_stopping_patience_counter": 2
|
18337 |
}
|
18338 |
},
|
18339 |
"TrainerControl": {
|
|
|
18347 |
"attributes": {}
|
18348 |
}
|
18349 |
},
|
18350 |
+
"total_flos": 2.410799318433792e+17,
|
18351 |
"train_batch_size": 2,
|
18352 |
"trial_name": null,
|
18353 |
"trial_params": null
|