Training in progress, epoch 0, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 161533192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fb2fe1140bd18da0d59537585b1a320d6f148a3e073233f8f24a15f1054717d
|
3 |
size 161533192
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 323290986
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ba2eceb4012b8014cd1c4c54aeb37c9b37ab99ea2c4145113fe8eefe62510f3
|
3 |
size 323290986
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a11b431c48c5d64e25a002804e15211716127584ce0efef99d94719910668dc
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b5cd77d7aa40b1e78967adb7aa5f482fd9407e5df0aadea6bfa235f73d239c2a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 9,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -412,6 +412,21 @@
|
|
412 |
"eval_samples_per_second": 13.336,
|
413 |
"eval_steps_per_second": 1.669,
|
414 |
"step": 243
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
415 |
}
|
416 |
],
|
417 |
"logging_steps": 9,
|
@@ -440,7 +455,7 @@
|
|
440 |
"attributes": {}
|
441 |
}
|
442 |
},
|
443 |
-
"total_flos": 8.
|
444 |
"train_batch_size": 8,
|
445 |
"trial_name": null,
|
446 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.6374460458755493,
|
3 |
+
"best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-252",
|
4 |
+
"epoch": 0.8780487804878049,
|
5 |
"eval_steps": 9,
|
6 |
+
"global_step": 252,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
412 |
"eval_samples_per_second": 13.336,
|
413 |
"eval_steps_per_second": 1.669,
|
414 |
"step": 243
|
415 |
+
},
|
416 |
+
{
|
417 |
+
"epoch": 0.8780487804878049,
|
418 |
+
"grad_norm": 0.9245346784591675,
|
419 |
+
"learning_rate": 8.459199147463371e-07,
|
420 |
+
"loss": 0.6092,
|
421 |
+
"step": 252
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"epoch": 0.8780487804878049,
|
425 |
+
"eval_loss": 0.6374460458755493,
|
426 |
+
"eval_runtime": 58.0961,
|
427 |
+
"eval_samples_per_second": 13.34,
|
428 |
+
"eval_steps_per_second": 1.67,
|
429 |
+
"step": 252
|
430 |
}
|
431 |
],
|
432 |
"logging_steps": 9,
|
|
|
455 |
"attributes": {}
|
456 |
}
|
457 |
},
|
458 |
+
"total_flos": 8.807887535918285e+16,
|
459 |
"train_batch_size": 8,
|
460 |
"trial_name": null,
|
461 |
"trial_params": null
|