mika5883 committed on
Commit
7cad564
·
verified ·
1 Parent(s): c63f935

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36a3564bcec5a529ed89cae4077ab371093f3024c61ad4945417d3905ad1efb7
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb2fe1140bd18da0d59537585b1a320d6f148a3e073233f8f24a15f1054717d
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2a4684e422053d4adc622a772b7f2ae47a52db8023658bef4ab1d3c4d8b551
3
  size 323290986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba2eceb4012b8014cd1c4c54aeb37c9b37ab99ea2c4145113fe8eefe62510f3
3
  size 323290986
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e05042930cbd772da85946159dcb5dddc0f558c0589627a889a9671e95aa522
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a11b431c48c5d64e25a002804e15211716127584ce0efef99d94719910668dc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a58e631e9dd7778c900a95e6edcdcfbefc62841f31a37818bb58f0bab95ca81d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5cd77d7aa40b1e78967adb7aa5f482fd9407e5df0aadea6bfa235f73d239c2a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6376007199287415,
3
- "best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-243",
4
- "epoch": 0.8466898954703833,
5
  "eval_steps": 9,
6
- "global_step": 243,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -412,6 +412,21 @@
412
  "eval_samples_per_second": 13.336,
413
  "eval_steps_per_second": 1.669,
414
  "step": 243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  }
416
  ],
417
  "logging_steps": 9,
@@ -440,7 +455,7 @@
440
  "attributes": {}
441
  }
442
  },
443
- "total_flos": 8.493320123921203e+16,
444
  "train_batch_size": 8,
445
  "trial_name": null,
446
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6374460458755493,
3
+ "best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-252",
4
+ "epoch": 0.8780487804878049,
5
  "eval_steps": 9,
6
+ "global_step": 252,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
412
  "eval_samples_per_second": 13.336,
413
  "eval_steps_per_second": 1.669,
414
  "step": 243
415
+ },
416
+ {
417
+ "epoch": 0.8780487804878049,
418
+ "grad_norm": 0.9245346784591675,
419
+ "learning_rate": 8.459199147463371e-07,
420
+ "loss": 0.6092,
421
+ "step": 252
422
+ },
423
+ {
424
+ "epoch": 0.8780487804878049,
425
+ "eval_loss": 0.6374460458755493,
426
+ "eval_runtime": 58.0961,
427
+ "eval_samples_per_second": 13.34,
428
+ "eval_steps_per_second": 1.67,
429
+ "step": 252
430
  }
431
  ],
432
  "logging_steps": 9,
 
455
  "attributes": {}
456
  }
457
  },
458
+ "total_flos": 8.807887535918285e+16,
459
  "train_batch_size": 8,
460
  "trial_name": null,
461
  "trial_params": null