mika5883 commited on
Commit
6248b92
·
verified ·
1 Parent(s): eada0ed

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fb2fe1140bd18da0d59537585b1a320d6f148a3e073233f8f24a15f1054717d
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1fa26184e840c327e1f6152c2e8b65631c3b4fb8bd6d5e9618989da82736f6
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba2eceb4012b8014cd1c4c54aeb37c9b37ab99ea2c4145113fe8eefe62510f3
3
  size 323290986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0cbc0277e866c44285c3b27a73b0f54c8c43cfc57e03c3c70332e948723d437
3
  size 323290986
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a11b431c48c5d64e25a002804e15211716127584ce0efef99d94719910668dc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d8c76d6c8a7a3b88e7f5125b78e546dbde485461d800944948e2662f2d8e09
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5cd77d7aa40b1e78967adb7aa5f482fd9407e5df0aadea6bfa235f73d239c2a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c73e65fdf914a00fd32cdfca8c34325a0b0f62e22c4eba0ae82508bf3c9b74
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6374460458755493,
3
- "best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-252",
4
- "epoch": 0.8780487804878049,
5
  "eval_steps": 9,
6
- "global_step": 252,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -427,6 +427,21 @@
427
  "eval_samples_per_second": 13.34,
428
  "eval_steps_per_second": 1.67,
429
  "step": 252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 9,
@@ -455,7 +470,7 @@
455
  "attributes": {}
456
  }
457
  },
458
- "total_flos": 8.807887535918285e+16,
459
  "train_batch_size": 8,
460
  "trial_name": null,
461
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6373529434204102,
3
+ "best_model_checkpoint": "ru_qwen7b_gec_Ga/checkpoint-261",
4
+ "epoch": 0.9094076655052264,
5
  "eval_steps": 9,
6
+ "global_step": 261,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
427
  "eval_samples_per_second": 13.34,
428
  "eval_steps_per_second": 1.67,
429
  "step": 252
430
+ },
431
+ {
432
+ "epoch": 0.9094076655052264,
433
+ "grad_norm": 1.0773546695709229,
434
+ "learning_rate": 4.788233408928588e-07,
435
+ "loss": 0.6222,
436
+ "step": 261
437
+ },
438
+ {
439
+ "epoch": 0.9094076655052264,
440
+ "eval_loss": 0.6373529434204102,
441
+ "eval_runtime": 58.0985,
442
+ "eval_samples_per_second": 13.339,
443
+ "eval_steps_per_second": 1.67,
444
+ "step": 261
445
  }
446
  ],
447
  "logging_steps": 9,
 
470
  "attributes": {}
471
  }
472
  },
473
+ "total_flos": 9.122454947915366e+16,
474
  "train_batch_size": 8,
475
  "trial_name": null,
476
  "trial_params": null