mika5883 commited on
Commit
00bcf85
·
verified ·
1 Parent(s): 01ae571

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f8b9682b43bdeed01f3934c7b4376dafcccbd5c80fd094d655244613aee1f8e
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b23bb944012252c2e43e3bf5678e7f4a1bf0dabf43d433c7a0d9ffe054c82307
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c13ec3a8ed5fe7b6e8771f746ce2c949db42e9bd02fe328370cf6462a754b39a
3
  size 323290986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d822f9ac4068a0ed6a97cc6ec23381fb86760f668c2ad94d642f603d7e018f5
3
  size 323290986
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d19b5d9bb2549dbad254c009b063a725d7b3736e59eec32be36d8d009ad3208f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:552e3d3cf9cb44fef1c26f04bb6414966a11bf7c925b92ec2c6327dfb7ffbd39
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfed7a9c513fb4dc9c3300c941108cb79744ddd15d7f3c0ec0b501331be45d7f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756b4673b64af1f690114c3fca9e03a16a9abde66559f4c5dfec5148ff3a7d00
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6725609302520752,
3
- "best_model_checkpoint": "ru_qwen7b_gec_Ag/checkpoint-2600",
4
- "epoch": 0.7621280961453906,
5
  "eval_steps": 200,
6
- "global_step": 2600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -202,6 +202,66 @@
202
  "eval_samples_per_second": 13.452,
203
  "eval_steps_per_second": 1.684,
204
  "step": 2600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  }
206
  ],
207
  "logging_steps": 200,
@@ -230,7 +290,7 @@
230
  "attributes": {}
231
  }
232
  },
233
- "total_flos": 9.087503013249024e+17,
234
  "train_batch_size": 8,
235
  "trial_name": null,
236
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6679416298866272,
3
+ "best_model_checkpoint": "ru_qwen7b_gec_Ag/checkpoint-3400",
4
+ "epoch": 0.9966290488055107,
5
  "eval_steps": 200,
6
+ "global_step": 3400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
202
  "eval_samples_per_second": 13.452,
203
  "eval_steps_per_second": 1.684,
204
  "step": 2600
205
+ },
206
+ {
207
+ "epoch": 0.8207533343104206,
208
+ "grad_norm": 1.2224448919296265,
209
+ "learning_rate": 1.4807874171782795e-05,
210
+ "loss": 0.5785,
211
+ "step": 2800
212
+ },
213
+ {
214
+ "epoch": 0.8207533343104206,
215
+ "eval_loss": 0.6695232391357422,
216
+ "eval_runtime": 185.8805,
217
+ "eval_samples_per_second": 13.45,
218
+ "eval_steps_per_second": 1.684,
219
+ "step": 2800
220
+ },
221
+ {
222
+ "epoch": 0.8793785724754507,
223
+ "grad_norm": 1.116542100906372,
224
+ "learning_rate": 1.4016954246529697e-05,
225
+ "loss": 0.5778,
226
+ "step": 3000
227
+ },
228
+ {
229
+ "epoch": 0.8793785724754507,
230
+ "eval_loss": 0.6703296899795532,
231
+ "eval_runtime": 186.6523,
232
+ "eval_samples_per_second": 13.394,
233
+ "eval_steps_per_second": 1.677,
234
+ "step": 3000
235
+ },
236
+ {
237
+ "epoch": 0.9380038106404808,
238
+ "grad_norm": 1.0617640018463135,
239
+ "learning_rate": 1.3194816296459483e-05,
240
+ "loss": 0.5823,
241
+ "step": 3200
242
+ },
243
+ {
244
+ "epoch": 0.9380038106404808,
245
+ "eval_loss": 0.6692666411399841,
246
+ "eval_runtime": 186.0381,
247
+ "eval_samples_per_second": 13.438,
248
+ "eval_steps_per_second": 1.682,
249
+ "step": 3200
250
+ },
251
+ {
252
+ "epoch": 0.9966290488055107,
253
+ "grad_norm": 1.1942830085754395,
254
+ "learning_rate": 1.234784962086541e-05,
255
+ "loss": 0.5765,
256
+ "step": 3400
257
+ },
258
+ {
259
+ "epoch": 0.9966290488055107,
260
+ "eval_loss": 0.6679416298866272,
261
+ "eval_runtime": 186.4984,
262
+ "eval_samples_per_second": 13.405,
263
+ "eval_steps_per_second": 1.678,
264
+ "step": 3400
265
  }
266
  ],
267
  "logging_steps": 200,
 
290
  "attributes": {}
291
  }
292
  },
293
+ "total_flos": 1.1883657786556416e+18,
294
  "train_batch_size": 8,
295
  "trial_name": null,
296
  "trial_params": null