VERSIL91 committed on
Commit
4fe81b5
·
verified ·
1 Parent(s): db667f9

Training in progress, step 16, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5eeff0a3f2e686ca9d8d42ab676758f1b8d07b69c84890406003b76fc3a7627
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af2997a4024dfdc97b8b4149e0db00eeccff5d122e74d84c9877509219f98af7
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69e1d67018ae410f0815be5816adc69a79935c462180c5d1d8fe3c778aca571e
3
  size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5514aafb3c2b44765b5a4b12e85bc0743ab1397babfcf4432bcb103b76f7758e
3
  size 82460660
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:105f96f979368944b59d27d1e74fc1fa8227279f7d5146ad57761b7d32f1a2d9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6eeca6fec2d74dc35412cee25843a6e2b45b715cb98314d10ac41fc41a4cabc
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a3d18e607a27cfab9cec7cfaa7384cdb877a2330c4bcd4e1efcae25be9908cb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3233603be2c007aac019155f1cd5d754e2eeb600f6ca413bd2048966230a2e8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.07881773399014778,
5
  "eval_steps": 8,
6
- "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -79,6 +79,70 @@
79
  "eval_samples_per_second": 5.85,
80
  "eval_steps_per_second": 2.942,
81
  "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
  ],
84
  "logging_steps": 1,
@@ -98,7 +162,7 @@
98
  "attributes": {}
99
  }
100
  },
101
- "total_flos": 4.473847637291827e+16,
102
  "train_batch_size": 2,
103
  "trial_name": null,
104
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.15763546798029557,
5
  "eval_steps": 8,
6
+ "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
79
  "eval_samples_per_second": 5.85,
80
  "eval_steps_per_second": 2.942,
81
  "step": 8
82
+ },
83
+ {
84
+ "epoch": 0.08866995073891626,
85
+ "grad_norm": 0.4130725860595703,
86
+ "learning_rate": 9e-05,
87
+ "loss": 0.9186,
88
+ "step": 9
89
+ },
90
+ {
91
+ "epoch": 0.09852216748768473,
92
+ "grad_norm": 0.4786558151245117,
93
+ "learning_rate": 0.0001,
94
+ "loss": 0.6656,
95
+ "step": 10
96
+ },
97
+ {
98
+ "epoch": 0.10837438423645321,
99
+ "grad_norm": 0.45567965507507324,
100
+ "learning_rate": 9.938441702975689e-05,
101
+ "loss": 0.8369,
102
+ "step": 11
103
+ },
104
+ {
105
+ "epoch": 0.11822660098522167,
106
+ "grad_norm": 0.44564610719680786,
107
+ "learning_rate": 9.755282581475769e-05,
108
+ "loss": 0.6533,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 0.12807881773399016,
113
+ "grad_norm": 0.4153067469596863,
114
+ "learning_rate": 9.45503262094184e-05,
115
+ "loss": 0.7473,
116
+ "step": 13
117
+ },
118
+ {
119
+ "epoch": 0.13793103448275862,
120
+ "grad_norm": 0.4901822805404663,
121
+ "learning_rate": 9.045084971874738e-05,
122
+ "loss": 0.42,
123
+ "step": 14
124
+ },
125
+ {
126
+ "epoch": 0.1477832512315271,
127
+ "grad_norm": 0.4641437828540802,
128
+ "learning_rate": 8.535533905932738e-05,
129
+ "loss": 0.6957,
130
+ "step": 15
131
+ },
132
+ {
133
+ "epoch": 0.15763546798029557,
134
+ "grad_norm": 0.5527058243751526,
135
+ "learning_rate": 7.938926261462366e-05,
136
+ "loss": 0.9599,
137
+ "step": 16
138
+ },
139
+ {
140
+ "epoch": 0.15763546798029557,
141
+ "eval_loss": 0.6261801719665527,
142
+ "eval_runtime": 29.3343,
143
+ "eval_samples_per_second": 5.829,
144
+ "eval_steps_per_second": 2.932,
145
+ "step": 16
146
  }
147
  ],
148
  "logging_steps": 1,
 
162
  "attributes": {}
163
  }
164
  },
165
+ "total_flos": 8.947695274583654e+16,
166
  "train_batch_size": 2,
167
  "trial_name": null,
168
  "trial_params": null