PaulTran commited on
Commit
8f206cc
·
1 Parent(s): a0d8645

Upload model files

Browse files
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-multilingual-cased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2",
15
+ "3": "LABEL_3",
16
+ "4": "LABEL_4"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "label2id": {
20
+ "LABEL_0": 0,
21
+ "LABEL_1": 1,
22
+ "LABEL_2": 2,
23
+ "LABEL_3": 3,
24
+ "LABEL_4": 4
25
+ },
26
+ "max_position_embeddings": 512,
27
+ "model_type": "distilbert",
28
+ "n_heads": 12,
29
+ "n_layers": 6,
30
+ "output_past": true,
31
+ "pad_token_id": 0,
32
+ "problem_type": "multi_label_classification",
33
+ "qa_dropout": 0.1,
34
+ "seq_classif_dropout": 0.2,
35
+ "sinusoidal_pos_embds": false,
36
+ "tie_weights_": true,
37
+ "torch_dtype": "float32",
38
+ "transformers_version": "4.24.0",
39
+ "vocab_size": 119547
40
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45e52ce05a6b657f75f0299c91bab859bc971f8a7c99e1f64d9ef5d15e42802
3
+ size 1082689093
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236930bc12eb148be077da5f6ff89e372954e5ae12617ce8ab3fdffdab6de29d
3
+ size 541349549
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f6353bc2fe2d7fef44b3357b6dbe6ee592d69d4ca2b101d397fbdb26b8d8a2f
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e134097bca81c3928fe283a7a9968ba629990becdb2bd3c94f6ae3538f682f2f
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": false,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "distilbert-base-multilingual-cased",
8
+ "never_split": null,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "special_tokens_map_file": null,
12
+ "strip_accents": null,
13
+ "tokenize_chinese_chars": true,
14
+ "tokenizer_class": "DistilBertTokenizer",
15
+ "unk_token": "[UNK]"
16
+ }
trainer_state.json ADDED
@@ -0,0 +1,1716 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7120954003407156,
3
+ "best_model_checkpoint": "Distil4\\checkpoint-40348",
4
+ "epoch": 100.0,
5
+ "global_step": 52400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 4.950095419847329e-06,
13
+ "loss": 0.4749,
14
+ "step": 523
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.4026503567787971,
19
+ "eval_f1": 0.5392528424472117,
20
+ "eval_loss": 0.4015094041824341,
21
+ "eval_roc_auc": 0.6891349750981157,
22
+ "eval_runtime": 9.4449,
23
+ "eval_samples_per_second": 103.866,
24
+ "eval_steps_per_second": 6.988,
25
+ "step": 524
26
+ },
27
+ {
28
+ "epoch": 2.0,
29
+ "learning_rate": 4.900190839694656e-06,
30
+ "loss": 0.3845,
31
+ "step": 1046
32
+ },
33
+ {
34
+ "epoch": 2.0,
35
+ "eval_accuracy": 0.5168195718654435,
36
+ "eval_f1": 0.6353754940711462,
37
+ "eval_loss": 0.36452510952949524,
38
+ "eval_roc_auc": 0.747088659888458,
39
+ "eval_runtime": 9.9987,
40
+ "eval_samples_per_second": 98.113,
41
+ "eval_steps_per_second": 6.601,
42
+ "step": 1048
43
+ },
44
+ {
45
+ "epoch": 2.99,
46
+ "learning_rate": 4.850286259541985e-06,
47
+ "loss": 0.3475,
48
+ "step": 1569
49
+ },
50
+ {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.5310907237512742,
53
+ "eval_f1": 0.6470871449205585,
54
+ "eval_loss": 0.3549170196056366,
55
+ "eval_roc_auc": 0.7563264556700557,
56
+ "eval_runtime": 10.07,
57
+ "eval_samples_per_second": 97.418,
58
+ "eval_steps_per_second": 6.554,
59
+ "step": 1572
60
+ },
61
+ {
62
+ "epoch": 3.99,
63
+ "learning_rate": 4.800381679389313e-06,
64
+ "loss": 0.326,
65
+ "step": 2092
66
+ },
67
+ {
68
+ "epoch": 4.0,
69
+ "eval_accuracy": 0.5545361875637105,
70
+ "eval_f1": 0.674766355140187,
71
+ "eval_loss": 0.3448670506477356,
72
+ "eval_roc_auc": 0.7760477152234284,
73
+ "eval_runtime": 10.1448,
74
+ "eval_samples_per_second": 96.7,
75
+ "eval_steps_per_second": 6.506,
76
+ "step": 2096
77
+ },
78
+ {
79
+ "epoch": 4.99,
80
+ "learning_rate": 4.750477099236642e-06,
81
+ "loss": 0.3083,
82
+ "step": 2615
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "eval_accuracy": 0.5565749235474006,
87
+ "eval_f1": 0.6812674743709226,
88
+ "eval_loss": 0.34419235587120056,
89
+ "eval_roc_auc": 0.7803114456863509,
90
+ "eval_runtime": 10.1716,
91
+ "eval_samples_per_second": 96.445,
92
+ "eval_steps_per_second": 6.489,
93
+ "step": 2620
94
+ },
95
+ {
96
+ "epoch": 5.99,
97
+ "learning_rate": 4.700572519083969e-06,
98
+ "loss": 0.2952,
99
+ "step": 3138
100
+ },
101
+ {
102
+ "epoch": 6.0,
103
+ "eval_accuracy": 0.5718654434250765,
104
+ "eval_f1": 0.6965452847805789,
105
+ "eval_loss": 0.3325030505657196,
106
+ "eval_roc_auc": 0.789289320878566,
107
+ "eval_runtime": 10.2795,
108
+ "eval_samples_per_second": 95.433,
109
+ "eval_steps_per_second": 6.421,
110
+ "step": 3144
111
+ },
112
+ {
113
+ "epoch": 6.99,
114
+ "learning_rate": 4.650667938931298e-06,
115
+ "loss": 0.2799,
116
+ "step": 3661
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_accuracy": 0.5728848114169215,
121
+ "eval_f1": 0.6865116279069767,
122
+ "eval_loss": 0.34394344687461853,
123
+ "eval_roc_auc": 0.7837168070505612,
124
+ "eval_runtime": 10.3609,
125
+ "eval_samples_per_second": 94.683,
126
+ "eval_steps_per_second": 6.37,
127
+ "step": 3668
128
+ },
129
+ {
130
+ "epoch": 7.98,
131
+ "learning_rate": 4.600763358778627e-06,
132
+ "loss": 0.2626,
133
+ "step": 4184
134
+ },
135
+ {
136
+ "epoch": 8.0,
137
+ "eval_accuracy": 0.5800203873598369,
138
+ "eval_f1": 0.692876965772433,
139
+ "eval_loss": 0.33631590008735657,
140
+ "eval_roc_auc": 0.7883041472539074,
141
+ "eval_runtime": 9.1916,
142
+ "eval_samples_per_second": 106.728,
143
+ "eval_steps_per_second": 7.18,
144
+ "step": 4192
145
+ },
146
+ {
147
+ "epoch": 8.98,
148
+ "learning_rate": 4.550858778625955e-06,
149
+ "loss": 0.2529,
150
+ "step": 4707
151
+ },
152
+ {
153
+ "epoch": 9.0,
154
+ "eval_accuracy": 0.5749235474006116,
155
+ "eval_f1": 0.6857670979667283,
156
+ "eval_loss": 0.34780624508857727,
157
+ "eval_roc_auc": 0.7840966468522641,
158
+ "eval_runtime": 8.7207,
159
+ "eval_samples_per_second": 112.491,
160
+ "eval_steps_per_second": 7.568,
161
+ "step": 4716
162
+ },
163
+ {
164
+ "epoch": 9.98,
165
+ "learning_rate": 4.500954198473283e-06,
166
+ "loss": 0.237,
167
+ "step": 5230
168
+ },
169
+ {
170
+ "epoch": 10.0,
171
+ "eval_accuracy": 0.5667686034658511,
172
+ "eval_f1": 0.68721251149954,
173
+ "eval_loss": 0.34721097350120544,
174
+ "eval_roc_auc": 0.7855741204011842,
175
+ "eval_runtime": 8.7533,
176
+ "eval_samples_per_second": 112.072,
177
+ "eval_steps_per_second": 7.54,
178
+ "step": 5240
179
+ },
180
+ {
181
+ "epoch": 10.98,
182
+ "learning_rate": 4.451049618320611e-06,
183
+ "loss": 0.2223,
184
+ "step": 5753
185
+ },
186
+ {
187
+ "epoch": 11.0,
188
+ "eval_accuracy": 0.5657492354740061,
189
+ "eval_f1": 0.6845698680018207,
190
+ "eval_loss": 0.3609465956687927,
191
+ "eval_roc_auc": 0.785313626035666,
192
+ "eval_runtime": 8.7365,
193
+ "eval_samples_per_second": 112.288,
194
+ "eval_steps_per_second": 7.555,
195
+ "step": 5764
196
+ },
197
+ {
198
+ "epoch": 11.98,
199
+ "learning_rate": 4.40114503816794e-06,
200
+ "loss": 0.2117,
201
+ "step": 6276
202
+ },
203
+ {
204
+ "epoch": 12.0,
205
+ "eval_accuracy": 0.581039755351682,
206
+ "eval_f1": 0.6981818181818182,
207
+ "eval_loss": 0.36286601424217224,
208
+ "eval_roc_auc": 0.7939185467398039,
209
+ "eval_runtime": 8.7731,
210
+ "eval_samples_per_second": 111.819,
211
+ "eval_steps_per_second": 7.523,
212
+ "step": 6288
213
+ },
214
+ {
215
+ "epoch": 12.98,
216
+ "learning_rate": 4.351240458015267e-06,
217
+ "loss": 0.2,
218
+ "step": 6799
219
+ },
220
+ {
221
+ "epoch": 13.0,
222
+ "eval_accuracy": 0.5851172273190621,
223
+ "eval_f1": 0.7062999112688554,
224
+ "eval_loss": 0.3647877275943756,
225
+ "eval_roc_auc": 0.8024597782929012,
226
+ "eval_runtime": 8.7204,
227
+ "eval_samples_per_second": 112.495,
228
+ "eval_steps_per_second": 7.568,
229
+ "step": 6812
230
+ },
231
+ {
232
+ "epoch": 13.97,
233
+ "learning_rate": 4.301335877862596e-06,
234
+ "loss": 0.1909,
235
+ "step": 7322
236
+ },
237
+ {
238
+ "epoch": 14.0,
239
+ "eval_accuracy": 0.5698267074413863,
240
+ "eval_f1": 0.7012058954890575,
241
+ "eval_loss": 0.3725411891937256,
242
+ "eval_roc_auc": 0.7982735076082716,
243
+ "eval_runtime": 8.798,
244
+ "eval_samples_per_second": 111.503,
245
+ "eval_steps_per_second": 7.502,
246
+ "step": 7336
247
+ },
248
+ {
249
+ "epoch": 14.97,
250
+ "learning_rate": 4.2514312977099246e-06,
251
+ "loss": 0.1782,
252
+ "step": 7845
253
+ },
254
+ {
255
+ "epoch": 15.0,
256
+ "eval_accuracy": 0.5800203873598369,
257
+ "eval_f1": 0.7042128603104212,
258
+ "eval_loss": 0.37900835275650024,
259
+ "eval_roc_auc": 0.8012003396754722,
260
+ "eval_runtime": 8.7328,
261
+ "eval_samples_per_second": 112.335,
262
+ "eval_steps_per_second": 7.558,
263
+ "step": 7860
264
+ },
265
+ {
266
+ "epoch": 15.97,
267
+ "learning_rate": 4.2015267175572526e-06,
268
+ "loss": 0.1653,
269
+ "step": 8368
270
+ },
271
+ {
272
+ "epoch": 16.0,
273
+ "eval_accuracy": 0.5739041794087666,
274
+ "eval_f1": 0.7040141155712396,
275
+ "eval_loss": 0.3948748707771301,
276
+ "eval_roc_auc": 0.8018475591563196,
277
+ "eval_runtime": 8.6978,
278
+ "eval_samples_per_second": 112.788,
279
+ "eval_steps_per_second": 7.588,
280
+ "step": 8384
281
+ },
282
+ {
283
+ "epoch": 16.97,
284
+ "learning_rate": 4.1516221374045806e-06,
285
+ "loss": 0.156,
286
+ "step": 8891
287
+ },
288
+ {
289
+ "epoch": 17.0,
290
+ "eval_accuracy": 0.5769622833843018,
291
+ "eval_f1": 0.7074468085106383,
292
+ "eval_loss": 0.39961278438568115,
293
+ "eval_roc_auc": 0.8033181473916137,
294
+ "eval_runtime": 8.6958,
295
+ "eval_samples_per_second": 112.813,
296
+ "eval_steps_per_second": 7.59,
297
+ "step": 8908
298
+ },
299
+ {
300
+ "epoch": 17.97,
301
+ "learning_rate": 4.1017175572519085e-06,
302
+ "loss": 0.1478,
303
+ "step": 9414
304
+ },
305
+ {
306
+ "epoch": 18.0,
307
+ "eval_accuracy": 0.5575942915392457,
308
+ "eval_f1": 0.6918402777777779,
309
+ "eval_loss": 0.41351279616355896,
310
+ "eval_roc_auc": 0.79633816070322,
311
+ "eval_runtime": 8.7585,
312
+ "eval_samples_per_second": 112.006,
313
+ "eval_steps_per_second": 7.536,
314
+ "step": 9432
315
+ },
316
+ {
317
+ "epoch": 18.96,
318
+ "learning_rate": 4.051812977099237e-06,
319
+ "loss": 0.1362,
320
+ "step": 9937
321
+ },
322
+ {
323
+ "epoch": 19.0,
324
+ "eval_accuracy": 0.5616717635066258,
325
+ "eval_f1": 0.7035263387026557,
326
+ "eval_loss": 0.4201168119907379,
327
+ "eval_roc_auc": 0.8034656078584379,
328
+ "eval_runtime": 8.7893,
329
+ "eval_samples_per_second": 111.613,
330
+ "eval_steps_per_second": 7.509,
331
+ "step": 9956
332
+ },
333
+ {
334
+ "epoch": 19.96,
335
+ "learning_rate": 4.001908396946565e-06,
336
+ "loss": 0.1316,
337
+ "step": 10460
338
+ },
339
+ {
340
+ "epoch": 20.0,
341
+ "eval_accuracy": 0.54638124362895,
342
+ "eval_f1": 0.6954270923209663,
343
+ "eval_loss": 0.4302760362625122,
344
+ "eval_roc_auc": 0.7995323724495651,
345
+ "eval_runtime": 8.7776,
346
+ "eval_samples_per_second": 111.762,
347
+ "eval_steps_per_second": 7.519,
348
+ "step": 10480
349
+ },
350
+ {
351
+ "epoch": 20.96,
352
+ "learning_rate": 3.952003816793893e-06,
353
+ "loss": 0.1189,
354
+ "step": 10983
355
+ },
356
+ {
357
+ "epoch": 21.0,
358
+ "eval_accuracy": 0.5524974515800204,
359
+ "eval_f1": 0.6955767562879446,
360
+ "eval_loss": 0.4442458152770996,
361
+ "eval_roc_auc": 0.7988851529687178,
362
+ "eval_runtime": 9.0585,
363
+ "eval_samples_per_second": 108.296,
364
+ "eval_steps_per_second": 7.286,
365
+ "step": 11004
366
+ },
367
+ {
368
+ "epoch": 21.96,
369
+ "learning_rate": 3.902099236641222e-06,
370
+ "loss": 0.1153,
371
+ "step": 11506
372
+ },
373
+ {
374
+ "epoch": 22.0,
375
+ "eval_accuracy": 0.5800203873598369,
376
+ "eval_f1": 0.7025121198765976,
377
+ "eval_loss": 0.4538831412792206,
378
+ "eval_roc_auc": 0.8010173050882469,
379
+ "eval_runtime": 8.6472,
380
+ "eval_samples_per_second": 113.447,
381
+ "eval_steps_per_second": 7.633,
382
+ "step": 11528
383
+ },
384
+ {
385
+ "epoch": 22.96,
386
+ "learning_rate": 3.85219465648855e-06,
387
+ "loss": 0.1037,
388
+ "step": 12029
389
+ },
390
+ {
391
+ "epoch": 23.0,
392
+ "eval_accuracy": 0.5565749235474006,
393
+ "eval_f1": 0.7012430347192455,
394
+ "eval_loss": 0.4749497175216675,
395
+ "eval_roc_auc": 0.804281517523123,
396
+ "eval_runtime": 8.6607,
397
+ "eval_samples_per_second": 113.27,
398
+ "eval_steps_per_second": 7.621,
399
+ "step": 12052
400
+ },
401
+ {
402
+ "epoch": 23.95,
403
+ "learning_rate": 3.802290076335878e-06,
404
+ "loss": 0.0968,
405
+ "step": 12552
406
+ },
407
+ {
408
+ "epoch": 24.0,
409
+ "eval_accuracy": 0.5606523955147809,
410
+ "eval_f1": 0.7047124945957631,
411
+ "eval_loss": 0.479137659072876,
412
+ "eval_roc_auc": 0.8052666911477818,
413
+ "eval_runtime": 8.7017,
414
+ "eval_samples_per_second": 112.737,
415
+ "eval_steps_per_second": 7.585,
416
+ "step": 12576
417
+ },
418
+ {
419
+ "epoch": 24.95,
420
+ "learning_rate": 3.752385496183206e-06,
421
+ "loss": 0.0923,
422
+ "step": 13075
423
+ },
424
+ {
425
+ "epoch": 25.0,
426
+ "eval_accuracy": 0.5555555555555556,
427
+ "eval_f1": 0.6873362445414847,
428
+ "eval_loss": 0.50052809715271,
429
+ "eval_roc_auc": 0.7925810745679466,
430
+ "eval_runtime": 8.625,
431
+ "eval_samples_per_second": 113.739,
432
+ "eval_steps_per_second": 7.652,
433
+ "step": 13100
434
+ },
435
+ {
436
+ "epoch": 25.95,
437
+ "learning_rate": 3.7024809160305346e-06,
438
+ "loss": 0.0846,
439
+ "step": 13598
440
+ },
441
+ {
442
+ "epoch": 26.0,
443
+ "eval_accuracy": 0.5524974515800204,
444
+ "eval_f1": 0.6971770744225834,
445
+ "eval_loss": 0.5201558470726013,
446
+ "eval_roc_auc": 0.801924445158477,
447
+ "eval_runtime": 8.6427,
448
+ "eval_samples_per_second": 113.506,
449
+ "eval_steps_per_second": 7.636,
450
+ "step": 13624
451
+ },
452
+ {
453
+ "epoch": 26.95,
454
+ "learning_rate": 3.6525763358778626e-06,
455
+ "loss": 0.0807,
456
+ "step": 14121
457
+ },
458
+ {
459
+ "epoch": 27.0,
460
+ "eval_accuracy": 0.5688073394495413,
461
+ "eval_f1": 0.7090358841331603,
462
+ "eval_loss": 0.52068030834198,
463
+ "eval_roc_auc": 0.8080810630924239,
464
+ "eval_runtime": 8.6305,
465
+ "eval_samples_per_second": 113.667,
466
+ "eval_steps_per_second": 7.647,
467
+ "step": 14148
468
+ },
469
+ {
470
+ "epoch": 27.95,
471
+ "learning_rate": 3.602671755725191e-06,
472
+ "loss": 0.0748,
473
+ "step": 14644
474
+ },
475
+ {
476
+ "epoch": 28.0,
477
+ "eval_accuracy": 0.5728848114169215,
478
+ "eval_f1": 0.7102321582115221,
479
+ "eval_loss": 0.5269947052001953,
480
+ "eval_roc_auc": 0.8097203415115557,
481
+ "eval_runtime": 8.6597,
482
+ "eval_samples_per_second": 113.284,
483
+ "eval_steps_per_second": 7.622,
484
+ "step": 14672
485
+ },
486
+ {
487
+ "epoch": 28.94,
488
+ "learning_rate": 3.5527671755725195e-06,
489
+ "loss": 0.0727,
490
+ "step": 15167
491
+ },
492
+ {
493
+ "epoch": 29.0,
494
+ "eval_accuracy": 0.528032619775739,
495
+ "eval_f1": 0.6827004219409282,
496
+ "eval_loss": 0.5663571953773499,
497
+ "eval_roc_auc": 0.7942691239585964,
498
+ "eval_runtime": 8.6961,
499
+ "eval_samples_per_second": 112.809,
500
+ "eval_steps_per_second": 7.59,
501
+ "step": 15196
502
+ },
503
+ {
504
+ "epoch": 29.94,
505
+ "learning_rate": 3.5028625954198474e-06,
506
+ "loss": 0.0685,
507
+ "step": 15690
508
+ },
509
+ {
510
+ "epoch": 30.0,
511
+ "eval_accuracy": 0.5586136595310908,
512
+ "eval_f1": 0.696027633851468,
513
+ "eval_loss": 0.5751686096191406,
514
+ "eval_roc_auc": 0.7997997521287095,
515
+ "eval_runtime": 8.6393,
516
+ "eval_samples_per_second": 113.551,
517
+ "eval_steps_per_second": 7.639,
518
+ "step": 15720
519
+ },
520
+ {
521
+ "epoch": 30.94,
522
+ "learning_rate": 3.452958015267176e-06,
523
+ "loss": 0.0629,
524
+ "step": 16213
525
+ },
526
+ {
527
+ "epoch": 31.0,
528
+ "eval_accuracy": 0.5596330275229358,
529
+ "eval_f1": 0.7002606429192008,
530
+ "eval_loss": 0.5864209532737732,
531
+ "eval_roc_auc": 0.8016714098827202,
532
+ "eval_runtime": 8.6544,
533
+ "eval_samples_per_second": 113.352,
534
+ "eval_steps_per_second": 7.626,
535
+ "step": 16244
536
+ },
537
+ {
538
+ "epoch": 31.94,
539
+ "learning_rate": 3.403053435114504e-06,
540
+ "loss": 0.0586,
541
+ "step": 16736
542
+ },
543
+ {
544
+ "epoch": 32.0,
545
+ "eval_accuracy": 0.5524974515800204,
546
+ "eval_f1": 0.7002992731936725,
547
+ "eval_loss": 0.5988386273384094,
548
+ "eval_roc_auc": 0.8040422528746184,
549
+ "eval_runtime": 8.6401,
550
+ "eval_samples_per_second": 113.54,
551
+ "eval_steps_per_second": 7.639,
552
+ "step": 16768
553
+ },
554
+ {
555
+ "epoch": 32.94,
556
+ "learning_rate": 3.3531488549618323e-06,
557
+ "loss": 0.0573,
558
+ "step": 17259
559
+ },
560
+ {
561
+ "epoch": 33.0,
562
+ "eval_accuracy": 0.5596330275229358,
563
+ "eval_f1": 0.7038148306900984,
564
+ "eval_loss": 0.5976923108100891,
565
+ "eval_roc_auc": 0.8059701406899085,
566
+ "eval_runtime": 10.028,
567
+ "eval_samples_per_second": 97.827,
568
+ "eval_steps_per_second": 6.582,
569
+ "step": 17292
570
+ },
571
+ {
572
+ "epoch": 33.94,
573
+ "learning_rate": 3.3032442748091603e-06,
574
+ "loss": 0.0549,
575
+ "step": 17782
576
+ },
577
+ {
578
+ "epoch": 34.0,
579
+ "eval_accuracy": 0.5759429153924567,
580
+ "eval_f1": 0.7030091583078936,
581
+ "eval_loss": 0.6293498873710632,
582
+ "eval_roc_auc": 0.8028746184388699,
583
+ "eval_runtime": 9.2788,
584
+ "eval_samples_per_second": 105.725,
585
+ "eval_steps_per_second": 7.113,
586
+ "step": 17816
587
+ },
588
+ {
589
+ "epoch": 34.93,
590
+ "learning_rate": 3.2533396946564887e-06,
591
+ "loss": 0.0503,
592
+ "step": 18305
593
+ },
594
+ {
595
+ "epoch": 35.0,
596
+ "eval_accuracy": 0.5667686034658511,
597
+ "eval_f1": 0.7081545064377682,
598
+ "eval_loss": 0.630769670009613,
599
+ "eval_roc_auc": 0.8086227077643385,
600
+ "eval_runtime": 9.6294,
601
+ "eval_samples_per_second": 101.876,
602
+ "eval_steps_per_second": 6.854,
603
+ "step": 18340
604
+ },
605
+ {
606
+ "epoch": 35.93,
607
+ "learning_rate": 3.203435114503817e-06,
608
+ "loss": 0.0477,
609
+ "step": 18828
610
+ },
611
+ {
612
+ "epoch": 36.0,
613
+ "eval_accuracy": 0.527013251783894,
614
+ "eval_f1": 0.6998341625207297,
615
+ "eval_loss": 0.6515944004058838,
616
+ "eval_roc_auc": 0.8083547543090588,
617
+ "eval_runtime": 10.0399,
618
+ "eval_samples_per_second": 97.71,
619
+ "eval_steps_per_second": 6.574,
620
+ "step": 18864
621
+ },
622
+ {
623
+ "epoch": 36.93,
624
+ "learning_rate": 3.153530534351145e-06,
625
+ "loss": 0.0464,
626
+ "step": 19351
627
+ },
628
+ {
629
+ "epoch": 37.0,
630
+ "eval_accuracy": 0.5484199796126402,
631
+ "eval_f1": 0.7017984107068171,
632
+ "eval_loss": 0.6635262370109558,
633
+ "eval_roc_auc": 0.8083478689954327,
634
+ "eval_runtime": 9.7697,
635
+ "eval_samples_per_second": 100.412,
636
+ "eval_steps_per_second": 6.756,
637
+ "step": 19388
638
+ },
639
+ {
640
+ "epoch": 37.93,
641
+ "learning_rate": 3.1036259541984735e-06,
642
+ "loss": 0.0436,
643
+ "step": 19874
644
+ },
645
+ {
646
+ "epoch": 38.0,
647
+ "eval_accuracy": 0.563710499490316,
648
+ "eval_f1": 0.7022834984920293,
649
+ "eval_loss": 0.6706992387771606,
650
+ "eval_roc_auc": 0.8041971724312043,
651
+ "eval_runtime": 9.7315,
652
+ "eval_samples_per_second": 100.807,
653
+ "eval_steps_per_second": 6.782,
654
+ "step": 19912
655
+ },
656
+ {
657
+ "epoch": 38.93,
658
+ "learning_rate": 3.0537213740458015e-06,
659
+ "loss": 0.0394,
660
+ "step": 20397
661
+ },
662
+ {
663
+ "epoch": 39.0,
664
+ "eval_accuracy": 0.527013251783894,
665
+ "eval_f1": 0.6885798567214496,
666
+ "eval_loss": 0.7015945911407471,
667
+ "eval_roc_auc": 0.7983710495513071,
668
+ "eval_runtime": 9.8867,
669
+ "eval_samples_per_second": 99.224,
670
+ "eval_steps_per_second": 6.676,
671
+ "step": 20436
672
+ },
673
+ {
674
+ "epoch": 39.92,
675
+ "learning_rate": 3.00381679389313e-06,
676
+ "loss": 0.038,
677
+ "step": 20920
678
+ },
679
+ {
680
+ "epoch": 40.0,
681
+ "eval_accuracy": 0.5372069317023446,
682
+ "eval_f1": 0.6997894736842106,
683
+ "eval_loss": 0.7073464393615723,
684
+ "eval_roc_auc": 0.8059839113171605,
685
+ "eval_runtime": 9.9063,
686
+ "eval_samples_per_second": 99.028,
687
+ "eval_steps_per_second": 6.662,
688
+ "step": 20960
689
+ },
690
+ {
691
+ "epoch": 40.92,
692
+ "learning_rate": 2.953912213740458e-06,
693
+ "loss": 0.036,
694
+ "step": 21443
695
+ },
696
+ {
697
+ "epoch": 41.0,
698
+ "eval_accuracy": 0.5474006116207951,
699
+ "eval_f1": 0.6956521739130435,
700
+ "eval_loss": 0.7225540280342102,
701
+ "eval_roc_auc": 0.7999896720295611,
702
+ "eval_runtime": 9.951,
703
+ "eval_samples_per_second": 98.583,
704
+ "eval_steps_per_second": 6.632,
705
+ "step": 21484
706
+ },
707
+ {
708
+ "epoch": 41.92,
709
+ "learning_rate": 2.9040076335877863e-06,
710
+ "loss": 0.0341,
711
+ "step": 21966
712
+ },
713
+ {
714
+ "epoch": 42.0,
715
+ "eval_accuracy": 0.564729867482161,
716
+ "eval_f1": 0.7089262613195343,
717
+ "eval_loss": 0.7202900052070618,
718
+ "eval_roc_auc": 0.8084046728328477,
719
+ "eval_runtime": 9.6425,
720
+ "eval_samples_per_second": 101.737,
721
+ "eval_steps_per_second": 6.845,
722
+ "step": 22008
723
+ },
724
+ {
725
+ "epoch": 42.92,
726
+ "learning_rate": 2.8541030534351148e-06,
727
+ "loss": 0.0323,
728
+ "step": 22489
729
+ },
730
+ {
731
+ "epoch": 43.0,
732
+ "eval_accuracy": 0.5606523955147809,
733
+ "eval_f1": 0.7040552200172563,
734
+ "eval_loss": 0.7239031791687012,
735
+ "eval_roc_auc": 0.8051611163388492,
736
+ "eval_runtime": 9.9111,
737
+ "eval_samples_per_second": 98.98,
738
+ "eval_steps_per_second": 6.659,
739
+ "step": 22532
740
+ },
741
+ {
742
+ "epoch": 43.92,
743
+ "learning_rate": 2.8041984732824428e-06,
744
+ "loss": 0.0302,
745
+ "step": 23012
746
+ },
747
+ {
748
+ "epoch": 44.0,
749
+ "eval_accuracy": 0.5535168195718655,
750
+ "eval_f1": 0.6988466467321658,
751
+ "eval_loss": 0.7510971426963806,
752
+ "eval_roc_auc": 0.8032119988065456,
753
+ "eval_runtime": 9.9666,
754
+ "eval_samples_per_second": 98.429,
755
+ "eval_steps_per_second": 6.622,
756
+ "step": 23056
757
+ },
758
+ {
759
+ "epoch": 44.91,
760
+ "learning_rate": 2.754293893129771e-06,
761
+ "loss": 0.0286,
762
+ "step": 23535
763
+ },
764
+ {
765
+ "epoch": 45.0,
766
+ "eval_accuracy": 0.5524974515800204,
767
+ "eval_f1": 0.7002132196162046,
768
+ "eval_loss": 0.7605226635932922,
769
+ "eval_roc_auc": 0.8043658626150422,
770
+ "eval_runtime": 9.9921,
771
+ "eval_samples_per_second": 98.178,
772
+ "eval_steps_per_second": 6.605,
773
+ "step": 23580
774
+ },
775
+ {
776
+ "epoch": 45.91,
777
+ "learning_rate": 2.704389312977099e-06,
778
+ "loss": 0.0275,
779
+ "step": 24058
780
+ },
781
+ {
782
+ "epoch": 46.0,
783
+ "eval_accuracy": 0.5484199796126402,
784
+ "eval_f1": 0.6999573196756296,
785
+ "eval_loss": 0.774695634841919,
786
+ "eval_roc_auc": 0.8040703679052581,
787
+ "eval_runtime": 9.9221,
788
+ "eval_samples_per_second": 98.87,
789
+ "eval_steps_per_second": 6.652,
790
+ "step": 24104
791
+ },
792
+ {
793
+ "epoch": 46.91,
794
+ "learning_rate": 2.6544847328244276e-06,
795
+ "loss": 0.026,
796
+ "step": 24581
797
+ },
798
+ {
799
+ "epoch": 47.0,
800
+ "eval_accuracy": 0.5372069317023446,
801
+ "eval_f1": 0.6884827879303017,
802
+ "eval_loss": 0.7950236797332764,
803
+ "eval_roc_auc": 0.7971047256202519,
804
+ "eval_runtime": 10.0992,
805
+ "eval_samples_per_second": 97.137,
806
+ "eval_steps_per_second": 6.535,
807
+ "step": 24628
808
+ },
809
+ {
810
+ "epoch": 47.91,
811
+ "learning_rate": 2.6045801526717556e-06,
812
+ "loss": 0.0247,
813
+ "step": 25104
814
+ },
815
+ {
816
+ "epoch": 48.0,
817
+ "eval_accuracy": 0.5474006116207951,
818
+ "eval_f1": 0.6989293361884368,
819
+ "eval_loss": 0.8052034974098206,
820
+ "eval_roc_auc": 0.802888389066122,
821
+ "eval_runtime": 10.0437,
822
+ "eval_samples_per_second": 97.673,
823
+ "eval_steps_per_second": 6.571,
824
+ "step": 25152
825
+ },
826
+ {
827
+ "epoch": 48.91,
828
+ "learning_rate": 2.554675572519084e-06,
829
+ "loss": 0.0223,
830
+ "step": 25627
831
+ },
832
+ {
833
+ "epoch": 49.0,
834
+ "eval_accuracy": 0.5524974515800204,
835
+ "eval_f1": 0.7021276595744681,
836
+ "eval_loss": 0.8275096416473389,
837
+ "eval_roc_auc": 0.8059489109728948,
838
+ "eval_runtime": 9.9609,
839
+ "eval_samples_per_second": 98.485,
840
+ "eval_steps_per_second": 6.626,
841
+ "step": 25676
842
+ },
843
+ {
844
+ "epoch": 49.9,
845
+ "learning_rate": 2.5047709923664124e-06,
846
+ "loss": 0.0239,
847
+ "step": 26150
848
+ },
849
+ {
850
+ "epoch": 50.0,
851
+ "eval_accuracy": 0.5382262996941896,
852
+ "eval_f1": 0.6999168744804655,
853
+ "eval_loss": 0.8239336609840393,
854
+ "eval_roc_auc": 0.8080311445686351,
855
+ "eval_runtime": 9.9615,
856
+ "eval_samples_per_second": 98.479,
857
+ "eval_steps_per_second": 6.626,
858
+ "step": 26200
859
+ },
860
+ {
861
+ "epoch": 50.9,
862
+ "learning_rate": 2.4548664122137404e-06,
863
+ "loss": 0.023,
864
+ "step": 26673
865
+ },
866
+ {
867
+ "epoch": 51.0,
868
+ "eval_accuracy": 0.5484199796126402,
869
+ "eval_f1": 0.7054823629409264,
870
+ "eval_loss": 0.8209096193313599,
871
+ "eval_roc_auc": 0.8083622133988203,
872
+ "eval_runtime": 10.0023,
873
+ "eval_samples_per_second": 98.078,
874
+ "eval_steps_per_second": 6.598,
875
+ "step": 26724
876
+ },
877
+ {
878
+ "epoch": 51.9,
879
+ "learning_rate": 2.404961832061069e-06,
880
+ "loss": 0.0199,
881
+ "step": 27196
882
+ },
883
+ {
884
+ "epoch": 52.0,
885
+ "eval_accuracy": 0.5484199796126402,
886
+ "eval_f1": 0.7072438919845693,
887
+ "eval_loss": 0.828449010848999,
888
+ "eval_roc_auc": 0.8082216382456221,
889
+ "eval_runtime": 10.055,
890
+ "eval_samples_per_second": 97.563,
891
+ "eval_steps_per_second": 6.564,
892
+ "step": 27248
893
+ },
894
+ {
895
+ "epoch": 52.9,
896
+ "learning_rate": 2.3550572519083973e-06,
897
+ "loss": 0.0197,
898
+ "step": 27719
899
+ },
900
+ {
901
+ "epoch": 53.0,
902
+ "eval_accuracy": 0.563710499490316,
903
+ "eval_f1": 0.7059333044608056,
904
+ "eval_loss": 0.8516786098480225,
905
+ "eval_roc_auc": 0.8058014505060705,
906
+ "eval_runtime": 10.0044,
907
+ "eval_samples_per_second": 98.057,
908
+ "eval_steps_per_second": 6.597,
909
+ "step": 27772
910
+ },
911
+ {
912
+ "epoch": 53.9,
913
+ "learning_rate": 2.3051526717557252e-06,
914
+ "loss": 0.0168,
915
+ "step": 28242
916
+ },
917
+ {
918
+ "epoch": 54.0,
919
+ "eval_accuracy": 0.5351681957186545,
920
+ "eval_f1": 0.699581589958159,
921
+ "eval_loss": 0.8833754658699036,
922
+ "eval_roc_auc": 0.8067929356682197,
923
+ "eval_runtime": 10.0433,
924
+ "eval_samples_per_second": 97.678,
925
+ "eval_steps_per_second": 6.572,
926
+ "step": 28296
927
+ },
928
+ {
929
+ "epoch": 54.9,
930
+ "learning_rate": 2.2552480916030537e-06,
931
+ "loss": 0.018,
932
+ "step": 28765
933
+ },
934
+ {
935
+ "epoch": 55.0,
936
+ "eval_accuracy": 0.5524974515800204,
937
+ "eval_f1": 0.7060839760068551,
938
+ "eval_loss": 0.8679118156433105,
939
+ "eval_roc_auc": 0.8075250740171215,
940
+ "eval_runtime": 9.9308,
941
+ "eval_samples_per_second": 98.784,
942
+ "eval_steps_per_second": 6.646,
943
+ "step": 28820
944
+ },
945
+ {
946
+ "epoch": 55.89,
947
+ "learning_rate": 2.2053435114503817e-06,
948
+ "loss": 0.0169,
949
+ "step": 29288
950
+ },
951
+ {
952
+ "epoch": 56.0,
953
+ "eval_accuracy": 0.5575942915392457,
954
+ "eval_f1": 0.7040417209908736,
955
+ "eval_loss": 0.8795809149742126,
956
+ "eval_roc_auc": 0.8040565972780059,
957
+ "eval_runtime": 9.9865,
958
+ "eval_samples_per_second": 98.232,
959
+ "eval_steps_per_second": 6.609,
960
+ "step": 29344
961
+ },
962
+ {
963
+ "epoch": 56.89,
964
+ "learning_rate": 2.15543893129771e-06,
965
+ "loss": 0.0168,
966
+ "step": 29811
967
+ },
968
+ {
969
+ "epoch": 57.0,
970
+ "eval_accuracy": 0.5433231396534148,
971
+ "eval_f1": 0.6988879384088965,
972
+ "eval_loss": 0.9083885550498962,
973
+ "eval_roc_auc": 0.8030501939363338,
974
+ "eval_runtime": 10.0982,
975
+ "eval_samples_per_second": 97.146,
976
+ "eval_steps_per_second": 6.536,
977
+ "step": 29868
978
+ },
979
+ {
980
+ "epoch": 57.89,
981
+ "learning_rate": 2.105534351145038e-06,
982
+ "loss": 0.0156,
983
+ "step": 30334
984
+ },
985
+ {
986
+ "epoch": 58.0,
987
+ "eval_accuracy": 0.5606523955147809,
988
+ "eval_f1": 0.710651142733937,
989
+ "eval_loss": 0.9000456929206848,
990
+ "eval_roc_auc": 0.8095304216107044,
991
+ "eval_runtime": 10.0091,
992
+ "eval_samples_per_second": 98.011,
993
+ "eval_steps_per_second": 6.594,
994
+ "step": 30392
995
+ },
996
+ {
997
+ "epoch": 58.89,
998
+ "learning_rate": 2.055629770992367e-06,
999
+ "loss": 0.0138,
1000
+ "step": 30857
1001
+ },
1002
+ {
1003
+ "epoch": 59.0,
1004
+ "eval_accuracy": 0.5382262996941896,
1005
+ "eval_f1": 0.6970849176172369,
1006
+ "eval_loss": 0.9262450337409973,
1007
+ "eval_roc_auc": 0.8036761837001675,
1008
+ "eval_runtime": 9.8719,
1009
+ "eval_samples_per_second": 99.373,
1010
+ "eval_steps_per_second": 6.686,
1011
+ "step": 30916
1012
+ },
1013
+ {
1014
+ "epoch": 59.89,
1015
+ "learning_rate": 2.005725190839695e-06,
1016
+ "loss": 0.0139,
1017
+ "step": 31380
1018
+ },
1019
+ {
1020
+ "epoch": 60.0,
1021
+ "eval_accuracy": 0.5596330275229358,
1022
+ "eval_f1": 0.7065868263473054,
1023
+ "eval_loss": 0.923125684261322,
1024
+ "eval_roc_auc": 0.8081160634366895,
1025
+ "eval_runtime": 10.0204,
1026
+ "eval_samples_per_second": 97.901,
1027
+ "eval_steps_per_second": 6.587,
1028
+ "step": 31440
1029
+ },
1030
+ {
1031
+ "epoch": 60.88,
1032
+ "learning_rate": 1.955820610687023e-06,
1033
+ "loss": 0.0155,
1034
+ "step": 31903
1035
+ },
1036
+ {
1037
+ "epoch": 61.0,
1038
+ "eval_accuracy": 0.5596330275229358,
1039
+ "eval_f1": 0.7081545064377682,
1040
+ "eval_loss": 0.9300869107246399,
1041
+ "eval_roc_auc": 0.8086227077643385,
1042
+ "eval_runtime": 9.9625,
1043
+ "eval_samples_per_second": 98.47,
1044
+ "eval_steps_per_second": 6.625,
1045
+ "step": 31964
1046
+ },
1047
+ {
1048
+ "epoch": 61.88,
1049
+ "learning_rate": 1.9059160305343513e-06,
1050
+ "loss": 0.0149,
1051
+ "step": 32426
1052
+ },
1053
+ {
1054
+ "epoch": 62.0,
1055
+ "eval_accuracy": 0.5504587155963303,
1056
+ "eval_f1": 0.7104930467762326,
1057
+ "eval_loss": 0.9461256861686707,
1058
+ "eval_roc_auc": 0.8130057836634459,
1059
+ "eval_runtime": 10.0052,
1060
+ "eval_samples_per_second": 98.049,
1061
+ "eval_steps_per_second": 6.597,
1062
+ "step": 32488
1063
+ },
1064
+ {
1065
+ "epoch": 62.88,
1066
+ "learning_rate": 1.8560114503816795e-06,
1067
+ "loss": 0.0124,
1068
+ "step": 32949
1069
+ },
1070
+ {
1071
+ "epoch": 63.0,
1072
+ "eval_accuracy": 0.5565749235474006,
1073
+ "eval_f1": 0.7081380485726461,
1074
+ "eval_loss": 0.9584424495697021,
1075
+ "eval_roc_auc": 0.809727226825182,
1076
+ "eval_runtime": 9.989,
1077
+ "eval_samples_per_second": 98.208,
1078
+ "eval_steps_per_second": 6.607,
1079
+ "step": 33012
1080
+ },
1081
+ {
1082
+ "epoch": 63.88,
1083
+ "learning_rate": 1.8061068702290077e-06,
1084
+ "loss": 0.011,
1085
+ "step": 33472
1086
+ },
1087
+ {
1088
+ "epoch": 64.0,
1089
+ "eval_accuracy": 0.5545361875637105,
1090
+ "eval_f1": 0.7051226861816616,
1091
+ "eval_loss": 0.9570773243904114,
1092
+ "eval_roc_auc": 0.8061812903077735,
1093
+ "eval_runtime": 10.0472,
1094
+ "eval_samples_per_second": 97.639,
1095
+ "eval_steps_per_second": 6.569,
1096
+ "step": 33536
1097
+ },
1098
+ {
1099
+ "epoch": 64.88,
1100
+ "learning_rate": 1.756202290076336e-06,
1101
+ "loss": 0.0114,
1102
+ "step": 33995
1103
+ },
1104
+ {
1105
+ "epoch": 65.0,
1106
+ "eval_accuracy": 0.5565749235474006,
1107
+ "eval_f1": 0.7043701799485862,
1108
+ "eval_loss": 0.9560405015945435,
1109
+ "eval_roc_auc": 0.8063993252392647,
1110
+ "eval_runtime": 10.0628,
1111
+ "eval_samples_per_second": 97.488,
1112
+ "eval_steps_per_second": 6.559,
1113
+ "step": 34060
1114
+ },
1115
+ {
1116
+ "epoch": 65.87,
1117
+ "learning_rate": 1.7062977099236644e-06,
1118
+ "loss": 0.011,
1119
+ "step": 34518
1120
+ },
1121
+ {
1122
+ "epoch": 66.0,
1123
+ "eval_accuracy": 0.5504587155963303,
1124
+ "eval_f1": 0.7023809523809524,
1125
+ "eval_loss": 0.9797949194908142,
1126
+ "eval_roc_auc": 0.8062444056826787,
1127
+ "eval_runtime": 10.0681,
1128
+ "eval_samples_per_second": 97.436,
1129
+ "eval_steps_per_second": 6.555,
1130
+ "step": 34584
1131
+ },
1132
+ {
1133
+ "epoch": 66.87,
1134
+ "learning_rate": 1.6563931297709926e-06,
1135
+ "loss": 0.0107,
1136
+ "step": 35041
1137
+ },
1138
+ {
1139
+ "epoch": 67.0,
1140
+ "eval_accuracy": 0.545361875637105,
1141
+ "eval_f1": 0.6986301369863014,
1142
+ "eval_loss": 0.9825329780578613,
1143
+ "eval_roc_auc": 0.8027546992265496,
1144
+ "eval_runtime": 10.0516,
1145
+ "eval_samples_per_second": 97.596,
1146
+ "eval_steps_per_second": 6.566,
1147
+ "step": 35108
1148
+ },
1149
+ {
1150
+ "epoch": 67.87,
1151
+ "learning_rate": 1.6064885496183208e-06,
1152
+ "loss": 0.0091,
1153
+ "step": 35564
1154
+ },
1155
+ {
1156
+ "epoch": 68.0,
1157
+ "eval_accuracy": 0.5606523955147809,
1158
+ "eval_f1": 0.7040552200172563,
1159
+ "eval_loss": 0.9886102676391602,
1160
+ "eval_roc_auc": 0.8051611163388492,
1161
+ "eval_runtime": 10.0044,
1162
+ "eval_samples_per_second": 98.056,
1163
+ "eval_steps_per_second": 6.597,
1164
+ "step": 35632
1165
+ },
1166
+ {
1167
+ "epoch": 68.87,
1168
+ "learning_rate": 1.556583969465649e-06,
1169
+ "loss": 0.0095,
1170
+ "step": 36087
1171
+ },
1172
+ {
1173
+ "epoch": 69.0,
1174
+ "eval_accuracy": 0.5392456676860347,
1175
+ "eval_f1": 0.7075,
1176
+ "eval_loss": 1.007102131843567,
1177
+ "eval_roc_auc": 0.8127734043285672,
1178
+ "eval_runtime": 10.0072,
1179
+ "eval_samples_per_second": 98.029,
1180
+ "eval_steps_per_second": 6.595,
1181
+ "step": 36156
1182
+ },
1183
+ {
1184
+ "epoch": 69.87,
1185
+ "learning_rate": 1.5066793893129772e-06,
1186
+ "loss": 0.0088,
1187
+ "step": 36610
1188
+ },
1189
+ {
1190
+ "epoch": 70.0,
1191
+ "eval_accuracy": 0.5443425076452599,
1192
+ "eval_f1": 0.7051336444633007,
1193
+ "eval_loss": 1.004128098487854,
1194
+ "eval_roc_auc": 0.8083903284294599,
1195
+ "eval_runtime": 10.0021,
1196
+ "eval_samples_per_second": 98.079,
1197
+ "eval_steps_per_second": 6.599,
1198
+ "step": 36680
1199
+ },
1200
+ {
1201
+ "epoch": 70.86,
1202
+ "learning_rate": 1.4567748091603054e-06,
1203
+ "loss": 0.0102,
1204
+ "step": 37133
1205
+ },
1206
+ {
1207
+ "epoch": 71.0,
1208
+ "eval_accuracy": 0.5474006116207951,
1209
+ "eval_f1": 0.7023354564755838,
1210
+ "eval_loss": 1.023705244064331,
1211
+ "eval_roc_auc": 0.8064062105528907,
1212
+ "eval_runtime": 10.0301,
1213
+ "eval_samples_per_second": 97.806,
1214
+ "eval_steps_per_second": 6.58,
1215
+ "step": 37204
1216
+ },
1217
+ {
1218
+ "epoch": 71.86,
1219
+ "learning_rate": 1.4068702290076336e-06,
1220
+ "loss": 0.0086,
1221
+ "step": 37656
1222
+ },
1223
+ {
1224
+ "epoch": 72.0,
1225
+ "eval_accuracy": 0.5606523955147809,
1226
+ "eval_f1": 0.7097887020267357,
1227
+ "eval_loss": 1.0078336000442505,
1228
+ "eval_roc_auc": 0.808967547221776,
1229
+ "eval_runtime": 10.0138,
1230
+ "eval_samples_per_second": 97.964,
1231
+ "eval_steps_per_second": 6.591,
1232
+ "step": 37728
1233
+ },
1234
+ {
1235
+ "epoch": 72.86,
1236
+ "learning_rate": 1.356965648854962e-06,
1237
+ "loss": 0.0084,
1238
+ "step": 38179
1239
+ },
1240
+ {
1241
+ "epoch": 73.0,
1242
+ "eval_accuracy": 0.5504587155963303,
1243
+ "eval_f1": 0.7039249146757679,
1244
+ "eval_loss": 1.0251305103302002,
1245
+ "eval_roc_auc": 0.8067510500103279,
1246
+ "eval_runtime": 9.9979,
1247
+ "eval_samples_per_second": 98.12,
1248
+ "eval_steps_per_second": 6.601,
1249
+ "step": 38252
1250
+ },
1251
+ {
1252
+ "epoch": 73.86,
1253
+ "learning_rate": 1.3070610687022902e-06,
1254
+ "loss": 0.0084,
1255
+ "step": 38702
1256
+ },
1257
+ {
1258
+ "epoch": 74.0,
1259
+ "eval_accuracy": 0.5565749235474006,
1260
+ "eval_f1": 0.7060839760068551,
1261
+ "eval_loss": 1.0233700275421143,
1262
+ "eval_roc_auc": 0.8075250740171215,
1263
+ "eval_runtime": 10.0267,
1264
+ "eval_samples_per_second": 97.839,
1265
+ "eval_steps_per_second": 6.582,
1266
+ "step": 38776
1267
+ },
1268
+ {
1269
+ "epoch": 74.86,
1270
+ "learning_rate": 1.2571564885496184e-06,
1271
+ "loss": 0.0076,
1272
+ "step": 39225
1273
+ },
1274
+ {
1275
+ "epoch": 75.0,
1276
+ "eval_accuracy": 0.5433231396534148,
1277
+ "eval_f1": 0.7029787234042555,
1278
+ "eval_loss": 1.0505975484848022,
1279
+ "eval_roc_auc": 0.8065117853618233,
1280
+ "eval_runtime": 9.9951,
1281
+ "eval_samples_per_second": 98.148,
1282
+ "eval_steps_per_second": 6.603,
1283
+ "step": 39300
1284
+ },
1285
+ {
1286
+ "epoch": 75.85,
1287
+ "learning_rate": 1.2072519083969466e-06,
1288
+ "loss": 0.0089,
1289
+ "step": 39748
1290
+ },
1291
+ {
1292
+ "epoch": 76.0,
1293
+ "eval_accuracy": 0.5575942915392457,
1294
+ "eval_f1": 0.7084398976982098,
1295
+ "eval_loss": 1.0305790901184082,
1296
+ "eval_roc_auc": 0.8098609166647541,
1297
+ "eval_runtime": 10.0015,
1298
+ "eval_samples_per_second": 98.086,
1299
+ "eval_steps_per_second": 6.599,
1300
+ "step": 39824
1301
+ },
1302
+ {
1303
+ "epoch": 76.85,
1304
+ "learning_rate": 1.1573473282442748e-06,
1305
+ "loss": 0.0074,
1306
+ "step": 40271
1307
+ },
1308
+ {
1309
+ "epoch": 77.0,
1310
+ "eval_accuracy": 0.5514780835881753,
1311
+ "eval_f1": 0.7120954003407156,
1312
+ "eval_loss": 1.037413477897644,
1313
+ "eval_roc_auc": 0.8124079089302518,
1314
+ "eval_runtime": 10.0466,
1315
+ "eval_samples_per_second": 97.645,
1316
+ "eval_steps_per_second": 6.569,
1317
+ "step": 40348
1318
+ },
1319
+ {
1320
+ "epoch": 77.85,
1321
+ "learning_rate": 1.1074427480916033e-06,
1322
+ "loss": 0.0064,
1323
+ "step": 40794
1324
+ },
1325
+ {
1326
+ "epoch": 78.0,
1327
+ "eval_accuracy": 0.563710499490316,
1328
+ "eval_f1": 0.7064418504107222,
1329
+ "eval_loss": 1.0435516834259033,
1330
+ "eval_roc_auc": 0.8063924399256387,
1331
+ "eval_runtime": 9.9844,
1332
+ "eval_samples_per_second": 98.254,
1333
+ "eval_steps_per_second": 6.61,
1334
+ "step": 40872
1335
+ },
1336
+ {
1337
+ "epoch": 78.85,
1338
+ "learning_rate": 1.0575381679389315e-06,
1339
+ "loss": 0.0079,
1340
+ "step": 41317
1341
+ },
1342
+ {
1343
+ "epoch": 79.0,
1344
+ "eval_accuracy": 0.5535168195718655,
1345
+ "eval_f1": 0.7080479452054794,
1346
+ "eval_loss": 1.0525715351104736,
1347
+ "eval_roc_auc": 0.8089463175047622,
1348
+ "eval_runtime": 10.0182,
1349
+ "eval_samples_per_second": 97.922,
1350
+ "eval_steps_per_second": 6.588,
1351
+ "step": 41396
1352
+ },
1353
+ {
1354
+ "epoch": 79.85,
1355
+ "learning_rate": 1.0076335877862597e-06,
1356
+ "loss": 0.0059,
1357
+ "step": 41840
1358
+ },
1359
+ {
1360
+ "epoch": 80.0,
1361
+ "eval_accuracy": 0.545361875637105,
1362
+ "eval_f1": 0.7050359712230215,
1363
+ "eval_loss": 1.0556447505950928,
1364
+ "eval_roc_auc": 0.8087139381698836,
1365
+ "eval_runtime": 9.9919,
1366
+ "eval_samples_per_second": 98.18,
1367
+ "eval_steps_per_second": 6.605,
1368
+ "step": 41920
1369
+ },
1370
+ {
1371
+ "epoch": 80.85,
1372
+ "learning_rate": 9.577290076335879e-07,
1373
+ "loss": 0.0063,
1374
+ "step": 42363
1375
+ },
1376
+ {
1377
+ "epoch": 81.0,
1378
+ "eval_accuracy": 0.5412844036697247,
1379
+ "eval_f1": 0.7034834324553951,
1380
+ "eval_loss": 1.0627212524414062,
1381
+ "eval_roc_auc": 0.8071027747813913,
1382
+ "eval_runtime": 10.1039,
1383
+ "eval_samples_per_second": 97.091,
1384
+ "eval_steps_per_second": 6.532,
1385
+ "step": 42444
1386
+ },
1387
+ {
1388
+ "epoch": 81.84,
1389
+ "learning_rate": 9.078244274809162e-07,
1390
+ "loss": 0.0064,
1391
+ "step": 42886
1392
+ },
1393
+ {
1394
+ "epoch": 82.0,
1395
+ "eval_accuracy": 0.5382262996941896,
1396
+ "eval_f1": 0.7048903878583473,
1397
+ "eval_loss": 1.0669602155685425,
1398
+ "eval_roc_auc": 0.8091993527805191,
1399
+ "eval_runtime": 10.2231,
1400
+ "eval_samples_per_second": 95.96,
1401
+ "eval_steps_per_second": 6.456,
1402
+ "step": 42968
1403
+ },
1404
+ {
1405
+ "epoch": 82.84,
1406
+ "learning_rate": 8.579198473282444e-07,
1407
+ "loss": 0.0064,
1408
+ "step": 43409
1409
+ },
1410
+ {
1411
+ "epoch": 83.0,
1412
+ "eval_accuracy": 0.5575942915392457,
1413
+ "eval_f1": 0.70824434002563,
1414
+ "eval_loss": 1.0584640502929688,
1415
+ "eval_roc_auc": 0.8094036170847583,
1416
+ "eval_runtime": 10.0727,
1417
+ "eval_samples_per_second": 97.392,
1418
+ "eval_steps_per_second": 6.552,
1419
+ "step": 43492
1420
+ },
1421
+ {
1422
+ "epoch": 83.84,
1423
+ "learning_rate": 8.080152671755725e-07,
1424
+ "loss": 0.006,
1425
+ "step": 43932
1426
+ },
1427
+ {
1428
+ "epoch": 84.0,
1429
+ "eval_accuracy": 0.5524974515800204,
1430
+ "eval_f1": 0.7100340136054422,
1431
+ "eval_loss": 1.0683461427688599,
1432
+ "eval_roc_auc": 0.8113102751830346,
1433
+ "eval_runtime": 10.0298,
1434
+ "eval_samples_per_second": 97.809,
1435
+ "eval_steps_per_second": 6.58,
1436
+ "step": 44016
1437
+ },
1438
+ {
1439
+ "epoch": 84.84,
1440
+ "learning_rate": 7.581106870229009e-07,
1441
+ "loss": 0.0056,
1442
+ "step": 44455
1443
+ },
1444
+ {
1445
+ "epoch": 85.0,
1446
+ "eval_accuracy": 0.5484199796126402,
1447
+ "eval_f1": 0.7041294167730949,
1448
+ "eval_loss": 1.0729014873504639,
1449
+ "eval_roc_auc": 0.8072083495903238,
1450
+ "eval_runtime": 10.0817,
1451
+ "eval_samples_per_second": 97.305,
1452
+ "eval_steps_per_second": 6.547,
1453
+ "step": 44540
1454
+ },
1455
+ {
1456
+ "epoch": 85.84,
1457
+ "learning_rate": 7.082061068702291e-07,
1458
+ "loss": 0.0063,
1459
+ "step": 44978
1460
+ },
1461
+ {
1462
+ "epoch": 86.0,
1463
+ "eval_accuracy": 0.564729867482161,
1464
+ "eval_f1": 0.7094274644855791,
1465
+ "eval_loss": 1.0694997310638428,
1466
+ "eval_roc_auc": 0.8089956622524155,
1467
+ "eval_runtime": 10.0647,
1468
+ "eval_samples_per_second": 97.469,
1469
+ "eval_steps_per_second": 6.558,
1470
+ "step": 45064
1471
+ },
1472
+ {
1473
+ "epoch": 86.83,
1474
+ "learning_rate": 6.583015267175573e-07,
1475
+ "loss": 0.0051,
1476
+ "step": 45501
1477
+ },
1478
+ {
1479
+ "epoch": 87.0,
1480
+ "eval_accuracy": 0.5555555555555556,
1481
+ "eval_f1": 0.7070967741935484,
1482
+ "eval_loss": 1.0698884725570679,
1483
+ "eval_roc_auc": 0.8076025337954145,
1484
+ "eval_runtime": 10.0544,
1485
+ "eval_samples_per_second": 97.569,
1486
+ "eval_steps_per_second": 6.564,
1487
+ "step": 45588
1488
+ },
1489
+ {
1490
+ "epoch": 87.83,
1491
+ "learning_rate": 6.083969465648855e-07,
1492
+ "loss": 0.0059,
1493
+ "step": 46024
1494
+ },
1495
+ {
1496
+ "epoch": 88.0,
1497
+ "eval_accuracy": 0.5494393476044852,
1498
+ "eval_f1": 0.7032119914346895,
1499
+ "eval_loss": 1.073889970779419,
1500
+ "eval_roc_auc": 0.8057027610107641,
1501
+ "eval_runtime": 10.0443,
1502
+ "eval_samples_per_second": 97.668,
1503
+ "eval_steps_per_second": 6.571,
1504
+ "step": 46112
1505
+ },
1506
+ {
1507
+ "epoch": 88.83,
1508
+ "learning_rate": 5.584923664122137e-07,
1509
+ "loss": 0.0067,
1510
+ "step": 46547
1511
+ },
1512
+ {
1513
+ "epoch": 89.0,
1514
+ "eval_accuracy": 0.5535168195718655,
1515
+ "eval_f1": 0.7074422583404619,
1516
+ "eval_loss": 1.0765037536621094,
1517
+ "eval_roc_auc": 0.808678937825618,
1518
+ "eval_runtime": 10.0208,
1519
+ "eval_samples_per_second": 97.897,
1520
+ "eval_steps_per_second": 6.586,
1521
+ "step": 46636
1522
+ },
1523
+ {
1524
+ "epoch": 89.83,
1525
+ "learning_rate": 5.085877862595421e-07,
1526
+ "loss": 0.0055,
1527
+ "step": 47070
1528
+ },
1529
+ {
1530
+ "epoch": 90.0,
1531
+ "eval_accuracy": 0.5423037716615698,
1532
+ "eval_f1": 0.705531914893617,
1533
+ "eval_loss": 1.0747418403625488,
1534
+ "eval_roc_auc": 0.8082004085286085,
1535
+ "eval_runtime": 10.0179,
1536
+ "eval_samples_per_second": 97.924,
1537
+ "eval_steps_per_second": 6.588,
1538
+ "step": 47160
1539
+ },
1540
+ {
1541
+ "epoch": 90.83,
1542
+ "learning_rate": 4.586832061068703e-07,
1543
+ "loss": 0.0041,
1544
+ "step": 47593
1545
+ },
1546
+ {
1547
+ "epoch": 91.0,
1548
+ "eval_accuracy": 0.5524974515800204,
1549
+ "eval_f1": 0.7061855670103093,
1550
+ "eval_loss": 1.077362060546875,
1551
+ "eval_roc_auc": 0.8072014642766979,
1552
+ "eval_runtime": 9.974,
1553
+ "eval_samples_per_second": 98.356,
1554
+ "eval_steps_per_second": 6.617,
1555
+ "step": 47684
1556
+ },
1557
+ {
1558
+ "epoch": 91.82,
1559
+ "learning_rate": 4.0877862595419847e-07,
1560
+ "loss": 0.0051,
1561
+ "step": 48116
1562
+ },
1563
+ {
1564
+ "epoch": 92.0,
1565
+ "eval_accuracy": 0.5565749235474006,
1566
+ "eval_f1": 0.706792777300086,
1567
+ "eval_loss": 1.0821139812469482,
1568
+ "eval_roc_auc": 0.8074688439558423,
1569
+ "eval_runtime": 9.9576,
1570
+ "eval_samples_per_second": 98.517,
1571
+ "eval_steps_per_second": 6.628,
1572
+ "step": 48208
1573
+ },
1574
+ {
1575
+ "epoch": 92.82,
1576
+ "learning_rate": 3.588740458015268e-07,
1577
+ "loss": 0.0053,
1578
+ "step": 48639
1579
+ },
1580
+ {
1581
+ "epoch": 93.0,
1582
+ "eval_accuracy": 0.5535168195718655,
1583
+ "eval_f1": 0.7076526225279449,
1584
+ "eval_loss": 1.0813453197479248,
1585
+ "eval_roc_auc": 0.8080317183447706,
1586
+ "eval_runtime": 10.0101,
1587
+ "eval_samples_per_second": 98.001,
1588
+ "eval_steps_per_second": 6.593,
1589
+ "step": 48732
1590
+ },
1591
+ {
1592
+ "epoch": 93.82,
1593
+ "learning_rate": 3.08969465648855e-07,
1594
+ "loss": 0.0052,
1595
+ "step": 49162
1596
+ },
1597
+ {
1598
+ "epoch": 94.0,
1599
+ "eval_accuracy": 0.5494393476044852,
1600
+ "eval_f1": 0.7078364565587735,
1601
+ "eval_loss": 1.0872832536697388,
1602
+ "eval_roc_auc": 0.8095935369856098,
1603
+ "eval_runtime": 10.0385,
1604
+ "eval_samples_per_second": 97.724,
1605
+ "eval_steps_per_second": 6.575,
1606
+ "step": 49256
1607
+ },
1608
+ {
1609
+ "epoch": 94.82,
1610
+ "learning_rate": 2.5906488549618325e-07,
1611
+ "loss": 0.0049,
1612
+ "step": 49685
1613
+ },
1614
+ {
1615
+ "epoch": 95.0,
1616
+ "eval_accuracy": 0.5504587155963303,
1617
+ "eval_f1": 0.7095681625740897,
1618
+ "eval_loss": 1.0950355529785156,
1619
+ "eval_roc_auc": 0.8116619999540979,
1620
+ "eval_runtime": 10.0471,
1621
+ "eval_samples_per_second": 97.64,
1622
+ "eval_steps_per_second": 6.569,
1623
+ "step": 49780
1624
+ },
1625
+ {
1626
+ "epoch": 95.82,
1627
+ "learning_rate": 2.0916030534351148e-07,
1628
+ "loss": 0.0055,
1629
+ "step": 50208
1630
+ },
1631
+ {
1632
+ "epoch": 96.0,
1633
+ "eval_accuracy": 0.54638124362895,
1634
+ "eval_f1": 0.7064846416382252,
1635
+ "eval_loss": 1.091185450553894,
1636
+ "eval_roc_auc": 0.8084396731771133,
1637
+ "eval_runtime": 10.0192,
1638
+ "eval_samples_per_second": 97.912,
1639
+ "eval_steps_per_second": 6.587,
1640
+ "step": 50304
1641
+ },
1642
+ {
1643
+ "epoch": 96.81,
1644
+ "learning_rate": 1.5925572519083971e-07,
1645
+ "loss": 0.0048,
1646
+ "step": 50731
1647
+ },
1648
+ {
1649
+ "epoch": 97.0,
1650
+ "eval_accuracy": 0.5555555555555556,
1651
+ "eval_f1": 0.70926243567753,
1652
+ "eval_loss": 1.0918930768966675,
1653
+ "eval_roc_auc": 0.809481076863051,
1654
+ "eval_runtime": 10.1024,
1655
+ "eval_samples_per_second": 97.106,
1656
+ "eval_steps_per_second": 6.533,
1657
+ "step": 50828
1658
+ },
1659
+ {
1660
+ "epoch": 97.81,
1661
+ "learning_rate": 1.0935114503816793e-07,
1662
+ "loss": 0.0052,
1663
+ "step": 51254
1664
+ },
1665
+ {
1666
+ "epoch": 98.0,
1667
+ "eval_accuracy": 0.5514780835881753,
1668
+ "eval_f1": 0.7080479452054794,
1669
+ "eval_loss": 1.0924346446990967,
1670
+ "eval_roc_auc": 0.8089463175047622,
1671
+ "eval_runtime": 10.042,
1672
+ "eval_samples_per_second": 97.689,
1673
+ "eval_steps_per_second": 6.572,
1674
+ "step": 51352
1675
+ },
1676
+ {
1677
+ "epoch": 98.81,
1678
+ "learning_rate": 5.9446564885496193e-08,
1679
+ "loss": 0.0041,
1680
+ "step": 51777
1681
+ },
1682
+ {
1683
+ "epoch": 99.0,
1684
+ "eval_accuracy": 0.5504587155963303,
1685
+ "eval_f1": 0.70824434002563,
1686
+ "eval_loss": 1.0939857959747314,
1687
+ "eval_roc_auc": 0.8094036170847583,
1688
+ "eval_runtime": 10.0515,
1689
+ "eval_samples_per_second": 97.598,
1690
+ "eval_steps_per_second": 6.566,
1691
+ "step": 51876
1692
+ },
1693
+ {
1694
+ "epoch": 99.81,
1695
+ "learning_rate": 9.541984732824428e-09,
1696
+ "loss": 0.0043,
1697
+ "step": 52300
1698
+ },
1699
+ {
1700
+ "epoch": 100.0,
1701
+ "eval_accuracy": 0.5514780835881753,
1702
+ "eval_f1": 0.7073378839590444,
1703
+ "eval_loss": 1.0944114923477173,
1704
+ "eval_roc_auc": 0.8090025475660416,
1705
+ "eval_runtime": 10.0289,
1706
+ "eval_samples_per_second": 97.818,
1707
+ "eval_steps_per_second": 6.581,
1708
+ "step": 52400
1709
+ }
1710
+ ],
1711
+ "max_steps": 52400,
1712
+ "num_train_epochs": 100,
1713
+ "total_flos": 1.03939481527296e+17,
1714
+ "trial_name": null,
1715
+ "trial_params": null
1716
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf631f4c0b74fa8d3b4cdb53d55bcf1b57bcba0abafbe6b794fc7456083a0dce
3
+ size 3323
vocab.txt ADDED
The diff for this file is too large to render. See raw diff