File size: 5,692 Bytes
a26c659
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
{
  "best_metric": 6.602455701786633,
  "best_model_checkpoint": "checkpoint-130000",
  "epoch": 98.21512890735669,
  "eval_steps": 10000,
  "global_step": 130000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 7.56,
      "learning_rate": 3.7792894935752085e-05,
      "loss": 3.1181,
      "step": 10000
    },
    {
      "epoch": 7.56,
      "eval_cer": 12.22214052888268,
      "eval_loss": 0.39288055896759033,
      "eval_runtime": 80.6046,
      "eval_samples_per_second": 6.178,
      "eval_steps_per_second": 3.089,
      "eval_wer": 38.847117794486216,
      "step": 10000
    },
    {
      "epoch": 15.11,
      "learning_rate": 7.558578987150417e-05,
      "loss": 0.4086,
      "step": 20000
    },
    {
      "epoch": 15.11,
      "eval_cer": 9.646349533122564,
      "eval_loss": 0.3199174404144287,
      "eval_runtime": 78.777,
      "eval_samples_per_second": 6.322,
      "eval_steps_per_second": 3.161,
      "eval_wer": 30.367585630743527,
      "step": 20000
    },
    {
      "epoch": 22.67,
      "learning_rate": 9.665532879818595e-05,
      "loss": 0.3126,
      "step": 30000
    },
    {
      "epoch": 22.67,
      "eval_cer": 9.011592284881013,
      "eval_loss": 0.3147233724594116,
      "eval_runtime": 77.284,
      "eval_samples_per_second": 6.444,
      "eval_steps_per_second": 3.222,
      "eval_wer": 28.390420495683657,
      "step": 30000
    },
    {
      "epoch": 30.22,
      "learning_rate": 8.720710506424793e-05,
      "loss": 0.2509,
      "step": 40000
    },
    {
      "epoch": 30.22,
      "eval_cer": 8.134205818199643,
      "eval_loss": 0.3039480447769165,
      "eval_runtime": 68.0983,
      "eval_samples_per_second": 7.313,
      "eval_steps_per_second": 3.656,
      "eval_wer": 26.427179058758004,
      "step": 40000
    },
    {
      "epoch": 37.78,
      "learning_rate": 7.77588813303099e-05,
      "loss": 0.2084,
      "step": 50000
    },
    {
      "epoch": 37.78,
      "eval_cer": 7.7028649854177385,
      "eval_loss": 0.2937542498111725,
      "eval_runtime": 66.6192,
      "eval_samples_per_second": 7.475,
      "eval_steps_per_second": 3.738,
      "eval_wer": 25.382901698691175,
      "step": 50000
    },
    {
      "epoch": 45.33,
      "learning_rate": 6.831065759637189e-05,
      "loss": 0.1794,
      "step": 60000
    },
    {
      "epoch": 45.33,
      "eval_cer": 7.658750582065044,
      "eval_loss": 0.3241848349571228,
      "eval_runtime": 65.9465,
      "eval_samples_per_second": 7.552,
      "eval_steps_per_second": 3.776,
      "eval_wer": 24.770258980785297,
      "step": 60000
    },
    {
      "epoch": 52.89,
      "learning_rate": 5.886243386243386e-05,
      "loss": 0.1566,
      "step": 70000
    },
    {
      "epoch": 52.89,
      "eval_cer": 7.2984829546847045,
      "eval_loss": 0.33441099524497986,
      "eval_runtime": 66.4973,
      "eval_samples_per_second": 7.489,
      "eval_steps_per_second": 3.745,
      "eval_wer": 24.33862433862434,
      "step": 70000
    },
    {
      "epoch": 60.44,
      "learning_rate": 4.9414210128495846e-05,
      "loss": 0.1381,
      "step": 80000
    },
    {
      "epoch": 60.44,
      "eval_cer": 7.394064161948877,
      "eval_loss": 0.3713204860687256,
      "eval_runtime": 66.5533,
      "eval_samples_per_second": 7.483,
      "eval_steps_per_second": 3.741,
      "eval_wer": 23.62851573377889,
      "step": 80000
    },
    {
      "epoch": 68.0,
      "learning_rate": 3.9965986394557825e-05,
      "loss": 0.1227,
      "step": 90000
    },
    {
      "epoch": 68.0,
      "eval_cer": 7.109771340342622,
      "eval_loss": 0.3827340006828308,
      "eval_runtime": 66.2381,
      "eval_samples_per_second": 7.518,
      "eval_steps_per_second": 3.759,
      "eval_wer": 22.946254525201894,
      "step": 90000
    },
    {
      "epoch": 75.55,
      "learning_rate": 3.0517762660619804e-05,
      "loss": 0.1097,
      "step": 100000
    },
    {
      "epoch": 75.55,
      "eval_cer": 7.124476141460187,
      "eval_loss": 0.415243923664093,
      "eval_runtime": 149.6844,
      "eval_samples_per_second": 3.327,
      "eval_steps_per_second": 1.663,
      "eval_wer": 22.988025619604567,
      "step": 100000
    },
    {
      "epoch": 83.11,
      "learning_rate": 2.1069538926681782e-05,
      "loss": 0.0988,
      "step": 110000
    },
    {
      "epoch": 83.11,
      "eval_cer": 6.854888120971498,
      "eval_loss": 0.4487506151199341,
      "eval_runtime": 66.6595,
      "eval_samples_per_second": 7.471,
      "eval_steps_per_second": 3.735,
      "eval_wer": 22.570314675577833,
      "step": 110000
    },
    {
      "epoch": 90.66,
      "learning_rate": 1.1621315192743764e-05,
      "loss": 0.0896,
      "step": 120000
    },
    {
      "epoch": 90.66,
      "eval_cer": 6.808322917432541,
      "eval_loss": 0.44282594323158264,
      "eval_runtime": 66.7552,
      "eval_samples_per_second": 7.46,
      "eval_steps_per_second": 3.73,
      "eval_wer": 21.99944305207463,
      "step": 120000
    },
    {
      "epoch": 98.22,
      "learning_rate": 2.1730914588057445e-06,
      "loss": 0.0823,
      "step": 130000
    },
    {
      "epoch": 98.22,
      "eval_cer": 6.602455701786633,
      "eval_loss": 0.4513276517391205,
      "eval_runtime": 66.6171,
      "eval_samples_per_second": 7.476,
      "eval_steps_per_second": 3.738,
      "eval_wer": 21.74881648565859,
      "step": 130000
    }
  ],
  "logging_steps": 10000,
  "max_steps": 132300,
  "num_train_epochs": 100,
  "save_steps": 10000,
  "total_flos": 4.063479904391249e+20,
  "trial_name": null,
  "trial_params": null
}