Seosnaps commited on
Commit
e938f15
1 Parent(s): be1ee2a

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac4962b8b8afe0d36f41b787869fbebbfd594ea53b45bbca7618c65ce68156bf
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e277a4f4a7cb3582830e2d23c77db492c8e24ba3a899fc3a666129a2a2689c3
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1af3b1cf9e36066912abae789cdee6acff65d835a8cecd1904888da843a13677
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b91d01bfba26061e5726be9c0364da8b10817efe206f2acc64d9ee9d1e5d46d4
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efc19516f0bb6ebbb441d01c76bfbe40ffc86ac7def6317731979041e8f3b7ba
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1e95b55e61f2a72e5e5389523fde8c9fb1a2902741a578a17a342a5d7a4df5d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c57fd35b966308a3b0826e505a4ead95eaee451ffe4c405784a98aead25ab3e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1717d9346c92bb9117ed254820c219b1728f8d3d4762e26811e93dd311443eca
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 180.03448936577888,
3
- "best_model_checkpoint": "./whisper-small-ha-v3/checkpoint-500",
4
- "epoch": 3.1847133757961785,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,156 @@
157
  "eval_wer": 180.03448936577888,
158
  "eval_wer_ortho": 183.0078125,
159
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 25,
@@ -176,7 +326,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 2.30522017775616e+18,
180
  "train_batch_size": 16,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
+ "best_metric": 96.39777735198314,
3
+ "best_model_checkpoint": "./whisper-small-ha-v3/checkpoint-1000",
4
+ "epoch": 6.369426751592357,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_wer": 180.03448936577888,
158
  "eval_wer_ortho": 183.0078125,
159
  "step": 500
160
+ },
161
+ {
162
+ "epoch": 3.343949044585987,
163
+ "grad_norm": 7.396618843078613,
164
+ "learning_rate": 0.0005,
165
+ "loss": 1.6942,
166
+ "step": 525
167
+ },
168
+ {
169
+ "epoch": 3.5031847133757963,
170
+ "grad_norm": 7.276708602905273,
171
+ "learning_rate": 0.0005,
172
+ "loss": 1.7758,
173
+ "step": 550
174
+ },
175
+ {
176
+ "epoch": 3.662420382165605,
177
+ "grad_norm": 8.082304000854492,
178
+ "learning_rate": 0.0005,
179
+ "loss": 1.8561,
180
+ "step": 575
181
+ },
182
+ {
183
+ "epoch": 3.821656050955414,
184
+ "grad_norm": 7.295908451080322,
185
+ "learning_rate": 0.0005,
186
+ "loss": 1.9064,
187
+ "step": 600
188
+ },
189
+ {
190
+ "epoch": 3.980891719745223,
191
+ "grad_norm": 7.9326491355896,
192
+ "learning_rate": 0.0005,
193
+ "loss": 1.9251,
194
+ "step": 625
195
+ },
196
+ {
197
+ "epoch": 4.140127388535032,
198
+ "grad_norm": 7.216257095336914,
199
+ "learning_rate": 0.0005,
200
+ "loss": 1.3606,
201
+ "step": 650
202
+ },
203
+ {
204
+ "epoch": 4.2993630573248405,
205
+ "grad_norm": 6.987307548522949,
206
+ "learning_rate": 0.0005,
207
+ "loss": 1.382,
208
+ "step": 675
209
+ },
210
+ {
211
+ "epoch": 4.45859872611465,
212
+ "grad_norm": 8.181788444519043,
213
+ "learning_rate": 0.0005,
214
+ "loss": 1.4235,
215
+ "step": 700
216
+ },
217
+ {
218
+ "epoch": 4.617834394904459,
219
+ "grad_norm": 7.226937294006348,
220
+ "learning_rate": 0.0005,
221
+ "loss": 1.5352,
222
+ "step": 725
223
+ },
224
+ {
225
+ "epoch": 4.777070063694268,
226
+ "grad_norm": 7.664785385131836,
227
+ "learning_rate": 0.0005,
228
+ "loss": 1.5803,
229
+ "step": 750
230
+ },
231
+ {
232
+ "epoch": 4.936305732484076,
233
+ "grad_norm": 8.353466987609863,
234
+ "learning_rate": 0.0005,
235
+ "loss": 1.6793,
236
+ "step": 775
237
+ },
238
+ {
239
+ "epoch": 5.095541401273885,
240
+ "grad_norm": 7.170167922973633,
241
+ "learning_rate": 0.0005,
242
+ "loss": 1.3162,
243
+ "step": 800
244
+ },
245
+ {
246
+ "epoch": 5.254777070063694,
247
+ "grad_norm": 7.019118309020996,
248
+ "learning_rate": 0.0005,
249
+ "loss": 1.1988,
250
+ "step": 825
251
+ },
252
+ {
253
+ "epoch": 5.414012738853503,
254
+ "grad_norm": 6.39375638961792,
255
+ "learning_rate": 0.0005,
256
+ "loss": 1.2476,
257
+ "step": 850
258
+ },
259
+ {
260
+ "epoch": 5.573248407643312,
261
+ "grad_norm": 7.22137451171875,
262
+ "learning_rate": 0.0005,
263
+ "loss": 1.325,
264
+ "step": 875
265
+ },
266
+ {
267
+ "epoch": 5.732484076433121,
268
+ "grad_norm": 6.7961883544921875,
269
+ "learning_rate": 0.0005,
270
+ "loss": 1.3379,
271
+ "step": 900
272
+ },
273
+ {
274
+ "epoch": 5.89171974522293,
275
+ "grad_norm": 7.7992377281188965,
276
+ "learning_rate": 0.0005,
277
+ "loss": 1.3845,
278
+ "step": 925
279
+ },
280
+ {
281
+ "epoch": 6.050955414012739,
282
+ "grad_norm": 6.209515571594238,
283
+ "learning_rate": 0.0005,
284
+ "loss": 1.2692,
285
+ "step": 950
286
+ },
287
+ {
288
+ "epoch": 6.210191082802548,
289
+ "grad_norm": 6.86682653427124,
290
+ "learning_rate": 0.0005,
291
+ "loss": 1.1113,
292
+ "step": 975
293
+ },
294
+ {
295
+ "epoch": 6.369426751592357,
296
+ "grad_norm": 6.647078037261963,
297
+ "learning_rate": 0.0005,
298
+ "loss": 1.1378,
299
+ "step": 1000
300
+ },
301
+ {
302
+ "epoch": 6.369426751592357,
303
+ "eval_loss": 4.400506019592285,
304
+ "eval_runtime": 280.0441,
305
+ "eval_samples_per_second": 2.357,
306
+ "eval_steps_per_second": 0.15,
307
+ "eval_wer": 96.39777735198314,
308
+ "eval_wer_ortho": 96.640625,
309
+ "step": 1000
310
  }
311
  ],
312
  "logging_steps": 25,
 
326
  "attributes": {}
327
  }
328
  },
329
+ "total_flos": 4.61044035551232e+18,
330
  "train_batch_size": 16,
331
  "trial_name": null,
332
  "trial_params": null