utakumi committed on
Commit
920c0c8
1 Parent(s): 9ec9194

End of training

Browse files
Files changed (5) hide show
  1. README.md +6 -2
  2. all_results.json +16 -0
  3. eval_results.json +10 -0
  4. train_results.json +9 -0
  5. trainer_state.json +518 -0
README.md CHANGED
@@ -1,8 +1,12 @@
1
  ---
2
  library_name: transformers
 
 
3
  license: apache-2.0
4
  base_model: rinna/japanese-hubert-base
5
  tags:
 
 
6
  - generated_from_trainer
7
  metrics:
8
  - wer
@@ -16,9 +20,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # Hubert-common_voice-phoneme-debug-warmup500
18
 
19
- This model is a fine-tuned version of [rinna/japanese-hubert-base](https://huggingface.co/rinna/japanese-hubert-base) on the None dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 2.9678
22
  - Wer: 1.0
23
  - Cer: 0.9851
24
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - ja
5
  license: apache-2.0
6
  base_model: rinna/japanese-hubert-base
7
  tags:
8
+ - automatic-speech-recognition
9
+ - mozilla-foundation/common_voice_13_0
10
  - generated_from_trainer
11
  metrics:
12
  - wer
 
20
 
21
  # Hubert-common_voice-phoneme-debug-warmup500
22
 
23
+ This model is a fine-tuned version of [rinna/japanese-hubert-base](https://huggingface.co/rinna/japanese-hubert-base) on the MOZILLA-FOUNDATION/COMMON_VOICE_13_0 - JA dataset.
24
  It achieves the following results on the evaluation set:
25
+ - Loss: 2.9679
26
  - Wer: 1.0
27
  - Cer: 0.9851
28
 
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_cer": 0.9850705754614549,
4
+ "eval_loss": 2.9679019451141357,
5
+ "eval_runtime": 21.4298,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 23.332,
8
+ "eval_steps_per_second": 2.94,
9
+ "eval_wer": 1.0,
10
+ "total_flos": 6.12842473721103e+18,
11
+ "train_loss": 3.08231661460642,
12
+ "train_runtime": 8045.4114,
13
+ "train_samples": 4500,
14
+ "train_samples_per_second": 16.78,
15
+ "train_steps_per_second": 0.526
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "eval_cer": 0.9850705754614549,
4
+ "eval_loss": 2.9679019451141357,
5
+ "eval_runtime": 21.4298,
6
+ "eval_samples": 500,
7
+ "eval_samples_per_second": 23.332,
8
+ "eval_steps_per_second": 2.94,
9
+ "eval_wer": 1.0
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 30.0,
3
+ "total_flos": 6.12842473721103e+18,
4
+ "train_loss": 3.08231661460642,
5
+ "train_runtime": 8045.4114,
6
+ "train_samples": 4500,
7
+ "train_samples_per_second": 16.78,
8
+ "train_steps_per_second": 0.526
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,518 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.9668984413146973,
3
+ "best_model_checkpoint": "./Hubert-common_voice-phoneme-debug-warmup500/checkpoint-3600",
4
+ "epoch": 30.0,
5
+ "eval_steps": 100,
6
+ "global_step": 4230,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.7092198581560284,
13
+ "eval_cer": 0.9850705754614549,
14
+ "eval_loss": 4.566883563995361,
15
+ "eval_runtime": 21.222,
16
+ "eval_samples_per_second": 23.56,
17
+ "eval_steps_per_second": 2.969,
18
+ "eval_wer": 1.0,
19
+ "step": 100
20
+ },
21
+ {
22
+ "epoch": 1.4184397163120568,
23
+ "eval_cer": 0.9851007359150682,
24
+ "eval_loss": 3.011932134628296,
25
+ "eval_runtime": 21.0835,
26
+ "eval_samples_per_second": 23.715,
27
+ "eval_steps_per_second": 2.988,
28
+ "eval_wer": 1.0,
29
+ "step": 200
30
+ },
31
+ {
32
+ "epoch": 2.127659574468085,
33
+ "eval_cer": 0.9850705754614549,
34
+ "eval_loss": 2.9839680194854736,
35
+ "eval_runtime": 20.7833,
36
+ "eval_samples_per_second": 24.058,
37
+ "eval_steps_per_second": 3.031,
38
+ "eval_wer": 1.0,
39
+ "step": 300
40
+ },
41
+ {
42
+ "epoch": 2.8368794326241136,
43
+ "eval_cer": 0.9851007359150682,
44
+ "eval_loss": 2.9763612747192383,
45
+ "eval_runtime": 20.9118,
46
+ "eval_samples_per_second": 23.91,
47
+ "eval_steps_per_second": 3.013,
48
+ "eval_wer": 1.0,
49
+ "step": 400
50
+ },
51
+ {
52
+ "epoch": 3.546099290780142,
53
+ "grad_norm": 0.8111785054206848,
54
+ "learning_rate": 0.0002982,
55
+ "loss": 3.973,
56
+ "step": 500
57
+ },
58
+ {
59
+ "epoch": 3.546099290780142,
60
+ "eval_cer": 0.9851007359150682,
61
+ "eval_loss": 2.9795632362365723,
62
+ "eval_runtime": 20.5236,
63
+ "eval_samples_per_second": 24.362,
64
+ "eval_steps_per_second": 3.07,
65
+ "eval_wer": 1.0,
66
+ "step": 500
67
+ },
68
+ {
69
+ "epoch": 4.25531914893617,
70
+ "eval_cer": 0.9851007359150682,
71
+ "eval_loss": 2.9758143424987793,
72
+ "eval_runtime": 21.1254,
73
+ "eval_samples_per_second": 23.668,
74
+ "eval_steps_per_second": 2.982,
75
+ "eval_wer": 1.0,
76
+ "step": 600
77
+ },
78
+ {
79
+ "epoch": 4.964539007092198,
80
+ "eval_cer": 0.9851007359150682,
81
+ "eval_loss": 2.9691450595855713,
82
+ "eval_runtime": 20.6886,
83
+ "eval_samples_per_second": 24.168,
84
+ "eval_steps_per_second": 3.045,
85
+ "eval_wer": 1.0,
86
+ "step": 700
87
+ },
88
+ {
89
+ "epoch": 5.673758865248227,
90
+ "eval_cer": 0.9850404150078417,
91
+ "eval_loss": 2.985788345336914,
92
+ "eval_runtime": 20.5834,
93
+ "eval_samples_per_second": 24.291,
94
+ "eval_steps_per_second": 3.061,
95
+ "eval_wer": 1.0,
96
+ "step": 800
97
+ },
98
+ {
99
+ "epoch": 6.382978723404255,
100
+ "eval_cer": 0.9850705754614549,
101
+ "eval_loss": 2.9692296981811523,
102
+ "eval_runtime": 21.3068,
103
+ "eval_samples_per_second": 23.467,
104
+ "eval_steps_per_second": 2.957,
105
+ "eval_wer": 1.0,
106
+ "step": 900
107
+ },
108
+ {
109
+ "epoch": 7.092198581560283,
110
+ "grad_norm": 0.9110927581787109,
111
+ "learning_rate": 0.00028704894194342103,
112
+ "loss": 2.9654,
113
+ "step": 1000
114
+ },
115
+ {
116
+ "epoch": 7.092198581560283,
117
+ "eval_cer": 0.9850404150078417,
118
+ "eval_loss": 2.9895429611206055,
119
+ "eval_runtime": 21.477,
120
+ "eval_samples_per_second": 23.281,
121
+ "eval_steps_per_second": 2.933,
122
+ "eval_wer": 1.0,
123
+ "step": 1000
124
+ },
125
+ {
126
+ "epoch": 7.801418439716312,
127
+ "eval_cer": 0.9850102545542285,
128
+ "eval_loss": 2.972479820251465,
129
+ "eval_runtime": 20.7781,
130
+ "eval_samples_per_second": 24.064,
131
+ "eval_steps_per_second": 3.032,
132
+ "eval_wer": 1.0,
133
+ "step": 1100
134
+ },
135
+ {
136
+ "epoch": 8.51063829787234,
137
+ "eval_cer": 0.9850404150078417,
138
+ "eval_loss": 2.971254825592041,
139
+ "eval_runtime": 21.378,
140
+ "eval_samples_per_second": 23.389,
141
+ "eval_steps_per_second": 2.947,
142
+ "eval_wer": 1.0,
143
+ "step": 1200
144
+ },
145
+ {
146
+ "epoch": 9.21985815602837,
147
+ "eval_cer": 0.9851007359150682,
148
+ "eval_loss": 2.9757533073425293,
149
+ "eval_runtime": 21.4867,
150
+ "eval_samples_per_second": 23.27,
151
+ "eval_steps_per_second": 2.932,
152
+ "eval_wer": 1.0,
153
+ "step": 1300
154
+ },
155
+ {
156
+ "epoch": 9.929078014184396,
157
+ "eval_cer": 0.9850404150078417,
158
+ "eval_loss": 2.978407859802246,
159
+ "eval_runtime": 21.1721,
160
+ "eval_samples_per_second": 23.616,
161
+ "eval_steps_per_second": 2.976,
162
+ "eval_wer": 1.0,
163
+ "step": 1400
164
+ },
165
+ {
166
+ "epoch": 10.638297872340425,
167
+ "grad_norm": 0.4841027557849884,
168
+ "learning_rate": 0.00025015032745484046,
169
+ "loss": 2.9643,
170
+ "step": 1500
171
+ },
172
+ {
173
+ "epoch": 10.638297872340425,
174
+ "eval_cer": 0.9851007359150682,
175
+ "eval_loss": 2.968749761581421,
176
+ "eval_runtime": 21.3576,
177
+ "eval_samples_per_second": 23.411,
178
+ "eval_steps_per_second": 2.95,
179
+ "eval_wer": 1.0,
180
+ "step": 1500
181
+ },
182
+ {
183
+ "epoch": 11.347517730496454,
184
+ "eval_cer": 0.9850705754614549,
185
+ "eval_loss": 2.977853536605835,
186
+ "eval_runtime": 21.1411,
187
+ "eval_samples_per_second": 23.651,
188
+ "eval_steps_per_second": 2.98,
189
+ "eval_wer": 1.0,
190
+ "step": 1600
191
+ },
192
+ {
193
+ "epoch": 12.056737588652481,
194
+ "eval_cer": 0.9850404150078417,
195
+ "eval_loss": 2.9678869247436523,
196
+ "eval_runtime": 21.0582,
197
+ "eval_samples_per_second": 23.744,
198
+ "eval_steps_per_second": 2.992,
199
+ "eval_wer": 1.0,
200
+ "step": 1700
201
+ },
202
+ {
203
+ "epoch": 12.76595744680851,
204
+ "eval_cer": 0.9850705754614549,
205
+ "eval_loss": 2.976925849914551,
206
+ "eval_runtime": 21.1415,
207
+ "eval_samples_per_second": 23.65,
208
+ "eval_steps_per_second": 2.98,
209
+ "eval_wer": 1.0,
210
+ "step": 1800
211
+ },
212
+ {
213
+ "epoch": 13.47517730496454,
214
+ "eval_cer": 0.9850705754614549,
215
+ "eval_loss": 2.9718081951141357,
216
+ "eval_runtime": 20.8074,
217
+ "eval_samples_per_second": 24.03,
218
+ "eval_steps_per_second": 3.028,
219
+ "eval_wer": 1.0,
220
+ "step": 1900
221
+ },
222
+ {
223
+ "epoch": 14.184397163120567,
224
+ "grad_norm": 0.6759688258171082,
225
+ "learning_rate": 0.00019575136936746506,
226
+ "loss": 2.9631,
227
+ "step": 2000
228
+ },
229
+ {
230
+ "epoch": 14.184397163120567,
231
+ "eval_cer": 0.9851007359150682,
232
+ "eval_loss": 2.968552827835083,
233
+ "eval_runtime": 21.3479,
234
+ "eval_samples_per_second": 23.421,
235
+ "eval_steps_per_second": 2.951,
236
+ "eval_wer": 1.0,
237
+ "step": 2000
238
+ },
239
+ {
240
+ "epoch": 14.893617021276595,
241
+ "eval_cer": 0.9850404150078417,
242
+ "eval_loss": 2.9706172943115234,
243
+ "eval_runtime": 21.7626,
244
+ "eval_samples_per_second": 22.975,
245
+ "eval_steps_per_second": 2.895,
246
+ "eval_wer": 1.0,
247
+ "step": 2100
248
+ },
249
+ {
250
+ "epoch": 15.602836879432624,
251
+ "eval_cer": 0.9851007359150682,
252
+ "eval_loss": 2.9790878295898438,
253
+ "eval_runtime": 20.9137,
254
+ "eval_samples_per_second": 23.908,
255
+ "eval_steps_per_second": 3.012,
256
+ "eval_wer": 1.0,
257
+ "step": 2200
258
+ },
259
+ {
260
+ "epoch": 16.31205673758865,
261
+ "eval_cer": 0.9850705754614549,
262
+ "eval_loss": 2.97310733795166,
263
+ "eval_runtime": 21.1636,
264
+ "eval_samples_per_second": 23.625,
265
+ "eval_steps_per_second": 2.977,
266
+ "eval_wer": 1.0,
267
+ "step": 2300
268
+ },
269
+ {
270
+ "epoch": 17.02127659574468,
271
+ "eval_cer": 0.9850404150078417,
272
+ "eval_loss": 2.97218656539917,
273
+ "eval_runtime": 20.5792,
274
+ "eval_samples_per_second": 24.296,
275
+ "eval_steps_per_second": 3.061,
276
+ "eval_wer": 1.0,
277
+ "step": 2400
278
+ },
279
+ {
280
+ "epoch": 17.73049645390071,
281
+ "grad_norm": 0.45569872856140137,
282
+ "learning_rate": 0.00013335778256150607,
283
+ "loss": 2.9627,
284
+ "step": 2500
285
+ },
286
+ {
287
+ "epoch": 17.73049645390071,
288
+ "eval_cer": 0.9850705754614549,
289
+ "eval_loss": 2.9722900390625,
290
+ "eval_runtime": 21.0187,
291
+ "eval_samples_per_second": 23.788,
292
+ "eval_steps_per_second": 2.997,
293
+ "eval_wer": 1.0,
294
+ "step": 2500
295
+ },
296
+ {
297
+ "epoch": 18.43971631205674,
298
+ "eval_cer": 0.9850705754614549,
299
+ "eval_loss": 2.9688594341278076,
300
+ "eval_runtime": 20.8051,
301
+ "eval_samples_per_second": 24.033,
302
+ "eval_steps_per_second": 3.028,
303
+ "eval_wer": 1.0,
304
+ "step": 2600
305
+ },
306
+ {
307
+ "epoch": 19.148936170212767,
308
+ "eval_cer": 0.9850705754614549,
309
+ "eval_loss": 2.974661111831665,
310
+ "eval_runtime": 20.7915,
311
+ "eval_samples_per_second": 24.048,
312
+ "eval_steps_per_second": 3.03,
313
+ "eval_wer": 1.0,
314
+ "step": 2700
315
+ },
316
+ {
317
+ "epoch": 19.858156028368793,
318
+ "eval_cer": 0.9850705754614549,
319
+ "eval_loss": 2.980069160461426,
320
+ "eval_runtime": 21.3631,
321
+ "eval_samples_per_second": 23.405,
322
+ "eval_steps_per_second": 2.949,
323
+ "eval_wer": 1.0,
324
+ "step": 2800
325
+ },
326
+ {
327
+ "epoch": 20.56737588652482,
328
+ "eval_cer": 0.9850705754614549,
329
+ "eval_loss": 2.9740312099456787,
330
+ "eval_runtime": 21.1673,
331
+ "eval_samples_per_second": 23.621,
332
+ "eval_steps_per_second": 2.976,
333
+ "eval_wer": 1.0,
334
+ "step": 2900
335
+ },
336
+ {
337
+ "epoch": 21.27659574468085,
338
+ "grad_norm": 1.4903497695922852,
339
+ "learning_rate": 7.387226935671251e-05,
340
+ "loss": 2.9622,
341
+ "step": 3000
342
+ },
343
+ {
344
+ "epoch": 21.27659574468085,
345
+ "eval_cer": 0.9850102545542285,
346
+ "eval_loss": 2.973616600036621,
347
+ "eval_runtime": 21.0369,
348
+ "eval_samples_per_second": 23.768,
349
+ "eval_steps_per_second": 2.995,
350
+ "eval_wer": 1.0,
351
+ "step": 3000
352
+ },
353
+ {
354
+ "epoch": 21.98581560283688,
355
+ "eval_cer": 0.9850705754614549,
356
+ "eval_loss": 2.9718637466430664,
357
+ "eval_runtime": 21.543,
358
+ "eval_samples_per_second": 23.209,
359
+ "eval_steps_per_second": 2.924,
360
+ "eval_wer": 1.0,
361
+ "step": 3100
362
+ },
363
+ {
364
+ "epoch": 22.69503546099291,
365
+ "eval_cer": 0.9850404150078417,
366
+ "eval_loss": 2.9710469245910645,
367
+ "eval_runtime": 21.5903,
368
+ "eval_samples_per_second": 23.159,
369
+ "eval_steps_per_second": 2.918,
370
+ "eval_wer": 1.0,
371
+ "step": 3200
372
+ },
373
+ {
374
+ "epoch": 23.404255319148938,
375
+ "eval_cer": 0.9850102545542285,
376
+ "eval_loss": 2.971414804458618,
377
+ "eval_runtime": 21.0287,
378
+ "eval_samples_per_second": 23.777,
379
+ "eval_steps_per_second": 2.996,
380
+ "eval_wer": 1.0,
381
+ "step": 3300
382
+ },
383
+ {
384
+ "epoch": 24.113475177304963,
385
+ "eval_cer": 0.9851007359150682,
386
+ "eval_loss": 2.9701104164123535,
387
+ "eval_runtime": 22.2142,
388
+ "eval_samples_per_second": 22.508,
389
+ "eval_steps_per_second": 2.836,
390
+ "eval_wer": 1.0,
391
+ "step": 3400
392
+ },
393
+ {
394
+ "epoch": 24.822695035460992,
395
+ "grad_norm": 0.7893990874290466,
396
+ "learning_rate": 2.768937310296969e-05,
397
+ "loss": 2.9609,
398
+ "step": 3500
399
+ },
400
+ {
401
+ "epoch": 24.822695035460992,
402
+ "eval_cer": 0.9850705754614549,
403
+ "eval_loss": 2.9694862365722656,
404
+ "eval_runtime": 20.7615,
405
+ "eval_samples_per_second": 24.083,
406
+ "eval_steps_per_second": 3.034,
407
+ "eval_wer": 1.0,
408
+ "step": 3500
409
+ },
410
+ {
411
+ "epoch": 25.53191489361702,
412
+ "eval_cer": 0.9850404150078417,
413
+ "eval_loss": 2.9668984413146973,
414
+ "eval_runtime": 21.25,
415
+ "eval_samples_per_second": 23.529,
416
+ "eval_steps_per_second": 2.965,
417
+ "eval_wer": 1.0,
418
+ "step": 3600
419
+ },
420
+ {
421
+ "epoch": 26.24113475177305,
422
+ "eval_cer": 0.9851308963686813,
423
+ "eval_loss": 2.9773755073547363,
424
+ "eval_runtime": 21.3493,
425
+ "eval_samples_per_second": 23.42,
426
+ "eval_steps_per_second": 2.951,
427
+ "eval_wer": 1.0,
428
+ "step": 3700
429
+ },
430
+ {
431
+ "epoch": 26.95035460992908,
432
+ "eval_cer": 0.9850705754614549,
433
+ "eval_loss": 2.971196174621582,
434
+ "eval_runtime": 20.8572,
435
+ "eval_samples_per_second": 23.973,
436
+ "eval_steps_per_second": 3.021,
437
+ "eval_wer": 1.0,
438
+ "step": 3800
439
+ },
440
+ {
441
+ "epoch": 27.659574468085108,
442
+ "eval_cer": 0.9850705754614549,
443
+ "eval_loss": 2.970116138458252,
444
+ "eval_runtime": 21.7187,
445
+ "eval_samples_per_second": 23.022,
446
+ "eval_steps_per_second": 2.901,
447
+ "eval_wer": 1.0,
448
+ "step": 3900
449
+ },
450
+ {
451
+ "epoch": 28.368794326241133,
452
+ "grad_norm": 0.41945821046829224,
453
+ "learning_rate": 2.8791278517630856e-06,
454
+ "loss": 2.962,
455
+ "step": 4000
456
+ },
457
+ {
458
+ "epoch": 28.368794326241133,
459
+ "eval_cer": 0.9851308963686813,
460
+ "eval_loss": 2.9688799381256104,
461
+ "eval_runtime": 21.2529,
462
+ "eval_samples_per_second": 23.526,
463
+ "eval_steps_per_second": 2.964,
464
+ "eval_wer": 1.0,
465
+ "step": 4000
466
+ },
467
+ {
468
+ "epoch": 29.078014184397162,
469
+ "eval_cer": 0.9850404150078417,
470
+ "eval_loss": 2.973837375640869,
471
+ "eval_runtime": 21.0065,
472
+ "eval_samples_per_second": 23.802,
473
+ "eval_steps_per_second": 2.999,
474
+ "eval_wer": 1.0,
475
+ "step": 4100
476
+ },
477
+ {
478
+ "epoch": 29.78723404255319,
479
+ "eval_cer": 0.9851308963686813,
480
+ "eval_loss": 2.9678163528442383,
481
+ "eval_runtime": 21.5034,
482
+ "eval_samples_per_second": 23.252,
483
+ "eval_steps_per_second": 2.93,
484
+ "eval_wer": 1.0,
485
+ "step": 4200
486
+ },
487
+ {
488
+ "epoch": 30.0,
489
+ "step": 4230,
490
+ "total_flos": 6.12842473721103e+18,
491
+ "train_loss": 3.08231661460642,
492
+ "train_runtime": 8045.4114,
493
+ "train_samples_per_second": 16.78,
494
+ "train_steps_per_second": 0.526
495
+ }
496
+ ],
497
+ "logging_steps": 500,
498
+ "max_steps": 4230,
499
+ "num_input_tokens_seen": 0,
500
+ "num_train_epochs": 30,
501
+ "save_steps": 400,
502
+ "stateful_callbacks": {
503
+ "TrainerControl": {
504
+ "args": {
505
+ "should_epoch_stop": false,
506
+ "should_evaluate": false,
507
+ "should_log": false,
508
+ "should_save": true,
509
+ "should_training_stop": true
510
+ },
511
+ "attributes": {}
512
+ }
513
+ },
514
+ "total_flos": 6.12842473721103e+18,
515
+ "train_batch_size": 16,
516
+ "trial_name": null,
517
+ "trial_params": null
518
+ }