boumehdi commited on
Commit
cac6943
·
1 Parent(s): 8fcbcd1

Upload 9 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +10 -295
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b09ddf36a7208c177f18ba4beb1f9444e589df2ba623a77db22606561e2bdd30
3
  size 2490593669
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf2c9a2b141d61ae97e6f49cd706632b3b9db6675f7fb64adc99a2beffed4a6
3
  size 2490593669
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41ae0ddbdd7c6ef360b0df269f7a8cc3222f33dbb06e8ddd3d864c8b47b15639
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1bc7bd2434b9ee5cf00b9927285575898d67205317086fa93bb0ea75f822ef
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feea8e9302ffaaad540b7e651db7227b4d4810e6396382b696d3254395fcb71c
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be6e271ede82a9bafb9710b6f305bb3591279ca8f3cffac6f33b9b4d8093cc7
3
  size 14639
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fccf0f9be1bb8f24861e4393745b3e09cc2687125a69e3757955fb0f0925ea5
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2891524b7f9cad24992893a2dc3e9813564138d608b7f65cca009a6af698600
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7b37a5dd6c97416b7dbfb2cf97525baab19ed24f1c419c6b6c7efc5974c4d96
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:934b8e9fba487b03faa18f476924d39420f50ac287656b5ab10fb96e2162d17f
3
  size 627
trainer_state.json CHANGED
@@ -1,316 +1,31 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.595829195630586,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.08,
12
- "learning_rate": 1e-05,
13
- "loss": 0.0212,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.08,
18
- "eval_loss": 0.32025715708732605,
19
- "eval_runtime": 212.3535,
20
- "eval_samples_per_second": 16.929,
21
- "eval_steps_per_second": 2.119,
22
- "eval_wer": 0.21324007807417047,
23
- "step": 10
24
- },
25
- {
26
- "epoch": 0.16,
27
- "learning_rate": 2e-05,
28
- "loss": 0.0188,
29
- "step": 20
30
- },
31
- {
32
- "epoch": 0.16,
33
- "eval_loss": 0.3204110860824585,
34
- "eval_runtime": 140.687,
35
- "eval_samples_per_second": 25.553,
36
- "eval_steps_per_second": 3.199,
37
- "eval_wer": 0.21329429624810237,
38
- "step": 20
39
- },
40
  {
41
  "epoch": 0.24,
42
- "learning_rate": 3e-05,
43
- "loss": 0.0171,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.24,
48
- "eval_loss": 0.3230796754360199,
49
- "eval_runtime": 143.3213,
50
- "eval_samples_per_second": 25.084,
51
- "eval_steps_per_second": 3.14,
52
- "eval_wer": 0.21454131424853612,
53
  "step": 30
54
- },
55
- {
56
- "epoch": 0.32,
57
- "learning_rate": 4e-05,
58
- "loss": 0.0187,
59
- "step": 40
60
- },
61
- {
62
- "epoch": 0.32,
63
- "eval_loss": 0.3250657021999359,
64
- "eval_runtime": 144.0633,
65
- "eval_samples_per_second": 24.954,
66
- "eval_steps_per_second": 3.124,
67
- "eval_wer": 0.21242680546519194,
68
- "step": 40
69
- },
70
- {
71
- "epoch": 0.4,
72
- "learning_rate": 5e-05,
73
- "loss": 0.0199,
74
- "step": 50
75
- },
76
- {
77
- "epoch": 0.4,
78
- "eval_loss": 0.3246767818927765,
79
- "eval_runtime": 144.6093,
80
- "eval_samples_per_second": 24.86,
81
- "eval_steps_per_second": 3.112,
82
- "eval_wer": 0.21258945998698764,
83
- "step": 50
84
- },
85
- {
86
- "epoch": 0.48,
87
- "learning_rate": 6e-05,
88
- "loss": 0.0203,
89
- "step": 60
90
- },
91
- {
92
- "epoch": 0.48,
93
- "eval_loss": 0.31979820132255554,
94
- "eval_runtime": 152.9805,
95
- "eval_samples_per_second": 23.5,
96
- "eval_steps_per_second": 2.942,
97
- "eval_wer": 0.21242680546519194,
98
- "step": 60
99
- },
100
- {
101
- "epoch": 0.56,
102
- "learning_rate": 7e-05,
103
- "loss": 0.0199,
104
- "step": 70
105
- },
106
- {
107
- "epoch": 0.56,
108
- "eval_loss": 0.31615573167800903,
109
- "eval_runtime": 152.4511,
110
- "eval_samples_per_second": 23.581,
111
- "eval_steps_per_second": 2.952,
112
- "eval_wer": 0.21378225981348947,
113
- "step": 70
114
- },
115
- {
116
- "epoch": 0.64,
117
- "learning_rate": 8e-05,
118
- "loss": 0.0203,
119
- "step": 80
120
- },
121
- {
122
- "epoch": 0.64,
123
- "eval_loss": 0.3140092194080353,
124
- "eval_runtime": 146.0972,
125
- "eval_samples_per_second": 24.607,
126
- "eval_steps_per_second": 3.08,
127
- "eval_wer": 0.2144328779006723,
128
- "step": 80
129
- },
130
- {
131
- "epoch": 0.71,
132
- "learning_rate": 9e-05,
133
- "loss": 0.0189,
134
- "step": 90
135
- },
136
- {
137
- "epoch": 0.71,
138
- "eval_loss": 0.3206515908241272,
139
- "eval_runtime": 151.0056,
140
- "eval_samples_per_second": 23.807,
141
- "eval_steps_per_second": 2.98,
142
- "eval_wer": 0.21307742355237475,
143
- "step": 90
144
- },
145
- {
146
- "epoch": 0.79,
147
- "learning_rate": 0.0001,
148
- "loss": 0.0203,
149
- "step": 100
150
- },
151
- {
152
- "epoch": 0.79,
153
- "eval_loss": 0.3265531361103058,
154
- "eval_runtime": 163.535,
155
- "eval_samples_per_second": 21.983,
156
- "eval_steps_per_second": 2.752,
157
- "eval_wer": 0.21296898720451096,
158
- "step": 100
159
- },
160
- {
161
- "epoch": 0.87,
162
- "learning_rate": 9.999199359487592e-05,
163
- "loss": 0.0221,
164
- "step": 110
165
- },
166
- {
167
- "epoch": 0.87,
168
- "eval_loss": 0.3243820071220398,
169
- "eval_runtime": 159.6061,
170
- "eval_samples_per_second": 22.524,
171
- "eval_steps_per_second": 2.819,
172
- "eval_wer": 0.2141075688570809,
173
- "step": 110
174
- },
175
- {
176
- "epoch": 0.95,
177
- "learning_rate": 9.99839871897518e-05,
178
- "loss": 0.0219,
179
- "step": 120
180
- },
181
- {
182
- "epoch": 0.95,
183
- "eval_loss": 0.3229624629020691,
184
- "eval_runtime": 154.2088,
185
- "eval_samples_per_second": 23.313,
186
- "eval_steps_per_second": 2.918,
187
- "eval_wer": 0.21486662329212752,
188
- "step": 120
189
- },
190
- {
191
- "epoch": 1.04,
192
- "learning_rate": 9.997598078462771e-05,
193
- "loss": 0.0245,
194
- "step": 130
195
- },
196
- {
197
- "epoch": 1.04,
198
- "eval_loss": 0.31454530358314514,
199
- "eval_runtime": 154.6481,
200
- "eval_samples_per_second": 23.246,
201
- "eval_steps_per_second": 2.91,
202
- "eval_wer": 0.2169811320754717,
203
- "step": 130
204
- },
205
- {
206
- "epoch": 1.12,
207
- "learning_rate": 9.996797437950361e-05,
208
- "loss": 0.0231,
209
- "step": 140
210
- },
211
- {
212
- "epoch": 1.12,
213
- "eval_loss": 0.3071616590023041,
214
- "eval_runtime": 157.0332,
215
- "eval_samples_per_second": 22.893,
216
- "eval_steps_per_second": 2.866,
217
- "eval_wer": 0.21508349598785512,
218
- "step": 140
219
- },
220
- {
221
- "epoch": 1.2,
222
- "learning_rate": 9.995996797437951e-05,
223
- "loss": 0.0194,
224
- "step": 150
225
- },
226
- {
227
- "epoch": 1.2,
228
- "eval_loss": 0.323686808347702,
229
- "eval_runtime": 158.852,
230
- "eval_samples_per_second": 22.631,
231
- "eval_steps_per_second": 2.833,
232
- "eval_wer": 0.21611364129256128,
233
- "step": 150
234
- },
235
- {
236
- "epoch": 1.28,
237
- "learning_rate": 9.995196156925542e-05,
238
- "loss": 0.0158,
239
- "step": 160
240
- },
241
- {
242
- "epoch": 1.28,
243
- "eval_loss": 0.35077646374702454,
244
- "eval_runtime": 143.668,
245
- "eval_samples_per_second": 25.023,
246
- "eval_steps_per_second": 3.132,
247
- "eval_wer": 0.21752331381479073,
248
- "step": 160
249
- },
250
- {
251
- "epoch": 1.36,
252
- "learning_rate": 9.99439551641313e-05,
253
- "loss": 0.0233,
254
- "step": 170
255
- },
256
- {
257
- "epoch": 1.36,
258
- "eval_loss": 0.34845927357673645,
259
- "eval_runtime": 146.6957,
260
- "eval_samples_per_second": 24.507,
261
- "eval_steps_per_second": 3.068,
262
- "eval_wer": 0.2139991325092171,
263
- "step": 170
264
- },
265
- {
266
- "epoch": 1.44,
267
- "learning_rate": 9.993594875900721e-05,
268
- "loss": 0.0232,
269
- "step": 180
270
- },
271
- {
272
- "epoch": 1.44,
273
- "eval_loss": 0.330231636762619,
274
- "eval_runtime": 160.7938,
275
- "eval_samples_per_second": 22.358,
276
- "eval_steps_per_second": 2.799,
277
- "eval_wer": 0.2168726957276079,
278
- "step": 180
279
- },
280
- {
281
- "epoch": 1.52,
282
- "learning_rate": 9.992794235388311e-05,
283
- "loss": 0.0164,
284
- "step": 190
285
- },
286
- {
287
- "epoch": 1.52,
288
- "eval_loss": 0.32822760939598083,
289
- "eval_runtime": 163.3169,
290
- "eval_samples_per_second": 22.012,
291
- "eval_steps_per_second": 2.755,
292
- "eval_wer": 0.21356538711776188,
293
- "step": 190
294
- },
295
- {
296
- "epoch": 1.6,
297
- "learning_rate": 9.991993594875901e-05,
298
- "loss": 0.0218,
299
- "step": 200
300
- },
301
- {
302
- "epoch": 1.6,
303
- "eval_loss": 0.33395659923553467,
304
- "eval_runtime": 163.325,
305
- "eval_samples_per_second": 22.011,
306
- "eval_steps_per_second": 2.755,
307
- "eval_wer": 0.21085447842116678,
308
- "step": 200
309
  }
310
  ],
311
  "max_steps": 125000,
312
  "num_train_epochs": 1000,
313
- "total_flos": 4.639116880735237e+18,
314
  "trial_name": null,
315
  "trial_params": null
316
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.23833167825223436,
5
+ "global_step": 30,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.24,
12
+ "learning_rate": 6e-05,
13
+ "loss": 0.019,
14
  "step": 30
15
  },
16
  {
17
  "epoch": 0.24,
18
+ "eval_loss": 0.333274781703949,
19
+ "eval_runtime": 217.3871,
20
+ "eval_samples_per_second": 16.537,
21
+ "eval_steps_per_second": 2.07,
22
+ "eval_wer": 0.20998698763825635,
23
  "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "max_steps": 125000,
27
  "num_train_epochs": 1000,
28
+ "total_flos": 6.914475006165965e+17,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea44a679aed6b372c35a811f556ea93b3582277ad07bd469189e9fd57352fe3c
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc78f10706cccf49dff72ccdd19bc3f1b28bbe47c4e276e818cce22f2537b6b
3
  size 3323