mstatt committed on
Commit
aee8746
·
1 Parent(s): 983e319

Upload 11 files

Browse files
Files changed (5) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +95 -5
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38a118be7202aa4db1e3d37d22adcfc6b4e3be00af436b916228fadd4dc2e40c
3
  size 535701061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:673f7e8faafc2900bf4a85994f72ca80ca0eaa25661c2e02ac87c4226ca7f61d
3
  size 535701061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfcd56f2d823a9cdc2cfccb44e5e823d3200112622082e5cd94e93d019b9fc67
3
  size 267855533
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eb2a0536db14e2f001277ffeccc94f436bef6829ae9d697edcc0671e4ab02aa
3
  size 267855533
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01da4f1236b1946afbe43e51bdf7717aeb209652b7d7314cda674a4ca1e36a16
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a09cc498d55dc004d4b5c9aad1ced395abd939cae4e8d2b9da0e0073f465759a
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1cc86f59df564bec394c86356ab586d662aa80b1cf79a7016636c67697d6ef91
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d40fb1a7f26e8683137c4b8cddc0f30db08f76a6b3d086416086ebaaa51cc5d5
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.022033799439668655,
3
- "best_model_checkpoint": "./results/checkpoint-6000",
4
- "epoch": 0.687915615684476,
5
- "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -186,11 +186,101 @@
186
  "eval_samples_per_second": 118.75,
187
  "eval_steps_per_second": 7.424,
188
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ],
191
  "max_steps": 26166,
192
  "num_train_epochs": 3,
193
- "total_flos": 1.2716870270976e+16,
194
  "trial_name": null,
195
  "trial_params": null
196
  }
 
1
  {
2
+ "best_metric": 0.01757008023560047,
3
+ "best_model_checkpoint": "./results/checkpoint-8500",
4
+ "epoch": 1.031873423526714,
5
+ "global_step": 9000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
186
  "eval_samples_per_second": 118.75,
187
  "eval_steps_per_second": 7.424,
188
  "step": 6000
189
+ },
190
+ {
191
+ "epoch": 0.75,
192
+ "learning_rate": 1.5031720553389898e-05,
193
+ "loss": 0.0244,
194
+ "step": 6500
195
+ },
196
+ {
197
+ "epoch": 0.75,
198
+ "eval_accuracy": 0.9947543427162758,
199
+ "eval_loss": 0.02473697066307068,
200
+ "eval_runtime": 331.6878,
201
+ "eval_samples_per_second": 105.177,
202
+ "eval_steps_per_second": 6.575,
203
+ "step": 6500
204
+ },
205
+ {
206
+ "epoch": 0.8,
207
+ "learning_rate": 1.4649545211342966e-05,
208
+ "loss": 0.0243,
209
+ "step": 7000
210
+ },
211
+ {
212
+ "epoch": 0.8,
213
+ "eval_accuracy": 0.9946970131284756,
214
+ "eval_loss": 0.02222474291920662,
215
+ "eval_runtime": 343.6254,
216
+ "eval_samples_per_second": 101.523,
217
+ "eval_steps_per_second": 6.347,
218
+ "step": 7000
219
+ },
220
+ {
221
+ "epoch": 0.86,
222
+ "learning_rate": 1.4267369869296034e-05,
223
+ "loss": 0.0204,
224
+ "step": 7500
225
+ },
226
+ {
227
+ "epoch": 0.86,
228
+ "eval_accuracy": 0.9944390299833744,
229
+ "eval_loss": 0.020777888596057892,
230
+ "eval_runtime": 342.3258,
231
+ "eval_samples_per_second": 101.909,
232
+ "eval_steps_per_second": 6.371,
233
+ "step": 7500
234
+ },
235
+ {
236
+ "epoch": 0.92,
237
+ "learning_rate": 1.3885194527249105e-05,
238
+ "loss": 0.0219,
239
+ "step": 8000
240
+ },
241
+ {
242
+ "epoch": 0.92,
243
+ "eval_accuracy": 0.9955569569454795,
244
+ "eval_loss": 0.023240169510245323,
245
+ "eval_runtime": 340.3493,
246
+ "eval_samples_per_second": 102.501,
247
+ "eval_steps_per_second": 6.408,
248
+ "step": 8000
249
+ },
250
+ {
251
+ "epoch": 0.97,
252
+ "learning_rate": 1.3503019185202171e-05,
253
+ "loss": 0.0298,
254
+ "step": 8500
255
+ },
256
+ {
257
+ "epoch": 0.97,
258
+ "eval_accuracy": 0.9955282921515795,
259
+ "eval_loss": 0.01757008023560047,
260
+ "eval_runtime": 307.8345,
261
+ "eval_samples_per_second": 113.327,
262
+ "eval_steps_per_second": 7.085,
263
+ "step": 8500
264
+ },
265
+ {
266
+ "epoch": 1.03,
267
+ "learning_rate": 1.312084384315524e-05,
268
+ "loss": 0.0098,
269
+ "step": 9000
270
+ },
271
+ {
272
+ "epoch": 1.03,
273
+ "eval_accuracy": 0.9951843146247779,
274
+ "eval_loss": 0.026897920295596123,
275
+ "eval_runtime": 318.0815,
276
+ "eval_samples_per_second": 109.676,
277
+ "eval_steps_per_second": 6.857,
278
+ "step": 9000
279
  }
280
  ],
281
  "max_steps": 26166,
282
  "num_train_epochs": 3,
283
+ "total_flos": 1.907424566727475e+16,
284
  "trial_name": null,
285
  "trial_params": null
286
  }