schnell commited on
Commit
43da16c
1 Parent(s): 5e3d17f

Training in progress, epoch 2

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff48857e140935905f2d51dd058038971b29df48f11e86540faddcb8cb55788c
3
  size 236491077
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe7b93abf02af995010aa8e52e985082cb35427475535754ba46e0f79bfe6b7
3
  size 236491077
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4803f9e0db327256eb501e38588125bc88c11e1b720f56b57560c04d0edfb58a
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
3
  size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6704fd65015a992608b80d84ae3a33c9753819f994516e6b0899e5280afdced5
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa70ecb25666f6769a2077ed150c9e52861a4143626716aebc146c3d3d8cd65
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c55418e1401c7ce23c9d42366610b1a8c2a2596e8b41e0dce8eff0736e359ea
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27f16da81f42c208591e42d4a624accc6adcfabf4b156667c067c8a5a08012ca
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2eaa85b3feb378678992330dad398ea68e96da3317f4bcf13c1084640b7ec5b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4e814a01103e7d042492e6a27b7700dc34e66d68095fbabd7c929b5bd6b2625
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 22940,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -285,11 +285,296 @@
285
  "eval_samples_per_second": 603.699,
286
  "eval_steps_per_second": 37.731,
287
  "step": 22940
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  }
289
  ],
290
  "max_steps": 321160,
291
  "num_train_epochs": 14,
292
- "total_flos": 1.7442453853030272e+17,
293
  "trial_name": null,
294
  "trial_params": null
295
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 45880,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
285
  "eval_samples_per_second": 603.699,
286
  "eval_steps_per_second": 37.731,
287
  "step": 22940
288
+ },
289
+ {
290
+ "epoch": 1.0,
291
+ "learning_rate": 9.377822788632103e-05,
292
+ "loss": 2.1305,
293
+ "step": 23000
294
+ },
295
+ {
296
+ "epoch": 1.02,
297
+ "learning_rate": 9.362096946670525e-05,
298
+ "loss": 2.1245,
299
+ "step": 23500
300
+ },
301
+ {
302
+ "epoch": 1.05,
303
+ "learning_rate": 9.346371104708946e-05,
304
+ "loss": 2.1178,
305
+ "step": 24000
306
+ },
307
+ {
308
+ "epoch": 1.07,
309
+ "learning_rate": 9.330676714431291e-05,
310
+ "loss": 2.1102,
311
+ "step": 24500
312
+ },
313
+ {
314
+ "epoch": 1.09,
315
+ "learning_rate": 9.314950872469712e-05,
316
+ "loss": 2.1007,
317
+ "step": 25000
318
+ },
319
+ {
320
+ "epoch": 1.11,
321
+ "learning_rate": 9.299225030508134e-05,
322
+ "loss": 2.0966,
323
+ "step": 25500
324
+ },
325
+ {
326
+ "epoch": 1.13,
327
+ "learning_rate": 9.283499188546555e-05,
328
+ "loss": 2.0878,
329
+ "step": 26000
330
+ },
331
+ {
332
+ "epoch": 1.16,
333
+ "learning_rate": 9.2678047982689e-05,
334
+ "loss": 2.0814,
335
+ "step": 26500
336
+ },
337
+ {
338
+ "epoch": 1.18,
339
+ "learning_rate": 9.252078956307321e-05,
340
+ "loss": 2.0756,
341
+ "step": 27000
342
+ },
343
+ {
344
+ "epoch": 1.2,
345
+ "learning_rate": 9.236353114345743e-05,
346
+ "loss": 2.0685,
347
+ "step": 27500
348
+ },
349
+ {
350
+ "epoch": 1.22,
351
+ "learning_rate": 9.220627272384163e-05,
352
+ "loss": 2.065,
353
+ "step": 28000
354
+ },
355
+ {
356
+ "epoch": 1.24,
357
+ "learning_rate": 9.204932882106509e-05,
358
+ "loss": 2.0592,
359
+ "step": 28500
360
+ },
361
+ {
362
+ "epoch": 1.26,
363
+ "learning_rate": 9.18920704014493e-05,
364
+ "loss": 2.0527,
365
+ "step": 29000
366
+ },
367
+ {
368
+ "epoch": 1.29,
369
+ "learning_rate": 9.173481198183351e-05,
370
+ "loss": 2.0459,
371
+ "step": 29500
372
+ },
373
+ {
374
+ "epoch": 1.31,
375
+ "learning_rate": 9.157755356221772e-05,
376
+ "loss": 2.0443,
377
+ "step": 30000
378
+ },
379
+ {
380
+ "epoch": 1.33,
381
+ "learning_rate": 9.142060965944116e-05,
382
+ "loss": 2.0367,
383
+ "step": 30500
384
+ },
385
+ {
386
+ "epoch": 1.35,
387
+ "learning_rate": 9.126366575666462e-05,
388
+ "loss": 2.0322,
389
+ "step": 31000
390
+ },
391
+ {
392
+ "epoch": 1.37,
393
+ "learning_rate": 9.110640733704882e-05,
394
+ "loss": 2.0228,
395
+ "step": 31500
396
+ },
397
+ {
398
+ "epoch": 1.39,
399
+ "learning_rate": 9.094914891743304e-05,
400
+ "loss": 2.0209,
401
+ "step": 32000
402
+ },
403
+ {
404
+ "epoch": 1.42,
405
+ "learning_rate": 9.079189049781725e-05,
406
+ "loss": 2.0156,
407
+ "step": 32500
408
+ },
409
+ {
410
+ "epoch": 1.44,
411
+ "learning_rate": 9.063463207820148e-05,
412
+ "loss": 2.0141,
413
+ "step": 33000
414
+ },
415
+ {
416
+ "epoch": 1.46,
417
+ "learning_rate": 9.047737365858569e-05,
418
+ "loss": 2.0093,
419
+ "step": 33500
420
+ },
421
+ {
422
+ "epoch": 1.48,
423
+ "learning_rate": 9.032042975580913e-05,
424
+ "loss": 2.0052,
425
+ "step": 34000
426
+ },
427
+ {
428
+ "epoch": 1.5,
429
+ "learning_rate": 9.016317133619334e-05,
430
+ "loss": 1.9973,
431
+ "step": 34500
432
+ },
433
+ {
434
+ "epoch": 1.53,
435
+ "learning_rate": 9.000591291657756e-05,
436
+ "loss": 1.995,
437
+ "step": 35000
438
+ },
439
+ {
440
+ "epoch": 1.55,
441
+ "learning_rate": 8.984865449696177e-05,
442
+ "loss": 1.9908,
443
+ "step": 35500
444
+ },
445
+ {
446
+ "epoch": 1.57,
447
+ "learning_rate": 8.969139607734599e-05,
448
+ "loss": 1.986,
449
+ "step": 36000
450
+ },
451
+ {
452
+ "epoch": 1.59,
453
+ "learning_rate": 8.95341376577302e-05,
454
+ "loss": 1.9825,
455
+ "step": 36500
456
+ },
457
+ {
458
+ "epoch": 1.61,
459
+ "learning_rate": 8.937687923811441e-05,
460
+ "loss": 1.9754,
461
+ "step": 37000
462
+ },
463
+ {
464
+ "epoch": 1.63,
465
+ "learning_rate": 8.921993533533786e-05,
466
+ "loss": 1.9733,
467
+ "step": 37500
468
+ },
469
+ {
470
+ "epoch": 1.66,
471
+ "learning_rate": 8.906267691572208e-05,
472
+ "loss": 1.9679,
473
+ "step": 38000
474
+ },
475
+ {
476
+ "epoch": 1.68,
477
+ "learning_rate": 8.890541849610629e-05,
478
+ "loss": 1.9611,
479
+ "step": 38500
480
+ },
481
+ {
482
+ "epoch": 1.7,
483
+ "learning_rate": 8.87481600764905e-05,
484
+ "loss": 1.961,
485
+ "step": 39000
486
+ },
487
+ {
488
+ "epoch": 1.72,
489
+ "learning_rate": 8.859121617371395e-05,
490
+ "loss": 1.9588,
491
+ "step": 39500
492
+ },
493
+ {
494
+ "epoch": 1.74,
495
+ "learning_rate": 8.843395775409816e-05,
496
+ "loss": 1.9578,
497
+ "step": 40000
498
+ },
499
+ {
500
+ "epoch": 1.77,
501
+ "learning_rate": 8.827669933448237e-05,
502
+ "loss": 1.9528,
503
+ "step": 40500
504
+ },
505
+ {
506
+ "epoch": 1.79,
507
+ "learning_rate": 8.811944091486659e-05,
508
+ "loss": 1.9486,
509
+ "step": 41000
510
+ },
511
+ {
512
+ "epoch": 1.81,
513
+ "learning_rate": 8.796249701209002e-05,
514
+ "loss": 1.9443,
515
+ "step": 41500
516
+ },
517
+ {
518
+ "epoch": 1.83,
519
+ "learning_rate": 8.780523859247425e-05,
520
+ "loss": 1.9406,
521
+ "step": 42000
522
+ },
523
+ {
524
+ "epoch": 1.85,
525
+ "learning_rate": 8.764798017285846e-05,
526
+ "loss": 1.9372,
527
+ "step": 42500
528
+ },
529
+ {
530
+ "epoch": 1.87,
531
+ "learning_rate": 8.749072175324268e-05,
532
+ "loss": 1.9351,
533
+ "step": 43000
534
+ },
535
+ {
536
+ "epoch": 1.9,
537
+ "learning_rate": 8.733346333362689e-05,
538
+ "loss": 1.9265,
539
+ "step": 43500
540
+ },
541
+ {
542
+ "epoch": 1.92,
543
+ "learning_rate": 8.717651943085034e-05,
544
+ "loss": 1.9272,
545
+ "step": 44000
546
+ },
547
+ {
548
+ "epoch": 1.94,
549
+ "learning_rate": 8.701926101123455e-05,
550
+ "loss": 1.9268,
551
+ "step": 44500
552
+ },
553
+ {
554
+ "epoch": 1.96,
555
+ "learning_rate": 8.686200259161877e-05,
556
+ "loss": 1.9242,
557
+ "step": 45000
558
+ },
559
+ {
560
+ "epoch": 1.98,
561
+ "learning_rate": 8.670474417200297e-05,
562
+ "loss": 1.9234,
563
+ "step": 45500
564
+ },
565
+ {
566
+ "epoch": 2.0,
567
+ "eval_accuracy": 0.6411638490813204,
568
+ "eval_loss": 1.785447597503662,
569
+ "eval_runtime": 294.391,
570
+ "eval_samples_per_second": 604.475,
571
+ "eval_steps_per_second": 37.78,
572
+ "step": 45880
573
  }
574
  ],
575
  "max_steps": 321160,
576
  "num_train_epochs": 14,
577
+ "total_flos": 3.488545026395035e+17,
578
  "trial_name": null,
579
  "trial_params": null
580
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4803f9e0db327256eb501e38588125bc88c11e1b720f56b57560c04d0edfb58a
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
3
  size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d78301fc79b73c4bc3a25e318065e0b0fb19b15fc290523ddb23d8d132f535ce
3
- size 11201
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:666ec60d0078a18e9ab7268ca3acc069e6bcfd168d1a61b253913169617035c1
3
+ size 18890