schnell committed on
Commit 6b5e602
1 Parent(s): 43da16c

Training in progress, epoch 3

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fe7b93abf02af995010aa8e52e985082cb35427475535754ba46e0f79bfe6b7
-size 236491077
+oid sha256:4ad3d537ea4fd7d6ad4fa6cf73b1b40dd110b483d065f2705e6949affdc6cf17
+size 236491269
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
+oid sha256:2906e434c2be5553d8ba690fa00f38995006c684a8b4fbef8476c7418f239877
 size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fa70ecb25666f6769a2077ed150c9e52861a4143626716aebc146c3d3d8cd65
+oid sha256:fddd988ead107be498ca0838e5d6931249bb674b40ce6296875748270d271cc4
 size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27f16da81f42c208591e42d4a624accc6adcfabf4b156667c067c8a5a08012ca
+oid sha256:45ac1a1a1f98b3964d76c33e861170daff6e880017ea91dcfeb6f1af152554e5
 size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4e814a01103e7d042492e6a27b7700dc34e66d68095fbabd7c929b5bd6b2625
+oid sha256:29cf36739c6a6691ce7e0e701bc722960d20ddd1e069bdfc942253c223daa611
 size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.0,
-  "global_step": 45880,
+  "epoch": 3.0,
+  "global_step": 68820,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
570
  "eval_samples_per_second": 604.475,
571
  "eval_steps_per_second": 37.78,
572
  "step": 45880
573
+ },
574
+ {
575
+ "epoch": 2.01,
576
+ "learning_rate": 8.654780026922643e-05,
577
+ "loss": 1.9157,
578
+ "step": 46000
579
+ },
580
+ {
581
+ "epoch": 2.03,
582
+ "learning_rate": 8.639054184961062e-05,
583
+ "loss": 1.9106,
584
+ "step": 46500
585
+ },
586
+ {
587
+ "epoch": 2.05,
588
+ "learning_rate": 8.623328342999485e-05,
589
+ "loss": 1.9071,
590
+ "step": 47000
591
+ },
592
+ {
593
+ "epoch": 2.07,
594
+ "learning_rate": 8.607602501037906e-05,
595
+ "loss": 1.9029,
596
+ "step": 47500
597
+ },
598
+ {
599
+ "epoch": 2.09,
600
+ "learning_rate": 8.59190811076025e-05,
601
+ "loss": 1.9037,
602
+ "step": 48000
603
+ },
604
+ {
605
+ "epoch": 2.11,
606
+ "learning_rate": 8.576182268798671e-05,
607
+ "loss": 1.9035,
608
+ "step": 48500
609
+ },
610
+ {
611
+ "epoch": 2.14,
612
+ "learning_rate": 8.560456426837094e-05,
613
+ "loss": 1.8995,
614
+ "step": 49000
615
+ },
616
+ {
617
+ "epoch": 2.16,
618
+ "learning_rate": 8.544730584875515e-05,
619
+ "loss": 1.8955,
620
+ "step": 49500
621
+ },
622
+ {
623
+ "epoch": 2.18,
624
+ "learning_rate": 8.529036194597859e-05,
625
+ "loss": 1.894,
626
+ "step": 50000
627
+ },
628
+ {
629
+ "epoch": 2.2,
630
+ "learning_rate": 8.51331035263628e-05,
631
+ "loss": 1.8871,
632
+ "step": 50500
633
+ },
634
+ {
635
+ "epoch": 2.22,
636
+ "learning_rate": 8.497584510674703e-05,
637
+ "loss": 1.8897,
638
+ "step": 51000
639
+ },
640
+ {
641
+ "epoch": 2.24,
642
+ "learning_rate": 8.481858668713124e-05,
643
+ "loss": 1.888,
644
+ "step": 51500
645
+ },
646
+ {
647
+ "epoch": 2.27,
648
+ "learning_rate": 8.466164278435468e-05,
649
+ "loss": 1.8866,
650
+ "step": 52000
651
+ },
652
+ {
653
+ "epoch": 2.29,
654
+ "learning_rate": 8.450438436473889e-05,
655
+ "loss": 1.8829,
656
+ "step": 52500
657
+ },
658
+ {
659
+ "epoch": 2.31,
660
+ "learning_rate": 8.43471259451231e-05,
661
+ "loss": 1.8787,
662
+ "step": 53000
663
+ },
664
+ {
665
+ "epoch": 2.33,
666
+ "learning_rate": 8.418986752550731e-05,
667
+ "loss": 1.8743,
668
+ "step": 53500
669
+ },
670
+ {
671
+ "epoch": 2.35,
672
+ "learning_rate": 8.403260910589154e-05,
673
+ "loss": 1.8758,
674
+ "step": 54000
675
+ },
676
+ {
677
+ "epoch": 2.38,
678
+ "learning_rate": 8.387566520311498e-05,
679
+ "loss": 1.8728,
680
+ "step": 54500
681
+ },
682
+ {
683
+ "epoch": 2.4,
684
+ "learning_rate": 8.371840678349919e-05,
685
+ "loss": 1.8713,
686
+ "step": 55000
687
+ },
688
+ {
689
+ "epoch": 2.42,
690
+ "learning_rate": 8.35611483638834e-05,
691
+ "loss": 1.8675,
692
+ "step": 55500
693
+ },
694
+ {
695
+ "epoch": 2.44,
696
+ "learning_rate": 8.340388994426763e-05,
697
+ "loss": 1.8653,
698
+ "step": 56000
699
+ },
700
+ {
701
+ "epoch": 2.46,
702
+ "learning_rate": 8.324694604149107e-05,
703
+ "loss": 1.8635,
704
+ "step": 56500
705
+ },
706
+ {
707
+ "epoch": 2.48,
708
+ "learning_rate": 8.308968762187528e-05,
709
+ "loss": 1.8589,
710
+ "step": 57000
711
+ },
712
+ {
713
+ "epoch": 2.51,
714
+ "learning_rate": 8.293242920225949e-05,
715
+ "loss": 1.8585,
716
+ "step": 57500
717
+ },
718
+ {
719
+ "epoch": 2.53,
720
+ "learning_rate": 8.27751707826437e-05,
721
+ "loss": 1.8592,
722
+ "step": 58000
723
+ },
724
+ {
725
+ "epoch": 2.55,
726
+ "learning_rate": 8.261822687986716e-05,
727
+ "loss": 1.8551,
728
+ "step": 58500
729
+ },
730
+ {
731
+ "epoch": 2.57,
732
+ "learning_rate": 8.246096846025136e-05,
733
+ "loss": 1.8564,
734
+ "step": 59000
735
+ },
736
+ {
737
+ "epoch": 2.59,
738
+ "learning_rate": 8.230371004063558e-05,
739
+ "loss": 1.851,
740
+ "step": 59500
741
+ },
742
+ {
743
+ "epoch": 2.62,
744
+ "learning_rate": 8.214645162101979e-05,
745
+ "loss": 1.8469,
746
+ "step": 60000
747
+ },
748
+ {
749
+ "epoch": 2.64,
750
+ "learning_rate": 8.198950771824324e-05,
751
+ "loss": 1.8461,
752
+ "step": 60500
753
+ },
754
+ {
755
+ "epoch": 2.66,
756
+ "learning_rate": 8.183224929862745e-05,
757
+ "loss": 1.8487,
758
+ "step": 61000
759
+ },
760
+ {
761
+ "epoch": 2.68,
762
+ "learning_rate": 8.167499087901167e-05,
763
+ "loss": 1.8435,
764
+ "step": 61500
765
+ },
766
+ {
767
+ "epoch": 2.7,
768
+ "learning_rate": 8.151773245939588e-05,
769
+ "loss": 1.8403,
770
+ "step": 62000
771
+ },
772
+ {
773
+ "epoch": 2.72,
774
+ "learning_rate": 8.136078855661933e-05,
775
+ "loss": 1.8435,
776
+ "step": 62500
777
+ },
778
+ {
779
+ "epoch": 2.75,
780
+ "learning_rate": 8.120353013700354e-05,
781
+ "loss": 1.8409,
782
+ "step": 63000
783
+ },
784
+ {
785
+ "epoch": 2.77,
786
+ "learning_rate": 8.104627171738776e-05,
787
+ "loss": 1.8359,
788
+ "step": 63500
789
+ },
790
+ {
791
+ "epoch": 2.79,
792
+ "learning_rate": 8.088901329777196e-05,
793
+ "loss": 1.833,
794
+ "step": 64000
795
+ },
796
+ {
797
+ "epoch": 2.81,
798
+ "learning_rate": 8.073206939499542e-05,
799
+ "loss": 1.8336,
800
+ "step": 64500
801
+ },
802
+ {
803
+ "epoch": 2.83,
804
+ "learning_rate": 8.057481097537963e-05,
805
+ "loss": 1.8348,
806
+ "step": 65000
807
+ },
808
+ {
809
+ "epoch": 2.86,
810
+ "learning_rate": 8.041755255576384e-05,
811
+ "loss": 1.8302,
812
+ "step": 65500
813
+ },
814
+ {
815
+ "epoch": 2.88,
816
+ "learning_rate": 8.026029413614805e-05,
817
+ "loss": 1.8253,
818
+ "step": 66000
819
+ },
820
+ {
821
+ "epoch": 2.9,
822
+ "learning_rate": 8.01033502333715e-05,
823
+ "loss": 1.8225,
824
+ "step": 66500
825
+ },
826
+ {
827
+ "epoch": 2.92,
828
+ "learning_rate": 7.99460918137557e-05,
829
+ "loss": 1.8245,
830
+ "step": 67000
831
+ },
832
+ {
833
+ "epoch": 2.94,
834
+ "learning_rate": 7.978883339413993e-05,
835
+ "loss": 1.8246,
836
+ "step": 67500
837
+ },
838
+ {
839
+ "epoch": 2.96,
840
+ "learning_rate": 7.963157497452414e-05,
841
+ "loss": 1.8203,
842
+ "step": 68000
843
+ },
844
+ {
845
+ "epoch": 2.99,
846
+ "learning_rate": 7.947463107174758e-05,
847
+ "loss": 1.8178,
848
+ "step": 68500
849
+ },
850
+ {
851
+ "epoch": 3.0,
852
+ "eval_accuracy": 0.6562844667040338,
853
+ "eval_loss": 1.6895612478256226,
854
+ "eval_runtime": 294.9578,
855
+ "eval_samples_per_second": 603.314,
856
+ "eval_steps_per_second": 37.707,
857
+ "step": 68820
858
  }
859
  ],
860
  "max_steps": 321160,
861
  "num_train_epochs": 14,
862
+ "total_flos": 5.2328760513448845e+17,
863
  "trial_name": null,
864
  "trial_params": null
865
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc972ecc1329e0803175ccbf8d455608f73797136d228b25b2d6c64e55403179
+oid sha256:2906e434c2be5553d8ba690fa00f38995006c684a8b4fbef8476c7418f239877
 size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:666ec60d0078a18e9ab7268ca3acc069e6bcfd168d1a61b253913169617035c1
-size 18890
+oid sha256:7bf4c282d434ce5ba540303c16207d3c8f5d389184c1a3bbf4dd4df23675b929
+size 26579