DuongTrongChi commited on
Commit
8702ce7
1 Parent(s): 32ce1d5

Training in progress, step 116, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d60875408f1ff4aae9279ba6e12943e7ee095a5db56e71deafd1b8e936b74742
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b570962ede4265c9488fa98dcd00095b1ca3d903d14f064ee79d3cb2379651f4
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2b81b7b1b1dc97162752ac7389fd94b8b35c908dcae5b595a54a4ddfbadad95
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68afdadf9dcafbea18732f32b8ac5fa2ad488bf587daf988c9af28727179daa0
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00c20447fd261a108ac7b92b3fd7e46fb934233a7e75325b07a4d18c95a93b0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e779822c485743db355cfc0cc7805b58345253d12afcfcd7953cd3834152cb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1227285179435668,
5
  "eval_steps": 500,
6
- "global_step": 84,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -595,6 +595,230 @@
595
  "learning_rate": 1.6800000000000002e-05,
596
  "loss": 1.6169,
597
  "step": 84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  }
599
  ],
600
  "logging_steps": 1,
@@ -614,7 +838,7 @@
614
  "attributes": {}
615
  }
616
  },
617
- "total_flos": 9.4533080190677e+16,
618
  "train_batch_size": 4,
619
  "trial_name": null,
620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.16948223906492557,
5
  "eval_steps": 500,
6
+ "global_step": 116,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
595
  "learning_rate": 1.6800000000000002e-05,
596
  "loss": 1.6169,
597
  "step": 84
598
+ },
599
+ {
600
+ "epoch": 0.12418957172860925,
601
+ "grad_norm": 0.3446858525276184,
602
+ "learning_rate": 1.7e-05,
603
+ "loss": 1.6721,
604
+ "step": 85
605
+ },
606
+ {
607
+ "epoch": 0.12565062551365172,
608
+ "grad_norm": 0.33179470896720886,
609
+ "learning_rate": 1.72e-05,
610
+ "loss": 1.578,
611
+ "step": 86
612
+ },
613
+ {
614
+ "epoch": 0.12711167929869419,
615
+ "grad_norm": 0.3791605830192566,
616
+ "learning_rate": 1.7400000000000003e-05,
617
+ "loss": 1.6055,
618
+ "step": 87
619
+ },
620
+ {
621
+ "epoch": 0.12857273308373665,
622
+ "grad_norm": 0.5245212912559509,
623
+ "learning_rate": 1.76e-05,
624
+ "loss": 1.626,
625
+ "step": 88
626
+ },
627
+ {
628
+ "epoch": 0.13003378686877912,
629
+ "grad_norm": 0.43215855956077576,
630
+ "learning_rate": 1.7800000000000002e-05,
631
+ "loss": 1.6177,
632
+ "step": 89
633
+ },
634
+ {
635
+ "epoch": 0.13149484065382158,
636
+ "grad_norm": 0.4050828516483307,
637
+ "learning_rate": 1.8e-05,
638
+ "loss": 1.4903,
639
+ "step": 90
640
+ },
641
+ {
642
+ "epoch": 0.13295589443886402,
643
+ "grad_norm": 0.399501234292984,
644
+ "learning_rate": 1.8200000000000002e-05,
645
+ "loss": 1.6079,
646
+ "step": 91
647
+ },
648
+ {
649
+ "epoch": 0.13441694822390649,
650
+ "grad_norm": 0.439622700214386,
651
+ "learning_rate": 1.8400000000000003e-05,
652
+ "loss": 1.5405,
653
+ "step": 92
654
+ },
655
+ {
656
+ "epoch": 0.13587800200894895,
657
+ "grad_norm": 0.4368193447589874,
658
+ "learning_rate": 1.86e-05,
659
+ "loss": 1.415,
660
+ "step": 93
661
+ },
662
+ {
663
+ "epoch": 0.13733905579399142,
664
+ "grad_norm": 0.3644118010997772,
665
+ "learning_rate": 1.88e-05,
666
+ "loss": 1.525,
667
+ "step": 94
668
+ },
669
+ {
670
+ "epoch": 0.13880010957903388,
671
+ "grad_norm": 0.3868708312511444,
672
+ "learning_rate": 1.9e-05,
673
+ "loss": 1.4903,
674
+ "step": 95
675
+ },
676
+ {
677
+ "epoch": 0.14026116336407635,
678
+ "grad_norm": 0.43034952878952026,
679
+ "learning_rate": 1.9200000000000003e-05,
680
+ "loss": 1.4605,
681
+ "step": 96
682
+ },
683
+ {
684
+ "epoch": 0.1417222171491188,
685
+ "grad_norm": 0.4087560772895813,
686
+ "learning_rate": 1.94e-05,
687
+ "loss": 1.3544,
688
+ "step": 97
689
+ },
690
+ {
691
+ "epoch": 0.14318327093416128,
692
+ "grad_norm": 0.29801666736602783,
693
+ "learning_rate": 1.9600000000000002e-05,
694
+ "loss": 1.4098,
695
+ "step": 98
696
+ },
697
+ {
698
+ "epoch": 0.14464432471920372,
699
+ "grad_norm": 0.275905966758728,
700
+ "learning_rate": 1.98e-05,
701
+ "loss": 1.4732,
702
+ "step": 99
703
+ },
704
+ {
705
+ "epoch": 0.14610537850424618,
706
+ "grad_norm": 0.32271912693977356,
707
+ "learning_rate": 2e-05,
708
+ "loss": 1.4156,
709
+ "step": 100
710
+ },
711
+ {
712
+ "epoch": 0.14756643228928865,
713
+ "grad_norm": 0.3191397190093994,
714
+ "learning_rate": 1.9965753424657538e-05,
715
+ "loss": 1.3254,
716
+ "step": 101
717
+ },
718
+ {
719
+ "epoch": 0.1490274860743311,
720
+ "grad_norm": 0.26260653138160706,
721
+ "learning_rate": 1.993150684931507e-05,
722
+ "loss": 1.3877,
723
+ "step": 102
724
+ },
725
+ {
726
+ "epoch": 0.15048853985937358,
727
+ "grad_norm": 0.2782766819000244,
728
+ "learning_rate": 1.9897260273972604e-05,
729
+ "loss": 1.3683,
730
+ "step": 103
731
+ },
732
+ {
733
+ "epoch": 0.15194959364441604,
734
+ "grad_norm": 0.2510565221309662,
735
+ "learning_rate": 1.9863013698630137e-05,
736
+ "loss": 1.3996,
737
+ "step": 104
738
+ },
739
+ {
740
+ "epoch": 0.1534106474294585,
741
+ "grad_norm": 0.2523151934146881,
742
+ "learning_rate": 1.9828767123287674e-05,
743
+ "loss": 1.3192,
744
+ "step": 105
745
+ },
746
+ {
747
+ "epoch": 0.15487170121450095,
748
+ "grad_norm": 0.20559488236904144,
749
+ "learning_rate": 1.9794520547945207e-05,
750
+ "loss": 1.2096,
751
+ "step": 106
752
+ },
753
+ {
754
+ "epoch": 0.1563327549995434,
755
+ "grad_norm": 0.17568816244602203,
756
+ "learning_rate": 1.9760273972602743e-05,
757
+ "loss": 1.3795,
758
+ "step": 107
759
+ },
760
+ {
761
+ "epoch": 0.15779380878458588,
762
+ "grad_norm": 0.1778278350830078,
763
+ "learning_rate": 1.9726027397260276e-05,
764
+ "loss": 1.3146,
765
+ "step": 108
766
+ },
767
+ {
768
+ "epoch": 0.15925486256962834,
769
+ "grad_norm": 0.18488670885562897,
770
+ "learning_rate": 1.969178082191781e-05,
771
+ "loss": 1.4105,
772
+ "step": 109
773
+ },
774
+ {
775
+ "epoch": 0.1607159163546708,
776
+ "grad_norm": 0.1593291312456131,
777
+ "learning_rate": 1.9657534246575346e-05,
778
+ "loss": 1.3054,
779
+ "step": 110
780
+ },
781
+ {
782
+ "epoch": 0.16217697013971327,
783
+ "grad_norm": 0.14311783015727997,
784
+ "learning_rate": 1.962328767123288e-05,
785
+ "loss": 1.3985,
786
+ "step": 111
787
+ },
788
+ {
789
+ "epoch": 0.16363802392475574,
790
+ "grad_norm": 0.14948627352714539,
791
+ "learning_rate": 1.9589041095890412e-05,
792
+ "loss": 1.3395,
793
+ "step": 112
794
+ },
795
+ {
796
+ "epoch": 0.1650990777097982,
797
+ "grad_norm": 0.14075608551502228,
798
+ "learning_rate": 1.9554794520547945e-05,
799
+ "loss": 1.3868,
800
+ "step": 113
801
+ },
802
+ {
803
+ "epoch": 0.16656013149484064,
804
+ "grad_norm": 0.14439420402050018,
805
+ "learning_rate": 1.952054794520548e-05,
806
+ "loss": 1.2985,
807
+ "step": 114
808
+ },
809
+ {
810
+ "epoch": 0.1680211852798831,
811
+ "grad_norm": 0.13425147533416748,
812
+ "learning_rate": 1.9486301369863014e-05,
813
+ "loss": 1.3855,
814
+ "step": 115
815
+ },
816
+ {
817
+ "epoch": 0.16948223906492557,
818
+ "grad_norm": 0.1291724145412445,
819
+ "learning_rate": 1.945205479452055e-05,
820
+ "loss": 1.2942,
821
+ "step": 116
822
  }
823
  ],
824
  "logging_steps": 1,
 
838
  "attributes": {}
839
  }
840
  },
841
+ "total_flos": 1.304737835336663e+17,
842
  "train_batch_size": 4,
843
  "trial_name": null,
844
  "trial_params": null