DuongTrongChi commited on
Commit
c58328a
·
verified ·
1 Parent(s): 23eb1c5

Training in progress, step 837, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01b33c150c7a52d05b4ba5d81c5000a7371ef788c1d9a2846988ba7c563933ce
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11816752771862021d1f1c094679a1a902cd618f90af4e8dba0e4478f5f39b0a
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa90157b742bd4870000433650a070e605223d12eb54723b9405d5652fbb97d5
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39351dce492da1ec76082c4d982fa0e400b6d4b4fec021f8577941b139b31751
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba56b90c0f94d74c6e2b030957d920e1d37fa2b12e4f58e25953248835d1d8db
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e3bbc37b1c4948deb4e7c7ff80c3871b0fd3eb0b6501980c9b6ab76dcbae87d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8746227709190673,
5
  "eval_steps": 500,
6
- "global_step": 797,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5586,6 +5586,286 @@
5586
  "learning_rate": 2.81134401972873e-06,
5587
  "loss": 1.1065,
5588
  "step": 797
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5589
  }
5590
  ],
5591
  "logging_steps": 1,
@@ -5605,7 +5885,7 @@
5605
  "attributes": {}
5606
  }
5607
  },
5608
- "total_flos": 8.267146853068431e+17,
5609
  "train_batch_size": 4,
5610
  "trial_name": null,
5611
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9185185185185185,
5
  "eval_steps": 500,
6
+ "global_step": 837,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5586
  "learning_rate": 2.81134401972873e-06,
5587
  "loss": 1.1065,
5588
  "step": 797
5589
+ },
5590
+ {
5591
+ "epoch": 0.8757201646090536,
5592
+ "grad_norm": 0.13176120817661285,
5593
+ "learning_rate": 2.7866831072749695e-06,
5594
+ "loss": 1.1029,
5595
+ "step": 798
5596
+ },
5597
+ {
5598
+ "epoch": 0.8768175582990397,
5599
+ "grad_norm": 0.1394006758928299,
5600
+ "learning_rate": 2.7620221948212084e-06,
5601
+ "loss": 1.0872,
5602
+ "step": 799
5603
+ },
5604
+ {
5605
+ "epoch": 0.877914951989026,
5606
+ "grad_norm": 0.16219663619995117,
5607
+ "learning_rate": 2.7373612823674478e-06,
5608
+ "loss": 1.1006,
5609
+ "step": 800
5610
+ },
5611
+ {
5612
+ "epoch": 0.8790123456790123,
5613
+ "grad_norm": 0.13275958597660065,
5614
+ "learning_rate": 2.712700369913687e-06,
5615
+ "loss": 1.1087,
5616
+ "step": 801
5617
+ },
5618
+ {
5619
+ "epoch": 0.8801097393689986,
5620
+ "grad_norm": 0.15880566835403442,
5621
+ "learning_rate": 2.688039457459926e-06,
5622
+ "loss": 1.0905,
5623
+ "step": 802
5624
+ },
5625
+ {
5626
+ "epoch": 0.8812071330589849,
5627
+ "grad_norm": 0.14361722767353058,
5628
+ "learning_rate": 2.6633785450061657e-06,
5629
+ "loss": 1.0701,
5630
+ "step": 803
5631
+ },
5632
+ {
5633
+ "epoch": 0.8823045267489712,
5634
+ "grad_norm": 0.13469044864177704,
5635
+ "learning_rate": 2.6387176325524042e-06,
5636
+ "loss": 1.1495,
5637
+ "step": 804
5638
+ },
5639
+ {
5640
+ "epoch": 0.8834019204389575,
5641
+ "grad_norm": 0.16092169284820557,
5642
+ "learning_rate": 2.614056720098644e-06,
5643
+ "loss": 1.074,
5644
+ "step": 805
5645
+ },
5646
+ {
5647
+ "epoch": 0.8844993141289438,
5648
+ "grad_norm": 0.13376876711845398,
5649
+ "learning_rate": 2.5893958076448833e-06,
5650
+ "loss": 1.1405,
5651
+ "step": 806
5652
+ },
5653
+ {
5654
+ "epoch": 0.8855967078189301,
5655
+ "grad_norm": 0.1367831826210022,
5656
+ "learning_rate": 2.5647348951911222e-06,
5657
+ "loss": 1.0685,
5658
+ "step": 807
5659
+ },
5660
+ {
5661
+ "epoch": 0.8866941015089164,
5662
+ "grad_norm": 0.13057412207126617,
5663
+ "learning_rate": 2.5400739827373616e-06,
5664
+ "loss": 1.133,
5665
+ "step": 808
5666
+ },
5667
+ {
5668
+ "epoch": 0.8877914951989027,
5669
+ "grad_norm": 0.1330074518918991,
5670
+ "learning_rate": 2.5154130702836005e-06,
5671
+ "loss": 1.2354,
5672
+ "step": 809
5673
+ },
5674
+ {
5675
+ "epoch": 0.8888888888888888,
5676
+ "grad_norm": 0.15305842459201813,
5677
+ "learning_rate": 2.49075215782984e-06,
5678
+ "loss": 1.0574,
5679
+ "step": 810
5680
+ },
5681
+ {
5682
+ "epoch": 0.8899862825788751,
5683
+ "grad_norm": 0.13910268247127533,
5684
+ "learning_rate": 2.466091245376079e-06,
5685
+ "loss": 1.0733,
5686
+ "step": 811
5687
+ },
5688
+ {
5689
+ "epoch": 0.8910836762688614,
5690
+ "grad_norm": 0.13843494653701782,
5691
+ "learning_rate": 2.441430332922318e-06,
5692
+ "loss": 1.0353,
5693
+ "step": 812
5694
+ },
5695
+ {
5696
+ "epoch": 0.8921810699588477,
5697
+ "grad_norm": 0.14887547492980957,
5698
+ "learning_rate": 2.416769420468558e-06,
5699
+ "loss": 0.9972,
5700
+ "step": 813
5701
+ },
5702
+ {
5703
+ "epoch": 0.893278463648834,
5704
+ "grad_norm": 0.13981156051158905,
5705
+ "learning_rate": 2.3921085080147967e-06,
5706
+ "loss": 1.2125,
5707
+ "step": 814
5708
+ },
5709
+ {
5710
+ "epoch": 0.8943758573388203,
5711
+ "grad_norm": 0.13580431044101715,
5712
+ "learning_rate": 2.367447595561036e-06,
5713
+ "loss": 1.2606,
5714
+ "step": 815
5715
+ },
5716
+ {
5717
+ "epoch": 0.8954732510288066,
5718
+ "grad_norm": 0.14896319806575775,
5719
+ "learning_rate": 2.342786683107275e-06,
5720
+ "loss": 1.1067,
5721
+ "step": 816
5722
+ },
5723
+ {
5724
+ "epoch": 0.8965706447187929,
5725
+ "grad_norm": 0.1296452283859253,
5726
+ "learning_rate": 2.3181257706535143e-06,
5727
+ "loss": 1.0699,
5728
+ "step": 817
5729
+ },
5730
+ {
5731
+ "epoch": 0.8976680384087792,
5732
+ "grad_norm": 0.145808607339859,
5733
+ "learning_rate": 2.2934648581997536e-06,
5734
+ "loss": 1.1611,
5735
+ "step": 818
5736
+ },
5737
+ {
5738
+ "epoch": 0.8987654320987655,
5739
+ "grad_norm": 0.13980808854103088,
5740
+ "learning_rate": 2.268803945745993e-06,
5741
+ "loss": 1.0291,
5742
+ "step": 819
5743
+ },
5744
+ {
5745
+ "epoch": 0.8998628257887518,
5746
+ "grad_norm": 0.13322117924690247,
5747
+ "learning_rate": 2.244143033292232e-06,
5748
+ "loss": 1.0608,
5749
+ "step": 820
5750
+ },
5751
+ {
5752
+ "epoch": 0.900960219478738,
5753
+ "grad_norm": 0.1385853886604309,
5754
+ "learning_rate": 2.219482120838471e-06,
5755
+ "loss": 1.0595,
5756
+ "step": 821
5757
+ },
5758
+ {
5759
+ "epoch": 0.9020576131687242,
5760
+ "grad_norm": 0.16439485549926758,
5761
+ "learning_rate": 2.1948212083847105e-06,
5762
+ "loss": 1.031,
5763
+ "step": 822
5764
+ },
5765
+ {
5766
+ "epoch": 0.9031550068587105,
5767
+ "grad_norm": 0.12988966703414917,
5768
+ "learning_rate": 2.1701602959309494e-06,
5769
+ "loss": 1.0905,
5770
+ "step": 823
5771
+ },
5772
+ {
5773
+ "epoch": 0.9042524005486968,
5774
+ "grad_norm": 0.13069093227386475,
5775
+ "learning_rate": 2.1454993834771887e-06,
5776
+ "loss": 1.1536,
5777
+ "step": 824
5778
+ },
5779
+ {
5780
+ "epoch": 0.9053497942386831,
5781
+ "grad_norm": 0.13863211870193481,
5782
+ "learning_rate": 2.120838471023428e-06,
5783
+ "loss": 1.1898,
5784
+ "step": 825
5785
+ },
5786
+ {
5787
+ "epoch": 0.9064471879286694,
5788
+ "grad_norm": 0.14132994413375854,
5789
+ "learning_rate": 2.0961775585696674e-06,
5790
+ "loss": 1.1759,
5791
+ "step": 826
5792
+ },
5793
+ {
5794
+ "epoch": 0.9075445816186557,
5795
+ "grad_norm": 0.14824488759040833,
5796
+ "learning_rate": 2.0715166461159063e-06,
5797
+ "loss": 1.0744,
5798
+ "step": 827
5799
+ },
5800
+ {
5801
+ "epoch": 0.908641975308642,
5802
+ "grad_norm": 0.1388639658689499,
5803
+ "learning_rate": 2.0468557336621456e-06,
5804
+ "loss": 1.0687,
5805
+ "step": 828
5806
+ },
5807
+ {
5808
+ "epoch": 0.9097393689986283,
5809
+ "grad_norm": 0.14056843519210815,
5810
+ "learning_rate": 2.022194821208385e-06,
5811
+ "loss": 1.1299,
5812
+ "step": 829
5813
+ },
5814
+ {
5815
+ "epoch": 0.9108367626886146,
5816
+ "grad_norm": 0.1364564299583435,
5817
+ "learning_rate": 1.9975339087546243e-06,
5818
+ "loss": 1.1216,
5819
+ "step": 830
5820
+ },
5821
+ {
5822
+ "epoch": 0.9119341563786009,
5823
+ "grad_norm": 0.14670343697071075,
5824
+ "learning_rate": 1.9728729963008632e-06,
5825
+ "loss": 1.0785,
5826
+ "step": 831
5827
+ },
5828
+ {
5829
+ "epoch": 0.9130315500685872,
5830
+ "grad_norm": 0.13665646314620972,
5831
+ "learning_rate": 1.9482120838471025e-06,
5832
+ "loss": 1.0355,
5833
+ "step": 832
5834
+ },
5835
+ {
5836
+ "epoch": 0.9141289437585733,
5837
+ "grad_norm": 0.1377921998500824,
5838
+ "learning_rate": 1.9235511713933415e-06,
5839
+ "loss": 1.1445,
5840
+ "step": 833
5841
+ },
5842
+ {
5843
+ "epoch": 0.9152263374485596,
5844
+ "grad_norm": 0.12789370119571686,
5845
+ "learning_rate": 1.8988902589395808e-06,
5846
+ "loss": 1.1242,
5847
+ "step": 834
5848
+ },
5849
+ {
5850
+ "epoch": 0.9163237311385459,
5851
+ "grad_norm": 0.12622785568237305,
5852
+ "learning_rate": 1.8742293464858201e-06,
5853
+ "loss": 1.1358,
5854
+ "step": 835
5855
+ },
5856
+ {
5857
+ "epoch": 0.9174211248285322,
5858
+ "grad_norm": 0.14954856038093567,
5859
+ "learning_rate": 1.8495684340320595e-06,
5860
+ "loss": 1.0822,
5861
+ "step": 836
5862
+ },
5863
+ {
5864
+ "epoch": 0.9185185185185185,
5865
+ "grad_norm": 0.12256734073162079,
5866
+ "learning_rate": 1.8249075215782986e-06,
5867
+ "loss": 1.0968,
5868
+ "step": 837
5869
  }
5870
  ],
5871
  "logging_steps": 1,
 
5885
  "attributes": {}
5886
  }
5887
  },
5888
+ "total_flos": 8.67918284112937e+17,
5889
  "train_batch_size": 4,
5890
  "trial_name": null,
5891
  "trial_params": null