DuongTrongChi commited on
Commit
7a68a8f
1 Parent(s): 91d5ac7

Training in progress, step 133, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b570962ede4265c9488fa98dcd00095b1ca3d903d14f064ee79d3cb2379651f4
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6b65aace9da077ee07a204876e8098d96dad47b8b4be8f51e9b9cbec2ff2393
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68afdadf9dcafbea18732f32b8ac5fa2ad488bf587daf988c9af28727179daa0
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:527fada47e774dd945cf8bd8fa9cb84eca9c449c19271b31a9ecbc61d6166143
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e779822c485743db355cfc0cc7805b58345253d12afcfcd7953cd3834152cb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c15c8690a2da73762f6bc4ec11beb5e387acb500a6b50782f5eba470e3275a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16948223906492557,
5
  "eval_steps": 500,
6
- "global_step": 116,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -819,6 +819,125 @@
819
  "learning_rate": 1.945205479452055e-05,
820
  "loss": 1.2942,
821
  "step": 116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  }
823
  ],
824
  "logging_steps": 1,
@@ -838,7 +957,7 @@
838
  "attributes": {}
839
  }
840
  },
841
- "total_flos": 1.304737835336663e+17,
842
  "train_batch_size": 4,
843
  "trial_name": null,
844
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.19432015341064743,
5
  "eval_steps": 500,
6
+ "global_step": 133,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
819
  "learning_rate": 1.945205479452055e-05,
820
  "loss": 1.2942,
821
  "step": 116
822
+ },
823
+ {
824
+ "epoch": 0.17094329284996804,
825
+ "grad_norm": 0.1350926160812378,
826
+ "learning_rate": 1.9417808219178084e-05,
827
+ "loss": 1.3649,
828
+ "step": 117
829
+ },
830
+ {
831
+ "epoch": 0.1724043466350105,
832
+ "grad_norm": 0.1309756338596344,
833
+ "learning_rate": 1.9383561643835617e-05,
834
+ "loss": 1.3241,
835
+ "step": 118
836
+ },
837
+ {
838
+ "epoch": 0.17386540042005297,
839
+ "grad_norm": 0.12676255404949188,
840
+ "learning_rate": 1.9349315068493153e-05,
841
+ "loss": 1.3773,
842
+ "step": 119
843
+ },
844
+ {
845
+ "epoch": 0.17532645420509543,
846
+ "grad_norm": 0.1361505538225174,
847
+ "learning_rate": 1.9315068493150686e-05,
848
+ "loss": 1.3259,
849
+ "step": 120
850
+ },
851
+ {
852
+ "epoch": 0.1767875079901379,
853
+ "grad_norm": 0.1225372925400734,
854
+ "learning_rate": 1.9280821917808223e-05,
855
+ "loss": 1.2854,
856
+ "step": 121
857
+ },
858
+ {
859
+ "epoch": 0.17824856177518034,
860
+ "grad_norm": 0.12647689878940582,
861
+ "learning_rate": 1.9246575342465756e-05,
862
+ "loss": 1.2741,
863
+ "step": 122
864
+ },
865
+ {
866
+ "epoch": 0.1797096155602228,
867
+ "grad_norm": 0.12492359429597855,
868
+ "learning_rate": 1.921232876712329e-05,
869
+ "loss": 1.2728,
870
+ "step": 123
871
+ },
872
+ {
873
+ "epoch": 0.18117066934526527,
874
+ "grad_norm": 0.1245495080947876,
875
+ "learning_rate": 1.9178082191780822e-05,
876
+ "loss": 1.2829,
877
+ "step": 124
878
+ },
879
+ {
880
+ "epoch": 0.18263172313030773,
881
+ "grad_norm": 0.12087871879339218,
882
+ "learning_rate": 1.9143835616438358e-05,
883
+ "loss": 1.3673,
884
+ "step": 125
885
+ },
886
+ {
887
+ "epoch": 0.1840927769153502,
888
+ "grad_norm": 0.13881978392601013,
889
+ "learning_rate": 1.910958904109589e-05,
890
+ "loss": 1.2749,
891
+ "step": 126
892
+ },
893
+ {
894
+ "epoch": 0.18555383070039266,
895
+ "grad_norm": 0.12226665765047073,
896
+ "learning_rate": 1.9075342465753424e-05,
897
+ "loss": 1.3216,
898
+ "step": 127
899
+ },
900
+ {
901
+ "epoch": 0.18701488448543513,
902
+ "grad_norm": 0.12877057492733002,
903
+ "learning_rate": 1.904109589041096e-05,
904
+ "loss": 1.3632,
905
+ "step": 128
906
+ },
907
+ {
908
+ "epoch": 0.1884759382704776,
909
+ "grad_norm": 0.13042791187763214,
910
+ "learning_rate": 1.9006849315068494e-05,
911
+ "loss": 1.3087,
912
+ "step": 129
913
+ },
914
+ {
915
+ "epoch": 0.18993699205552003,
916
+ "grad_norm": 0.1289220005273819,
917
+ "learning_rate": 1.897260273972603e-05,
918
+ "loss": 1.3569,
919
+ "step": 130
920
+ },
921
+ {
922
+ "epoch": 0.1913980458405625,
923
+ "grad_norm": 0.12027924507856369,
924
+ "learning_rate": 1.8938356164383563e-05,
925
+ "loss": 1.3487,
926
+ "step": 131
927
+ },
928
+ {
929
+ "epoch": 0.19285909962560496,
930
+ "grad_norm": 0.13009122014045715,
931
+ "learning_rate": 1.8904109589041096e-05,
932
+ "loss": 1.2639,
933
+ "step": 132
934
+ },
935
+ {
936
+ "epoch": 0.19432015341064743,
937
+ "grad_norm": 0.13321325182914734,
938
+ "learning_rate": 1.8869863013698633e-05,
939
+ "loss": 1.2982,
940
+ "step": 133
941
  }
942
  ],
943
  "logging_steps": 1,
 
957
  "attributes": {}
958
  }
959
  },
960
+ "total_flos": 1.4946336109032653e+17,
961
  "train_batch_size": 4,
962
  "trial_name": null,
963
  "trial_params": null