eddysang committed on
Commit
56e1533
·
verified ·
1 Parent(s): 5ae63d4

Training in progress, step 136, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b16313c79bd3f248bf4d438764be663bf73293a82f845bfc82bbdc1c96faa18
3
  size 97728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5fcff8219bc1ba7c5267761e9d8e58b26e76bd77b4498ffe3eb4be61d378e5a
3
  size 97728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8820d9b006cab186b61503299fdb9b87f1c8bf0451bfe4bbedc210c4bc63254a
3
  size 212298
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9cc90d01583459809b77feb1f6a0bc3c1c8d2e63c24f720751cb3d88d193fe
3
  size 212298
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0126cf7d989a7263b97f1fe2ca3d6bc2827ac39dc2b4674586229158dba72ea3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9288f3ba37e7c624f2b8517ced4aae32ee804313f3587dcfb02d5f13209458f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4a2e0d38e4aad78961b54bb99f0a18c11d847ea1d3d3bc12cf223a3f862cf9e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aeb1fbb5e964bbc83fa43b049054867ad1faca8f43271132d241ae074069d5d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.40317628374801484,
5
  "eval_steps": 50,
6
- "global_step": 119,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -864,6 +864,125 @@
864
  "learning_rate": 6.326741512198266e-05,
865
  "loss": 10.346,
866
  "step": 119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
867
  }
868
  ],
869
  "logging_steps": 1,
@@ -883,7 +1002,7 @@
883
  "attributes": {}
884
  }
885
  },
886
- "total_flos": 50987546050560.0,
887
  "train_batch_size": 2,
888
  "trial_name": null,
889
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.46077289571201696,
5
  "eval_steps": 50,
6
+ "global_step": 136,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
864
  "learning_rate": 6.326741512198266e-05,
865
  "loss": 10.346,
866
  "step": 119
867
+ },
868
+ {
869
+ "epoch": 0.4065643197458973,
870
+ "grad_norm": 0.020479971542954445,
871
+ "learning_rate": 6.197638667498022e-05,
872
+ "loss": 10.3456,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 0.40995235574377975,
877
+ "grad_norm": 0.023177366703748703,
878
+ "learning_rate": 6.068932534675913e-05,
879
+ "loss": 10.3449,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 0.41334039174166226,
884
+ "grad_norm": 0.01870677061378956,
885
+ "learning_rate": 5.9406623188668055e-05,
886
+ "loss": 10.346,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 0.4167284277395447,
891
+ "grad_norm": 0.0195186547935009,
892
+ "learning_rate": 5.812867092421013e-05,
893
+ "loss": 10.3459,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 0.4201164637374272,
898
+ "grad_norm": 0.02066197618842125,
899
+ "learning_rate": 5.685585783002493e-05,
900
+ "loss": 10.3451,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 0.4235044997353097,
905
+ "grad_norm": 0.018614448606967926,
906
+ "learning_rate": 5.558857161731093e-05,
907
+ "loss": 10.3456,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 0.4268925357331922,
912
+ "grad_norm": 0.02485392615199089,
913
+ "learning_rate": 5.4327198313725064e-05,
914
+ "loss": 10.3456,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 0.43028057173107465,
919
+ "grad_norm": 0.019080353900790215,
920
+ "learning_rate": 5.307212214579474e-05,
921
+ "loss": 10.3463,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 0.4336686077289571,
926
+ "grad_norm": 0.02017894946038723,
927
+ "learning_rate": 5.182372542187895e-05,
928
+ "loss": 10.3456,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 0.4370566437268396,
933
+ "grad_norm": 0.02289474382996559,
934
+ "learning_rate": 5.058238841571326e-05,
935
+ "loss": 10.3443,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 0.44044467972472207,
940
+ "grad_norm": 0.02067600190639496,
941
+ "learning_rate": 4.934848925057484e-05,
942
+ "loss": 10.3459,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 0.4438327157226046,
947
+ "grad_norm": 0.02071257308125496,
948
+ "learning_rate": 4.812240378410248e-05,
949
+ "loss": 10.3452,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 0.44722075172048703,
954
+ "grad_norm": 0.02436411753296852,
955
+ "learning_rate": 4.690450549380659e-05,
956
+ "loss": 10.3449,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 0.4506087877183695,
961
+ "grad_norm": 0.0237566027790308,
962
+ "learning_rate": 4.569516536330447e-05,
963
+ "loss": 10.3462,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 0.453996823716252,
968
+ "grad_norm": 0.029039116576313972,
969
+ "learning_rate": 4.449475176931499e-05,
970
+ "loss": 10.3455,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 0.45738485971413445,
975
+ "grad_norm": 0.021290864795446396,
976
+ "learning_rate": 4.3303630369447554e-05,
977
+ "loss": 10.3446,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 0.46077289571201696,
982
+ "grad_norm": 0.01688864268362522,
983
+ "learning_rate": 4.212216399081918e-05,
984
+ "loss": 10.3439,
985
+ "step": 136
986
  }
987
  ],
988
  "logging_steps": 1,
 
1002
  "attributes": {}
1003
  }
1004
  },
1005
+ "total_flos": 58258116182016.0,
1006
  "train_batch_size": 2,
1007
  "trial_name": null,
1008
  "trial_params": null