ToastyPigeon committed on
Commit
f6e62de
·
verified ·
1 Parent(s): 6f58f1f

Training in progress, step 156, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step156/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step156/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step156/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step156/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step156/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step156/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step156/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step156/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step156/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step156/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step156/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step156/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step156/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step156/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step156/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step156/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +284 -3
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:510e88c394d7b67aa131e16eb2d4057894848ee91683b790cfe04a0ffd4371fe
3
  size 550593856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8546044019154ce3968e6a6ba6a86f7aa3175824cef87135b2149628694d455c
3
  size 550593856
last-checkpoint/global_step156/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fb5be0a5f26cb07c799f46bb1862098d91fa16518bfc8875ac9fcc5e7740561
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa645ee95c39b5b497ff5f86cfd818320fe51bb72fd47f138e22d48a26dab541
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210fc2037e25678118657e27309536a0fa50b5ba86c636be21bdb2209f2dec97
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec460de6bd172eae661c430cf806a16a0c1607eb52adf5bc5c7e171411e85ab6
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95dd69a185a59513ffa3ff81a5b3f6388c84710694de7b43dfb078f8c70f1e85
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23918528a67b93abd5356257022ed6da16f23ebcee411b5992ff681cea30903d
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a6353c0c6295705f9ded5e47fb1513485a975279c507405959431a9d1cc963
3
+ size 243590592
last-checkpoint/global_step156/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:164b62b107af40c179352364ffb2009aec6bfe4fd593b2869afded7221558fe1
3
+ size 243590592
last-checkpoint/global_step156/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:debf1185e8b75b8e355f0cee329a6fcb38feca0910d724b7eb7dc2ec8a38efd6
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1a2705bb1b4552cdea7af94d241262202b6b8a67c862f451ac3d7b98c34e8b
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8f491814d9c49f2165e4d8c5b31531f16593aaf1661d2506585d3e6b993eeb9
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48be9ef8d59d015becf0ed68c0f3bd3cc5cf2a42ccf7623e178b595c196e6ec9
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a82ac3de9087af93f59e8236dacc634c86e0e30a109e3ef84dba0aab387c66a
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e8cc083f831ef3443f1f1702cde406b2390f4d408a93dca6eb700156c23082
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c36dd49ee3aae7b73530658214d7872f7c323aa63363c6bb64e5cf2444719300
3
+ size 211435686
last-checkpoint/global_step156/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766f01511652728b366ce75467fcb3e68f9361004823a91082d03ec486dd3f23
3
+ size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step117
 
1
+ global_step156
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e1c39d9c1614e5a5db49c2adcaea58c50f91741ffdc85357e20965a732ab918
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c561e839b2d36fc5a0e0f7b6936e77fe5a5c8df4881367ae16771a48d03931
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:302ede7ac90ff693eab149e42ad40ee9fc950c1ef8919c67ed57abb0686bbbe5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb40f6e44cd62eb5b3d663b4cd4a22621261ff4270ac7334a82a6c5654582d4f
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd760eaa5a69675cd82d86a72d467afbc30b5630d5157f32efb4ea7ae17fc2f2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:809d109a794ef0cf1a0ad4ebb3119098360aab2adb6f08bddff73f40f315a7e5
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ba53ce4c9ec1239ebb539d4b9d5b4d60afc5de8260b83004180fbef5fa1ad12
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b412bb58b079b8554c3ec8e287b647f0d5eb9af588672a244913fe25977d512c
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f68cc323d6a100d3e96e363031f94ca17676b3152edfe25789da3092f4e267
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:500bd67adda8cf35848a173b29f37f06b58546a6456d0acafb829bcd30658146
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f9ed76e42e0966ecee5df95b284ed0f8c2c9eef1e477abfa1252598e3566c6a
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2ef4aff8cc70904fad22aaf8933fc74acea3612ad213c1998d6fbe418d76763
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81c7b8456e2d33481d89db677988b4ad26b87e262b4951b9a31bbe8c34add21b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5aa0392cb422a7717e1c27ead3b9ec1f2b7fa04f0e7c4a432e11c7d9fd4d50c
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6154b2373b0985a816ea00d8d6b2c75a97f53aa8e7a463dabbbf8cd42f0a62c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d820cd9be3a77ff0f6d736eb3ab23e99a61accdfc90219866962f22853a7422
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:317bfb15ccbbc1daa774ce78869237c5810b7c8eb3f18edff11bd8175bd36feb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22af69e6b655a005d672b3a204eb6484f3ef29e640fcbdffa3f8d613a5d04cf0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3038961038961039,
5
  "eval_steps": 39,
6
- "global_step": 117,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -858,6 +858,287 @@
858
  "eval_samples_per_second": 1.233,
859
  "eval_steps_per_second": 0.154,
860
  "step": 117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
861
  }
862
  ],
863
  "logging_steps": 1,
@@ -877,7 +1158,7 @@
877
  "attributes": {}
878
  }
879
  },
880
- "total_flos": 38677656895488.0,
881
  "train_batch_size": 1,
882
  "trial_name": null,
883
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4051948051948052,
5
  "eval_steps": 39,
6
+ "global_step": 156,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
858
  "eval_samples_per_second": 1.233,
859
  "eval_steps_per_second": 0.154,
860
  "step": 117
861
+ },
862
+ {
863
+ "epoch": 0.3064935064935065,
864
+ "grad_norm": 1.11071544369134,
865
+ "learning_rate": 8.49184929083929e-05,
866
+ "loss": 2.6445,
867
+ "step": 118
868
+ },
869
+ {
870
+ "epoch": 0.3090909090909091,
871
+ "grad_norm": 1.1571880416140317,
872
+ "learning_rate": 8.462807131141221e-05,
873
+ "loss": 2.5551,
874
+ "step": 119
875
+ },
876
+ {
877
+ "epoch": 0.3116883116883117,
878
+ "grad_norm": 0.9303010187060028,
879
+ "learning_rate": 8.433545481454206e-05,
880
+ "loss": 2.3873,
881
+ "step": 120
882
+ },
883
+ {
884
+ "epoch": 0.3142857142857143,
885
+ "grad_norm": 0.9575555447726172,
886
+ "learning_rate": 8.404066509532978e-05,
887
+ "loss": 2.5088,
888
+ "step": 121
889
+ },
890
+ {
891
+ "epoch": 0.3168831168831169,
892
+ "grad_norm": 0.9710292387067033,
893
+ "learning_rate": 8.374372399231873e-05,
894
+ "loss": 2.4983,
895
+ "step": 122
896
+ },
897
+ {
898
+ "epoch": 0.3194805194805195,
899
+ "grad_norm": 1.048408170787146,
900
+ "learning_rate": 8.344465350343069e-05,
901
+ "loss": 2.3834,
902
+ "step": 123
903
+ },
904
+ {
905
+ "epoch": 0.3220779220779221,
906
+ "grad_norm": 0.9705702680470921,
907
+ "learning_rate": 8.314347578433608e-05,
908
+ "loss": 2.4382,
909
+ "step": 124
910
+ },
911
+ {
912
+ "epoch": 0.3246753246753247,
913
+ "grad_norm": 0.9614277096166124,
914
+ "learning_rate": 8.284021314681265e-05,
915
+ "loss": 2.3393,
916
+ "step": 125
917
+ },
918
+ {
919
+ "epoch": 0.32727272727272727,
920
+ "grad_norm": 0.9281949363762984,
921
+ "learning_rate": 8.253488805709259e-05,
922
+ "loss": 2.6182,
923
+ "step": 126
924
+ },
925
+ {
926
+ "epoch": 0.32987012987012987,
927
+ "grad_norm": 0.9623514960091324,
928
+ "learning_rate": 8.222752313419822e-05,
929
+ "loss": 2.4305,
930
+ "step": 127
931
+ },
932
+ {
933
+ "epoch": 0.33246753246753247,
934
+ "grad_norm": 0.9531142482825348,
935
+ "learning_rate": 8.191814114826625e-05,
936
+ "loss": 2.5015,
937
+ "step": 128
938
+ },
939
+ {
940
+ "epoch": 0.33506493506493507,
941
+ "grad_norm": 1.031934013410237,
942
+ "learning_rate": 8.160676501886102e-05,
943
+ "loss": 2.4596,
944
+ "step": 129
945
+ },
946
+ {
947
+ "epoch": 0.33766233766233766,
948
+ "grad_norm": 1.055900967871222,
949
+ "learning_rate": 8.129341781327658e-05,
950
+ "loss": 2.7157,
951
+ "step": 130
952
+ },
953
+ {
954
+ "epoch": 0.34025974025974026,
955
+ "grad_norm": 0.9833403621365481,
956
+ "learning_rate": 8.097812274482774e-05,
957
+ "loss": 2.3566,
958
+ "step": 131
959
+ },
960
+ {
961
+ "epoch": 0.34285714285714286,
962
+ "grad_norm": 1.0871003085779585,
963
+ "learning_rate": 8.066090317113043e-05,
964
+ "loss": 2.4638,
965
+ "step": 132
966
+ },
967
+ {
968
+ "epoch": 0.34545454545454546,
969
+ "grad_norm": 0.9406528674917229,
970
+ "learning_rate": 8.034178259237134e-05,
971
+ "loss": 2.2718,
972
+ "step": 133
973
+ },
974
+ {
975
+ "epoch": 0.34805194805194806,
976
+ "grad_norm": 0.9582596034300516,
977
+ "learning_rate": 8.002078464956701e-05,
978
+ "loss": 2.4254,
979
+ "step": 134
980
+ },
981
+ {
982
+ "epoch": 0.35064935064935066,
983
+ "grad_norm": 1.0907653310302854,
984
+ "learning_rate": 7.969793312281237e-05,
985
+ "loss": 2.4145,
986
+ "step": 135
987
+ },
988
+ {
989
+ "epoch": 0.35324675324675325,
990
+ "grad_norm": 1.0102280592637967,
991
+ "learning_rate": 7.937325192951917e-05,
992
+ "loss": 2.4132,
993
+ "step": 136
994
+ },
995
+ {
996
+ "epoch": 0.35584415584415585,
997
+ "grad_norm": 1.06644159749959,
998
+ "learning_rate": 7.904676512264406e-05,
999
+ "loss": 2.4369,
1000
+ "step": 137
1001
+ },
1002
+ {
1003
+ "epoch": 0.35844155844155845,
1004
+ "grad_norm": 0.9673017199924268,
1005
+ "learning_rate": 7.871849688890674e-05,
1006
+ "loss": 2.4244,
1007
+ "step": 138
1008
+ },
1009
+ {
1010
+ "epoch": 0.36103896103896105,
1011
+ "grad_norm": 1.2268344248013403,
1012
+ "learning_rate": 7.838847154699821e-05,
1013
+ "loss": 2.2285,
1014
+ "step": 139
1015
+ },
1016
+ {
1017
+ "epoch": 0.36363636363636365,
1018
+ "grad_norm": 1.073462737631204,
1019
+ "learning_rate": 7.805671354577908e-05,
1020
+ "loss": 2.3111,
1021
+ "step": 140
1022
+ },
1023
+ {
1024
+ "epoch": 0.36623376623376624,
1025
+ "grad_norm": 1.574004120111183,
1026
+ "learning_rate": 7.772324746246842e-05,
1027
+ "loss": 2.4494,
1028
+ "step": 141
1029
+ },
1030
+ {
1031
+ "epoch": 0.36883116883116884,
1032
+ "grad_norm": 0.9241427879924983,
1033
+ "learning_rate": 7.738809800082314e-05,
1034
+ "loss": 2.3341,
1035
+ "step": 142
1036
+ },
1037
+ {
1038
+ "epoch": 0.37142857142857144,
1039
+ "grad_norm": 0.8766148723800662,
1040
+ "learning_rate": 7.705128998930766e-05,
1041
+ "loss": 2.2851,
1042
+ "step": 143
1043
+ },
1044
+ {
1045
+ "epoch": 0.37402597402597404,
1046
+ "grad_norm": 1.0218314429150428,
1047
+ "learning_rate": 7.671284837925483e-05,
1048
+ "loss": 2.3307,
1049
+ "step": 144
1050
+ },
1051
+ {
1052
+ "epoch": 0.37662337662337664,
1053
+ "grad_norm": 1.0085214768978241,
1054
+ "learning_rate": 7.637279824301728e-05,
1055
+ "loss": 2.407,
1056
+ "step": 145
1057
+ },
1058
+ {
1059
+ "epoch": 0.37922077922077924,
1060
+ "grad_norm": 1.1498069068985362,
1061
+ "learning_rate": 7.60311647721101e-05,
1062
+ "loss": 2.3903,
1063
+ "step": 146
1064
+ },
1065
+ {
1066
+ "epoch": 0.38181818181818183,
1067
+ "grad_norm": 0.9898512411601635,
1068
+ "learning_rate": 7.56879732753447e-05,
1069
+ "loss": 2.4284,
1070
+ "step": 147
1071
+ },
1072
+ {
1073
+ "epoch": 0.38441558441558443,
1074
+ "grad_norm": 1.0768194487299194,
1075
+ "learning_rate": 7.53432491769537e-05,
1076
+ "loss": 2.4555,
1077
+ "step": 148
1078
+ },
1079
+ {
1080
+ "epoch": 0.38701298701298703,
1081
+ "grad_norm": 1.2604006103307979,
1082
+ "learning_rate": 7.49970180147076e-05,
1083
+ "loss": 2.5891,
1084
+ "step": 149
1085
+ },
1086
+ {
1087
+ "epoch": 0.38961038961038963,
1088
+ "grad_norm": 1.0237041933548676,
1089
+ "learning_rate": 7.464930543802289e-05,
1090
+ "loss": 2.4212,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.3922077922077922,
1095
+ "grad_norm": 1.0931014839822084,
1096
+ "learning_rate": 7.430013720606176e-05,
1097
+ "loss": 2.4502,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.3948051948051948,
1102
+ "grad_norm": 1.2220277182692068,
1103
+ "learning_rate": 7.394953918582403e-05,
1104
+ "loss": 2.4569,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.3974025974025974,
1109
+ "grad_norm": 1.1663502019072647,
1110
+ "learning_rate": 7.35975373502307e-05,
1111
+ "loss": 2.3729,
1112
+ "step": 153
1113
+ },
1114
+ {
1115
+ "epoch": 0.4,
1116
+ "grad_norm": 1.0686488749921856,
1117
+ "learning_rate": 7.324415777619988e-05,
1118
+ "loss": 2.3302,
1119
+ "step": 154
1120
+ },
1121
+ {
1122
+ "epoch": 0.4025974025974026,
1123
+ "grad_norm": 0.929261927962379,
1124
+ "learning_rate": 7.288942664271503e-05,
1125
+ "loss": 2.2966,
1126
+ "step": 155
1127
+ },
1128
+ {
1129
+ "epoch": 0.4051948051948052,
1130
+ "grad_norm": 0.957275997972424,
1131
+ "learning_rate": 7.253337022888546e-05,
1132
+ "loss": 2.2596,
1133
+ "step": 156
1134
+ },
1135
+ {
1136
+ "epoch": 0.4051948051948052,
1137
+ "eval_loss": 2.42930269241333,
1138
+ "eval_runtime": 64.9704,
1139
+ "eval_samples_per_second": 1.231,
1140
+ "eval_steps_per_second": 0.154,
1141
+ "step": 156
1142
  }
1143
  ],
1144
  "logging_steps": 1,
 
1158
  "attributes": {}
1159
  }
1160
  },
1161
+ "total_flos": 51570209193984.0,
1162
  "train_batch_size": 1,
1163
  "trial_name": null,
1164
  "trial_params": null