schnell commited on
Commit
4d5ac0d
1 Parent(s): 6b5e602

Training in progress, epoch 4

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ad3d537ea4fd7d6ad4fa6cf73b1b40dd110b483d065f2705e6949affdc6cf17
3
  size 236491269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ef39ac23e005dc178e9ef1a544aa5b8f96467e8469680a8607d2d7347b1ee53
3
  size 236491269
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2906e434c2be5553d8ba690fa00f38995006c684a8b4fbef8476c7418f239877
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2079a756a8358ae3a535c523dc50111795059680977ed08b4832ffbac8ef5fc
3
  size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fddd988ead107be498ca0838e5d6931249bb674b40ce6296875748270d271cc4
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af9d4596051180f9613336aed3a4246c78a50ebaf3faba7d98ece3fdb0a943a
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45ac1a1a1f98b3964d76c33e861170daff6e880017ea91dcfeb6f1af152554e5
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbda19fc21cde44b0ae7a422108e69de01877a51e50220752bf822b9a78b1f9
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29cf36739c6a6691ce7e0e701bc722960d20ddd1e069bdfc942253c223daa611
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5962a4375a7ac9a133f1b777607905c970b63f4d098090eca341092be76e05b
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
- "global_step": 68820,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -855,11 +855,296 @@
855
  "eval_samples_per_second": 603.314,
856
  "eval_steps_per_second": 37.707,
857
  "step": 68820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  }
859
  ],
860
  "max_steps": 321160,
861
  "num_train_epochs": 14,
862
- "total_flos": 5.2328760513448845e+17,
863
  "trial_name": null,
864
  "trial_params": null
865
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "global_step": 91760,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
855
  "eval_samples_per_second": 603.314,
856
  "eval_steps_per_second": 37.707,
857
  "step": 68820
858
+ },
859
+ {
860
+ "epoch": 3.01,
861
+ "learning_rate": 7.93173726521318e-05,
862
+ "loss": 1.8202,
863
+ "step": 69000
864
+ },
865
+ {
866
+ "epoch": 3.03,
867
+ "learning_rate": 7.916011423251602e-05,
868
+ "loss": 1.8172,
869
+ "step": 69500
870
+ },
871
+ {
872
+ "epoch": 3.05,
873
+ "learning_rate": 7.900285581290023e-05,
874
+ "loss": 1.8126,
875
+ "step": 70000
876
+ },
877
+ {
878
+ "epoch": 3.07,
879
+ "learning_rate": 7.884591191012367e-05,
880
+ "loss": 1.8141,
881
+ "step": 70500
882
+ },
883
+ {
884
+ "epoch": 3.1,
885
+ "learning_rate": 7.868865349050788e-05,
886
+ "loss": 1.8052,
887
+ "step": 71000
888
+ },
889
+ {
890
+ "epoch": 3.12,
891
+ "learning_rate": 7.85313950708921e-05,
892
+ "loss": 1.8075,
893
+ "step": 71500
894
+ },
895
+ {
896
+ "epoch": 3.14,
897
+ "learning_rate": 7.83741366512763e-05,
898
+ "loss": 1.8115,
899
+ "step": 72000
900
+ },
901
+ {
902
+ "epoch": 3.16,
903
+ "learning_rate": 7.821719274849975e-05,
904
+ "loss": 1.8076,
905
+ "step": 72500
906
+ },
907
+ {
908
+ "epoch": 3.18,
909
+ "learning_rate": 7.805993432888397e-05,
910
+ "loss": 1.8078,
911
+ "step": 73000
912
+ },
913
+ {
914
+ "epoch": 3.2,
915
+ "learning_rate": 7.790267590926819e-05,
916
+ "loss": 1.8015,
917
+ "step": 73500
918
+ },
919
+ {
920
+ "epoch": 3.23,
921
+ "learning_rate": 7.77454174896524e-05,
922
+ "loss": 1.8043,
923
+ "step": 74000
924
+ },
925
+ {
926
+ "epoch": 3.25,
927
+ "learning_rate": 7.758847358687584e-05,
928
+ "loss": 1.8002,
929
+ "step": 74500
930
+ },
931
+ {
932
+ "epoch": 3.27,
933
+ "learning_rate": 7.743121516726006e-05,
934
+ "loss": 1.7954,
935
+ "step": 75000
936
+ },
937
+ {
938
+ "epoch": 3.29,
939
+ "learning_rate": 7.727395674764428e-05,
940
+ "loss": 1.7936,
941
+ "step": 75500
942
+ },
943
+ {
944
+ "epoch": 3.31,
945
+ "learning_rate": 7.711669832802849e-05,
946
+ "loss": 1.7903,
947
+ "step": 76000
948
+ },
949
+ {
950
+ "epoch": 3.33,
951
+ "learning_rate": 7.695975442525193e-05,
952
+ "loss": 1.7951,
953
+ "step": 76500
954
+ },
955
+ {
956
+ "epoch": 3.36,
957
+ "learning_rate": 7.680249600563615e-05,
958
+ "loss": 1.7946,
959
+ "step": 77000
960
+ },
961
+ {
962
+ "epoch": 3.38,
963
+ "learning_rate": 7.664523758602035e-05,
964
+ "loss": 1.7946,
965
+ "step": 77500
966
+ },
967
+ {
968
+ "epoch": 3.4,
969
+ "learning_rate": 7.648797916640458e-05,
970
+ "loss": 1.7902,
971
+ "step": 78000
972
+ },
973
+ {
974
+ "epoch": 3.42,
975
+ "learning_rate": 7.633103526362802e-05,
976
+ "loss": 1.7905,
977
+ "step": 78500
978
+ },
979
+ {
980
+ "epoch": 3.44,
981
+ "learning_rate": 7.617377684401223e-05,
982
+ "loss": 1.7898,
983
+ "step": 79000
984
+ },
985
+ {
986
+ "epoch": 3.47,
987
+ "learning_rate": 7.601651842439644e-05,
988
+ "loss": 1.7885,
989
+ "step": 79500
990
+ },
991
+ {
992
+ "epoch": 3.49,
993
+ "learning_rate": 7.585926000478067e-05,
994
+ "loss": 1.7852,
995
+ "step": 80000
996
+ },
997
+ {
998
+ "epoch": 3.51,
999
+ "learning_rate": 7.57023161020041e-05,
1000
+ "loss": 1.7815,
1001
+ "step": 80500
1002
+ },
1003
+ {
1004
+ "epoch": 3.53,
1005
+ "learning_rate": 7.554505768238832e-05,
1006
+ "loss": 1.7833,
1007
+ "step": 81000
1008
+ },
1009
+ {
1010
+ "epoch": 3.55,
1011
+ "learning_rate": 7.538779926277253e-05,
1012
+ "loss": 1.7815,
1013
+ "step": 81500
1014
+ },
1015
+ {
1016
+ "epoch": 3.57,
1017
+ "learning_rate": 7.523054084315676e-05,
1018
+ "loss": 1.7778,
1019
+ "step": 82000
1020
+ },
1021
+ {
1022
+ "epoch": 3.6,
1023
+ "learning_rate": 7.507359694038019e-05,
1024
+ "loss": 1.7811,
1025
+ "step": 82500
1026
+ },
1027
+ {
1028
+ "epoch": 3.62,
1029
+ "learning_rate": 7.491633852076441e-05,
1030
+ "loss": 1.7786,
1031
+ "step": 83000
1032
+ },
1033
+ {
1034
+ "epoch": 3.64,
1035
+ "learning_rate": 7.475908010114862e-05,
1036
+ "loss": 1.7767,
1037
+ "step": 83500
1038
+ },
1039
+ {
1040
+ "epoch": 3.66,
1041
+ "learning_rate": 7.460182168153283e-05,
1042
+ "loss": 1.7804,
1043
+ "step": 84000
1044
+ },
1045
+ {
1046
+ "epoch": 3.68,
1047
+ "learning_rate": 7.444487777875628e-05,
1048
+ "loss": 1.7716,
1049
+ "step": 84500
1050
+ },
1051
+ {
1052
+ "epoch": 3.71,
1053
+ "learning_rate": 7.428761935914049e-05,
1054
+ "loss": 1.7777,
1055
+ "step": 85000
1056
+ },
1057
+ {
1058
+ "epoch": 3.73,
1059
+ "learning_rate": 7.41303609395247e-05,
1060
+ "loss": 1.7712,
1061
+ "step": 85500
1062
+ },
1063
+ {
1064
+ "epoch": 3.75,
1065
+ "learning_rate": 7.397310251990892e-05,
1066
+ "loss": 1.7678,
1067
+ "step": 86000
1068
+ },
1069
+ {
1070
+ "epoch": 3.77,
1071
+ "learning_rate": 7.381615861713237e-05,
1072
+ "loss": 1.7702,
1073
+ "step": 86500
1074
+ },
1075
+ {
1076
+ "epoch": 3.79,
1077
+ "learning_rate": 7.365890019751658e-05,
1078
+ "loss": 1.7701,
1079
+ "step": 87000
1080
+ },
1081
+ {
1082
+ "epoch": 3.81,
1083
+ "learning_rate": 7.350164177790079e-05,
1084
+ "loss": 1.7689,
1085
+ "step": 87500
1086
+ },
1087
+ {
1088
+ "epoch": 3.84,
1089
+ "learning_rate": 7.334438335828501e-05,
1090
+ "loss": 1.7665,
1091
+ "step": 88000
1092
+ },
1093
+ {
1094
+ "epoch": 3.86,
1095
+ "learning_rate": 7.318743945550846e-05,
1096
+ "loss": 1.7711,
1097
+ "step": 88500
1098
+ },
1099
+ {
1100
+ "epoch": 3.88,
1101
+ "learning_rate": 7.303018103589267e-05,
1102
+ "loss": 1.7626,
1103
+ "step": 89000
1104
+ },
1105
+ {
1106
+ "epoch": 3.9,
1107
+ "learning_rate": 7.287292261627688e-05,
1108
+ "loss": 1.7628,
1109
+ "step": 89500
1110
+ },
1111
+ {
1112
+ "epoch": 3.92,
1113
+ "learning_rate": 7.271566419666109e-05,
1114
+ "loss": 1.7589,
1115
+ "step": 90000
1116
+ },
1117
+ {
1118
+ "epoch": 3.95,
1119
+ "learning_rate": 7.255872029388455e-05,
1120
+ "loss": 1.7614,
1121
+ "step": 90500
1122
+ },
1123
+ {
1124
+ "epoch": 3.97,
1125
+ "learning_rate": 7.240146187426874e-05,
1126
+ "loss": 1.7646,
1127
+ "step": 91000
1128
+ },
1129
+ {
1130
+ "epoch": 3.99,
1131
+ "learning_rate": 7.224420345465297e-05,
1132
+ "loss": 1.7636,
1133
+ "step": 91500
1134
+ },
1135
+ {
1136
+ "epoch": 4.0,
1137
+ "eval_accuracy": 0.6662888180155826,
1138
+ "eval_loss": 1.6273692846298218,
1139
+ "eval_runtime": 294.4726,
1140
+ "eval_samples_per_second": 604.307,
1141
+ "eval_steps_per_second": 37.769,
1142
+ "step": 91760
1143
  }
1144
  ],
1145
  "max_steps": 321160,
1146
  "num_train_epochs": 14,
1147
+ "total_flos": 6.9770786168245e+17,
1148
  "trial_name": null,
1149
  "trial_params": null
1150
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2906e434c2be5553d8ba690fa00f38995006c684a8b4fbef8476c7418f239877
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2079a756a8358ae3a535c523dc50111795059680977ed08b4832ffbac8ef5fc
3
  size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bf4c282d434ce5ba540303c16207d3c8f5d389184c1a3bbf4dd4df23675b929
3
- size 26579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c61cb2a0b744c6498ef70bdac9f8c45f62963b3650e88b2e9078f86270e723f
3
+ size 34268