kmnis commited on
Commit
48c9b52
·
1 Parent(s): 578c4de

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:227406cf9f8c3ed4c7b57871e5ea249d82e689257232840bcb04f29e3861b0eb
3
  size 19744138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb9161afa5d6de49b7d4607930397f3b8680b15a014838d9a51b8431ed94b0c5
3
  size 19744138
last-checkpoint/global_step2000/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3e6486b33b9f9699e667a0d3bf2c0c9e7460150b77eac6aa64dd3ba6062b9f9
3
+ size 6508458036
last-checkpoint/global_step2000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:710bec908b49c920ae032f919ee1c910c6209a803677937cbf6dcf2f127244d8
3
+ size 29495149
last-checkpoint/global_step2000/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25213fc5dbea9d2f91a7def92940c4fb8a41382d6f1d0617d85116c0b92612c9
3
+ size 6508458036
last-checkpoint/global_step2000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c3528795d45372246c2b8fbe30f235dfb3382a9cbf4e630a7ba769c1566d31
3
+ size 29495149
last-checkpoint/global_step2000/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2abe3724da23ef9e3105ccc9ec9ea5116de6c4af98ad83d973cfb6830fac6ef
3
+ size 6508458036
last-checkpoint/global_step2000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f2c0cf6f89838a55f4bc9f0dda11faf0569bc734375b6560790a2c1f407725e
3
+ size 29495149
last-checkpoint/global_step2000/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4e983f783585a3c339c728679be5627b3559014ccec6418ba814746b830587
3
+ size 6508458036
last-checkpoint/global_step2000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c18d17a5961435774a94a192edafbd12dc17b94cfddb11a6bd233abf0d99055
3
+ size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1500
 
1
+ global_step2000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aae6d6720b15d33c5be4514b3a84567730795fd13a72414380b33c8627313482
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5907a54cddc69fe20c02c40139b18624ac2dbae5bcf42b9774c58b64c40b44c7
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21210fc84cdeb9ead738b46e656c45624567b9d572c3ad0ea9fa169dff66448e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c083526885eb8bf3a2ee040372afafd0bb8ab3fad4c8309d345237f500f3a1c
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8694520fe08f7a3fe633f373a0dc945b74219cd9f27299648abfd33c17fb7442
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c68b0b0876647c73f1e948eb1f7cd95a5f2a05b6f8a5d5f754e4e9a76c606d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20a9d6951e84f6da45adca2f04a48bf1d75088b7d2fe1a9040ff33c84bae9cfc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c858b96940d74592597bc7a918935a99c2fc8e9f641f494a7e5c566c09a6221
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6323777403035413,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -907,13 +907,313 @@
907
  "learning_rate": 1e-05,
908
  "loss": 0.705,
909
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  }
911
  ],
912
  "logging_steps": 10,
913
  "max_steps": 10000,
914
  "num_train_epochs": 5,
915
  "save_steps": 500,
916
- "total_flos": 376920751472640.0,
917
  "trial_name": null,
918
  "trial_params": null
919
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8431703204047217,
5
  "eval_steps": 500,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
907
  "learning_rate": 1e-05,
908
  "loss": 0.705,
909
  "step": 1500
910
+ },
911
+ {
912
+ "epoch": 0.64,
913
+ "learning_rate": 1e-05,
914
+ "loss": 0.6431,
915
+ "step": 1510
916
+ },
917
+ {
918
+ "epoch": 0.64,
919
+ "learning_rate": 1e-05,
920
+ "loss": 0.6615,
921
+ "step": 1520
922
+ },
923
+ {
924
+ "epoch": 0.65,
925
+ "learning_rate": 1e-05,
926
+ "loss": 0.7101,
927
+ "step": 1530
928
+ },
929
+ {
930
+ "epoch": 0.65,
931
+ "learning_rate": 1e-05,
932
+ "loss": 0.7265,
933
+ "step": 1540
934
+ },
935
+ {
936
+ "epoch": 0.65,
937
+ "learning_rate": 1e-05,
938
+ "loss": 0.7188,
939
+ "step": 1550
940
+ },
941
+ {
942
+ "epoch": 0.66,
943
+ "learning_rate": 1e-05,
944
+ "loss": 0.7148,
945
+ "step": 1560
946
+ },
947
+ {
948
+ "epoch": 0.66,
949
+ "learning_rate": 1e-05,
950
+ "loss": 0.7713,
951
+ "step": 1570
952
+ },
953
+ {
954
+ "epoch": 0.67,
955
+ "learning_rate": 1e-05,
956
+ "loss": 0.6934,
957
+ "step": 1580
958
+ },
959
+ {
960
+ "epoch": 0.67,
961
+ "learning_rate": 1e-05,
962
+ "loss": 0.6369,
963
+ "step": 1590
964
+ },
965
+ {
966
+ "epoch": 0.67,
967
+ "learning_rate": 1e-05,
968
+ "loss": 0.673,
969
+ "step": 1600
970
+ },
971
+ {
972
+ "epoch": 0.68,
973
+ "learning_rate": 1e-05,
974
+ "loss": 0.7517,
975
+ "step": 1610
976
+ },
977
+ {
978
+ "epoch": 0.68,
979
+ "learning_rate": 1e-05,
980
+ "loss": 0.6622,
981
+ "step": 1620
982
+ },
983
+ {
984
+ "epoch": 0.69,
985
+ "learning_rate": 1e-05,
986
+ "loss": 0.6657,
987
+ "step": 1630
988
+ },
989
+ {
990
+ "epoch": 0.69,
991
+ "learning_rate": 1e-05,
992
+ "loss": 0.6895,
993
+ "step": 1640
994
+ },
995
+ {
996
+ "epoch": 0.7,
997
+ "learning_rate": 1e-05,
998
+ "loss": 0.7961,
999
+ "step": 1650
1000
+ },
1001
+ {
1002
+ "epoch": 0.7,
1003
+ "learning_rate": 1e-05,
1004
+ "loss": 0.677,
1005
+ "step": 1660
1006
+ },
1007
+ {
1008
+ "epoch": 0.7,
1009
+ "learning_rate": 1e-05,
1010
+ "loss": 0.6579,
1011
+ "step": 1670
1012
+ },
1013
+ {
1014
+ "epoch": 0.71,
1015
+ "learning_rate": 1e-05,
1016
+ "loss": 0.6797,
1017
+ "step": 1680
1018
+ },
1019
+ {
1020
+ "epoch": 0.71,
1021
+ "learning_rate": 1e-05,
1022
+ "loss": 0.7199,
1023
+ "step": 1690
1024
+ },
1025
+ {
1026
+ "epoch": 0.72,
1027
+ "learning_rate": 1e-05,
1028
+ "loss": 0.7065,
1029
+ "step": 1700
1030
+ },
1031
+ {
1032
+ "epoch": 0.72,
1033
+ "learning_rate": 1e-05,
1034
+ "loss": 0.7162,
1035
+ "step": 1710
1036
+ },
1037
+ {
1038
+ "epoch": 0.73,
1039
+ "learning_rate": 1e-05,
1040
+ "loss": 0.6851,
1041
+ "step": 1720
1042
+ },
1043
+ {
1044
+ "epoch": 0.73,
1045
+ "learning_rate": 1e-05,
1046
+ "loss": 0.7251,
1047
+ "step": 1730
1048
+ },
1049
+ {
1050
+ "epoch": 0.73,
1051
+ "learning_rate": 1e-05,
1052
+ "loss": 0.6808,
1053
+ "step": 1740
1054
+ },
1055
+ {
1056
+ "epoch": 0.74,
1057
+ "learning_rate": 1e-05,
1058
+ "loss": 0.7358,
1059
+ "step": 1750
1060
+ },
1061
+ {
1062
+ "epoch": 0.74,
1063
+ "learning_rate": 1e-05,
1064
+ "loss": 0.7403,
1065
+ "step": 1760
1066
+ },
1067
+ {
1068
+ "epoch": 0.75,
1069
+ "learning_rate": 1e-05,
1070
+ "loss": 0.7051,
1071
+ "step": 1770
1072
+ },
1073
+ {
1074
+ "epoch": 0.75,
1075
+ "learning_rate": 1e-05,
1076
+ "loss": 0.6662,
1077
+ "step": 1780
1078
+ },
1079
+ {
1080
+ "epoch": 0.75,
1081
+ "learning_rate": 1e-05,
1082
+ "loss": 0.6922,
1083
+ "step": 1790
1084
+ },
1085
+ {
1086
+ "epoch": 0.76,
1087
+ "learning_rate": 1e-05,
1088
+ "loss": 0.7086,
1089
+ "step": 1800
1090
+ },
1091
+ {
1092
+ "epoch": 0.76,
1093
+ "learning_rate": 1e-05,
1094
+ "loss": 0.6558,
1095
+ "step": 1810
1096
+ },
1097
+ {
1098
+ "epoch": 0.77,
1099
+ "learning_rate": 1e-05,
1100
+ "loss": 0.6953,
1101
+ "step": 1820
1102
+ },
1103
+ {
1104
+ "epoch": 0.77,
1105
+ "learning_rate": 1e-05,
1106
+ "loss": 0.7355,
1107
+ "step": 1830
1108
+ },
1109
+ {
1110
+ "epoch": 0.78,
1111
+ "learning_rate": 1e-05,
1112
+ "loss": 0.7042,
1113
+ "step": 1840
1114
+ },
1115
+ {
1116
+ "epoch": 0.78,
1117
+ "learning_rate": 1e-05,
1118
+ "loss": 0.6836,
1119
+ "step": 1850
1120
+ },
1121
+ {
1122
+ "epoch": 0.78,
1123
+ "learning_rate": 1e-05,
1124
+ "loss": 0.7482,
1125
+ "step": 1860
1126
+ },
1127
+ {
1128
+ "epoch": 0.79,
1129
+ "learning_rate": 1e-05,
1130
+ "loss": 0.7727,
1131
+ "step": 1870
1132
+ },
1133
+ {
1134
+ "epoch": 0.79,
1135
+ "learning_rate": 1e-05,
1136
+ "loss": 0.7302,
1137
+ "step": 1880
1138
+ },
1139
+ {
1140
+ "epoch": 0.8,
1141
+ "learning_rate": 1e-05,
1142
+ "loss": 0.686,
1143
+ "step": 1890
1144
+ },
1145
+ {
1146
+ "epoch": 0.8,
1147
+ "learning_rate": 1e-05,
1148
+ "loss": 0.7062,
1149
+ "step": 1900
1150
+ },
1151
+ {
1152
+ "epoch": 0.81,
1153
+ "learning_rate": 1e-05,
1154
+ "loss": 0.7315,
1155
+ "step": 1910
1156
+ },
1157
+ {
1158
+ "epoch": 0.81,
1159
+ "learning_rate": 1e-05,
1160
+ "loss": 0.6259,
1161
+ "step": 1920
1162
+ },
1163
+ {
1164
+ "epoch": 0.81,
1165
+ "learning_rate": 1e-05,
1166
+ "loss": 0.6999,
1167
+ "step": 1930
1168
+ },
1169
+ {
1170
+ "epoch": 0.82,
1171
+ "learning_rate": 1e-05,
1172
+ "loss": 0.6675,
1173
+ "step": 1940
1174
+ },
1175
+ {
1176
+ "epoch": 0.82,
1177
+ "learning_rate": 1e-05,
1178
+ "loss": 0.6953,
1179
+ "step": 1950
1180
+ },
1181
+ {
1182
+ "epoch": 0.83,
1183
+ "learning_rate": 1e-05,
1184
+ "loss": 0.7274,
1185
+ "step": 1960
1186
+ },
1187
+ {
1188
+ "epoch": 0.83,
1189
+ "learning_rate": 1e-05,
1190
+ "loss": 0.6864,
1191
+ "step": 1970
1192
+ },
1193
+ {
1194
+ "epoch": 0.83,
1195
+ "learning_rate": 1e-05,
1196
+ "loss": 0.6822,
1197
+ "step": 1980
1198
+ },
1199
+ {
1200
+ "epoch": 0.84,
1201
+ "learning_rate": 1e-05,
1202
+ "loss": 0.6641,
1203
+ "step": 1990
1204
+ },
1205
+ {
1206
+ "epoch": 0.84,
1207
+ "learning_rate": 1e-05,
1208
+ "loss": 0.6513,
1209
+ "step": 2000
1210
  }
1211
  ],
1212
  "logging_steps": 10,
1213
  "max_steps": 10000,
1214
  "num_train_epochs": 5,
1215
  "save_steps": 500,
1216
+ "total_flos": 502812786032640.0,
1217
  "trial_name": null,
1218
  "trial_params": null
1219
  }