Training in progress, epoch 5
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +288 -3
- pytorch_model.bin +1 -1
- runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 236491269
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c89b45e2ac69f6293669df6313ee21058cc2af394a4390bba808d002466ccd2
|
3 |
size 236491269
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118253458
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
|
3 |
size 118253458
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15597
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3214d2294320f690f0de7c994db816bb6b393e81c203b4bd95ac5070d6787ff6
|
3 |
size 15597
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 557
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:274b42d0611ce03f64f3695a4574be0aa6d9ba137add81127f546ea0347ddbe2
|
3 |
size 557
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e06a18a940ba98c5f1a42737f61af8460d78a917521a83b1e094435306c8218
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1140,11 +1140,296 @@
|
|
1140 |
"eval_samples_per_second": 604.307,
|
1141 |
"eval_steps_per_second": 37.769,
|
1142 |
"step": 91760
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1143 |
}
|
1144 |
],
|
1145 |
"max_steps": 321160,
|
1146 |
"num_train_epochs": 14,
|
1147 |
-
"total_flos":
|
1148 |
"trial_name": null,
|
1149 |
"trial_params": null
|
1150 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 5.0,
|
5 |
+
"global_step": 114700,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1140 |
"eval_samples_per_second": 604.307,
|
1141 |
"eval_steps_per_second": 37.769,
|
1142 |
"step": 91760
|
1143 |
+
},
|
1144 |
+
{
|
1145 |
+
"epoch": 4.01,
|
1146 |
+
"learning_rate": 7.208694503503718e-05,
|
1147 |
+
"loss": 1.7576,
|
1148 |
+
"step": 92000
|
1149 |
+
},
|
1150 |
+
{
|
1151 |
+
"epoch": 4.03,
|
1152 |
+
"learning_rate": 7.193000113226062e-05,
|
1153 |
+
"loss": 1.7556,
|
1154 |
+
"step": 92500
|
1155 |
+
},
|
1156 |
+
{
|
1157 |
+
"epoch": 4.05,
|
1158 |
+
"learning_rate": 7.177274271264483e-05,
|
1159 |
+
"loss": 1.7484,
|
1160 |
+
"step": 93000
|
1161 |
+
},
|
1162 |
+
{
|
1163 |
+
"epoch": 4.08,
|
1164 |
+
"learning_rate": 7.161548429302906e-05,
|
1165 |
+
"loss": 1.7533,
|
1166 |
+
"step": 93500
|
1167 |
+
},
|
1168 |
+
{
|
1169 |
+
"epoch": 4.1,
|
1170 |
+
"learning_rate": 7.145822587341327e-05,
|
1171 |
+
"loss": 1.7508,
|
1172 |
+
"step": 94000
|
1173 |
+
},
|
1174 |
+
{
|
1175 |
+
"epoch": 4.12,
|
1176 |
+
"learning_rate": 7.130128197063671e-05,
|
1177 |
+
"loss": 1.7589,
|
1178 |
+
"step": 94500
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"epoch": 4.14,
|
1182 |
+
"learning_rate": 7.114402355102092e-05,
|
1183 |
+
"loss": 1.7485,
|
1184 |
+
"step": 95000
|
1185 |
+
},
|
1186 |
+
{
|
1187 |
+
"epoch": 4.16,
|
1188 |
+
"learning_rate": 7.098676513140515e-05,
|
1189 |
+
"loss": 1.7477,
|
1190 |
+
"step": 95500
|
1191 |
+
},
|
1192 |
+
{
|
1193 |
+
"epoch": 4.18,
|
1194 |
+
"learning_rate": 7.082950671178934e-05,
|
1195 |
+
"loss": 1.7523,
|
1196 |
+
"step": 96000
|
1197 |
+
},
|
1198 |
+
{
|
1199 |
+
"epoch": 4.21,
|
1200 |
+
"learning_rate": 7.06725628090128e-05,
|
1201 |
+
"loss": 1.7443,
|
1202 |
+
"step": 96500
|
1203 |
+
},
|
1204 |
+
{
|
1205 |
+
"epoch": 4.23,
|
1206 |
+
"learning_rate": 7.051530438939701e-05,
|
1207 |
+
"loss": 1.7481,
|
1208 |
+
"step": 97000
|
1209 |
+
},
|
1210 |
+
{
|
1211 |
+
"epoch": 4.25,
|
1212 |
+
"learning_rate": 7.035804596978122e-05,
|
1213 |
+
"loss": 1.7421,
|
1214 |
+
"step": 97500
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 4.27,
|
1218 |
+
"learning_rate": 7.020078755016543e-05,
|
1219 |
+
"loss": 1.7449,
|
1220 |
+
"step": 98000
|
1221 |
+
},
|
1222 |
+
{
|
1223 |
+
"epoch": 4.29,
|
1224 |
+
"learning_rate": 7.004384364738888e-05,
|
1225 |
+
"loss": 1.7453,
|
1226 |
+
"step": 98500
|
1227 |
+
},
|
1228 |
+
{
|
1229 |
+
"epoch": 4.32,
|
1230 |
+
"learning_rate": 6.988658522777309e-05,
|
1231 |
+
"loss": 1.7443,
|
1232 |
+
"step": 99000
|
1233 |
+
},
|
1234 |
+
{
|
1235 |
+
"epoch": 4.34,
|
1236 |
+
"learning_rate": 6.972932680815731e-05,
|
1237 |
+
"loss": 1.7398,
|
1238 |
+
"step": 99500
|
1239 |
+
},
|
1240 |
+
{
|
1241 |
+
"epoch": 4.36,
|
1242 |
+
"learning_rate": 6.957206838854152e-05,
|
1243 |
+
"loss": 1.7387,
|
1244 |
+
"step": 100000
|
1245 |
+
},
|
1246 |
+
{
|
1247 |
+
"epoch": 4.38,
|
1248 |
+
"learning_rate": 6.941512448576497e-05,
|
1249 |
+
"loss": 1.7398,
|
1250 |
+
"step": 100500
|
1251 |
+
},
|
1252 |
+
{
|
1253 |
+
"epoch": 4.4,
|
1254 |
+
"learning_rate": 6.925786606614918e-05,
|
1255 |
+
"loss": 1.7383,
|
1256 |
+
"step": 101000
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 4.42,
|
1260 |
+
"learning_rate": 6.91006076465334e-05,
|
1261 |
+
"loss": 1.7379,
|
1262 |
+
"step": 101500
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"epoch": 4.45,
|
1266 |
+
"learning_rate": 6.894334922691761e-05,
|
1267 |
+
"loss": 1.7365,
|
1268 |
+
"step": 102000
|
1269 |
+
},
|
1270 |
+
{
|
1271 |
+
"epoch": 4.47,
|
1272 |
+
"learning_rate": 6.878640532414106e-05,
|
1273 |
+
"loss": 1.7372,
|
1274 |
+
"step": 102500
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"epoch": 4.49,
|
1278 |
+
"learning_rate": 6.862914690452527e-05,
|
1279 |
+
"loss": 1.7336,
|
1280 |
+
"step": 103000
|
1281 |
+
},
|
1282 |
+
{
|
1283 |
+
"epoch": 4.51,
|
1284 |
+
"learning_rate": 6.847188848490948e-05,
|
1285 |
+
"loss": 1.7379,
|
1286 |
+
"step": 103500
|
1287 |
+
},
|
1288 |
+
{
|
1289 |
+
"epoch": 4.53,
|
1290 |
+
"learning_rate": 6.831463006529369e-05,
|
1291 |
+
"loss": 1.7325,
|
1292 |
+
"step": 104000
|
1293 |
+
},
|
1294 |
+
{
|
1295 |
+
"epoch": 4.56,
|
1296 |
+
"learning_rate": 6.815768616251714e-05,
|
1297 |
+
"loss": 1.7329,
|
1298 |
+
"step": 104500
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 4.58,
|
1302 |
+
"learning_rate": 6.800042774290136e-05,
|
1303 |
+
"loss": 1.7335,
|
1304 |
+
"step": 105000
|
1305 |
+
},
|
1306 |
+
{
|
1307 |
+
"epoch": 4.6,
|
1308 |
+
"learning_rate": 6.784316932328557e-05,
|
1309 |
+
"loss": 1.7347,
|
1310 |
+
"step": 105500
|
1311 |
+
},
|
1312 |
+
{
|
1313 |
+
"epoch": 4.62,
|
1314 |
+
"learning_rate": 6.768591090366978e-05,
|
1315 |
+
"loss": 1.7293,
|
1316 |
+
"step": 106000
|
1317 |
+
},
|
1318 |
+
{
|
1319 |
+
"epoch": 4.64,
|
1320 |
+
"learning_rate": 6.752896700089323e-05,
|
1321 |
+
"loss": 1.7307,
|
1322 |
+
"step": 106500
|
1323 |
+
},
|
1324 |
+
{
|
1325 |
+
"epoch": 4.66,
|
1326 |
+
"learning_rate": 6.737170858127745e-05,
|
1327 |
+
"loss": 1.7264,
|
1328 |
+
"step": 107000
|
1329 |
+
},
|
1330 |
+
{
|
1331 |
+
"epoch": 4.69,
|
1332 |
+
"learning_rate": 6.721445016166166e-05,
|
1333 |
+
"loss": 1.7294,
|
1334 |
+
"step": 107500
|
1335 |
+
},
|
1336 |
+
{
|
1337 |
+
"epoch": 4.71,
|
1338 |
+
"learning_rate": 6.705719174204587e-05,
|
1339 |
+
"loss": 1.7324,
|
1340 |
+
"step": 108000
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 4.73,
|
1344 |
+
"learning_rate": 6.690024783926932e-05,
|
1345 |
+
"loss": 1.7279,
|
1346 |
+
"step": 108500
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 4.75,
|
1350 |
+
"learning_rate": 6.674298941965354e-05,
|
1351 |
+
"loss": 1.7228,
|
1352 |
+
"step": 109000
|
1353 |
+
},
|
1354 |
+
{
|
1355 |
+
"epoch": 4.77,
|
1356 |
+
"learning_rate": 6.658573100003774e-05,
|
1357 |
+
"loss": 1.7248,
|
1358 |
+
"step": 109500
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 4.8,
|
1362 |
+
"learning_rate": 6.642847258042196e-05,
|
1363 |
+
"loss": 1.7278,
|
1364 |
+
"step": 110000
|
1365 |
+
},
|
1366 |
+
{
|
1367 |
+
"epoch": 4.82,
|
1368 |
+
"learning_rate": 6.62715286776454e-05,
|
1369 |
+
"loss": 1.7211,
|
1370 |
+
"step": 110500
|
1371 |
+
},
|
1372 |
+
{
|
1373 |
+
"epoch": 4.84,
|
1374 |
+
"learning_rate": 6.611427025802962e-05,
|
1375 |
+
"loss": 1.7227,
|
1376 |
+
"step": 111000
|
1377 |
+
},
|
1378 |
+
{
|
1379 |
+
"epoch": 4.86,
|
1380 |
+
"learning_rate": 6.595701183841383e-05,
|
1381 |
+
"loss": 1.7181,
|
1382 |
+
"step": 111500
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 4.88,
|
1386 |
+
"learning_rate": 6.579975341879805e-05,
|
1387 |
+
"loss": 1.7188,
|
1388 |
+
"step": 112000
|
1389 |
+
},
|
1390 |
+
{
|
1391 |
+
"epoch": 4.9,
|
1392 |
+
"learning_rate": 6.564280951602148e-05,
|
1393 |
+
"loss": 1.7169,
|
1394 |
+
"step": 112500
|
1395 |
+
},
|
1396 |
+
{
|
1397 |
+
"epoch": 4.93,
|
1398 |
+
"learning_rate": 6.54855510964057e-05,
|
1399 |
+
"loss": 1.7186,
|
1400 |
+
"step": 113000
|
1401 |
+
},
|
1402 |
+
{
|
1403 |
+
"epoch": 4.95,
|
1404 |
+
"learning_rate": 6.532829267678992e-05,
|
1405 |
+
"loss": 1.7199,
|
1406 |
+
"step": 113500
|
1407 |
+
},
|
1408 |
+
{
|
1409 |
+
"epoch": 4.97,
|
1410 |
+
"learning_rate": 6.517103425717414e-05,
|
1411 |
+
"loss": 1.7216,
|
1412 |
+
"step": 114000
|
1413 |
+
},
|
1414 |
+
{
|
1415 |
+
"epoch": 4.99,
|
1416 |
+
"learning_rate": 6.501409035439757e-05,
|
1417 |
+
"loss": 1.7173,
|
1418 |
+
"step": 114500
|
1419 |
+
},
|
1420 |
+
{
|
1421 |
+
"epoch": 5.0,
|
1422 |
+
"eval_accuracy": 0.6715102448572298,
|
1423 |
+
"eval_loss": 1.5880473852157593,
|
1424 |
+
"eval_runtime": 294.7287,
|
1425 |
+
"eval_samples_per_second": 603.782,
|
1426 |
+
"eval_steps_per_second": 37.736,
|
1427 |
+
"step": 114700
|
1428 |
}
|
1429 |
],
|
1430 |
"max_steps": 321160,
|
1431 |
"num_train_epochs": 14,
|
1432 |
+
"total_flos": 8.721147965075726e+17,
|
1433 |
"trial_name": null,
|
1434 |
"trial_params": null
|
1435 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 118253458
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
|
3 |
size 118253458
|
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87287f5853697bd232c1d288f5654c67eba5514d42ef65d845face5a9d732001
|
3 |
+
size 41957
|