schnell commited on
Commit
df543e6
1 Parent(s): 4d5ac0d

Training in progress, epoch 5

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ef39ac23e005dc178e9ef1a544aa5b8f96467e8469680a8607d2d7347b1ee53
3
  size 236491269
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c89b45e2ac69f6293669df6313ee21058cc2af394a4390bba808d002466ccd2
3
  size 236491269
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2079a756a8358ae3a535c523dc50111795059680977ed08b4832ffbac8ef5fc
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
3
  size 118253458
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8af9d4596051180f9613336aed3a4246c78a50ebaf3faba7d98ece3fdb0a943a
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3214d2294320f690f0de7c994db816bb6b393e81c203b4bd95ac5070d6787ff6
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccbda19fc21cde44b0ae7a422108e69de01877a51e50220752bf822b9a78b1f9
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:274b42d0611ce03f64f3695a4574be0aa6d9ba137add81127f546ea0347ddbe2
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5962a4375a7ac9a133f1b777607905c970b63f4d098090eca341092be76e05b
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e06a18a940ba98c5f1a42737f61af8460d78a917521a83b1e094435306c8218
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.0,
5
- "global_step": 91760,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1140,11 +1140,296 @@
1140
  "eval_samples_per_second": 604.307,
1141
  "eval_steps_per_second": 37.769,
1142
  "step": 91760
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1143
  }
1144
  ],
1145
  "max_steps": 321160,
1146
  "num_train_epochs": 14,
1147
- "total_flos": 6.9770786168245e+17,
1148
  "trial_name": null,
1149
  "trial_params": null
1150
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "global_step": 114700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1140
  "eval_samples_per_second": 604.307,
1141
  "eval_steps_per_second": 37.769,
1142
  "step": 91760
1143
+ },
1144
+ {
1145
+ "epoch": 4.01,
1146
+ "learning_rate": 7.208694503503718e-05,
1147
+ "loss": 1.7576,
1148
+ "step": 92000
1149
+ },
1150
+ {
1151
+ "epoch": 4.03,
1152
+ "learning_rate": 7.193000113226062e-05,
1153
+ "loss": 1.7556,
1154
+ "step": 92500
1155
+ },
1156
+ {
1157
+ "epoch": 4.05,
1158
+ "learning_rate": 7.177274271264483e-05,
1159
+ "loss": 1.7484,
1160
+ "step": 93000
1161
+ },
1162
+ {
1163
+ "epoch": 4.08,
1164
+ "learning_rate": 7.161548429302906e-05,
1165
+ "loss": 1.7533,
1166
+ "step": 93500
1167
+ },
1168
+ {
1169
+ "epoch": 4.1,
1170
+ "learning_rate": 7.145822587341327e-05,
1171
+ "loss": 1.7508,
1172
+ "step": 94000
1173
+ },
1174
+ {
1175
+ "epoch": 4.12,
1176
+ "learning_rate": 7.130128197063671e-05,
1177
+ "loss": 1.7589,
1178
+ "step": 94500
1179
+ },
1180
+ {
1181
+ "epoch": 4.14,
1182
+ "learning_rate": 7.114402355102092e-05,
1183
+ "loss": 1.7485,
1184
+ "step": 95000
1185
+ },
1186
+ {
1187
+ "epoch": 4.16,
1188
+ "learning_rate": 7.098676513140515e-05,
1189
+ "loss": 1.7477,
1190
+ "step": 95500
1191
+ },
1192
+ {
1193
+ "epoch": 4.18,
1194
+ "learning_rate": 7.082950671178934e-05,
1195
+ "loss": 1.7523,
1196
+ "step": 96000
1197
+ },
1198
+ {
1199
+ "epoch": 4.21,
1200
+ "learning_rate": 7.06725628090128e-05,
1201
+ "loss": 1.7443,
1202
+ "step": 96500
1203
+ },
1204
+ {
1205
+ "epoch": 4.23,
1206
+ "learning_rate": 7.051530438939701e-05,
1207
+ "loss": 1.7481,
1208
+ "step": 97000
1209
+ },
1210
+ {
1211
+ "epoch": 4.25,
1212
+ "learning_rate": 7.035804596978122e-05,
1213
+ "loss": 1.7421,
1214
+ "step": 97500
1215
+ },
1216
+ {
1217
+ "epoch": 4.27,
1218
+ "learning_rate": 7.020078755016543e-05,
1219
+ "loss": 1.7449,
1220
+ "step": 98000
1221
+ },
1222
+ {
1223
+ "epoch": 4.29,
1224
+ "learning_rate": 7.004384364738888e-05,
1225
+ "loss": 1.7453,
1226
+ "step": 98500
1227
+ },
1228
+ {
1229
+ "epoch": 4.32,
1230
+ "learning_rate": 6.988658522777309e-05,
1231
+ "loss": 1.7443,
1232
+ "step": 99000
1233
+ },
1234
+ {
1235
+ "epoch": 4.34,
1236
+ "learning_rate": 6.972932680815731e-05,
1237
+ "loss": 1.7398,
1238
+ "step": 99500
1239
+ },
1240
+ {
1241
+ "epoch": 4.36,
1242
+ "learning_rate": 6.957206838854152e-05,
1243
+ "loss": 1.7387,
1244
+ "step": 100000
1245
+ },
1246
+ {
1247
+ "epoch": 4.38,
1248
+ "learning_rate": 6.941512448576497e-05,
1249
+ "loss": 1.7398,
1250
+ "step": 100500
1251
+ },
1252
+ {
1253
+ "epoch": 4.4,
1254
+ "learning_rate": 6.925786606614918e-05,
1255
+ "loss": 1.7383,
1256
+ "step": 101000
1257
+ },
1258
+ {
1259
+ "epoch": 4.42,
1260
+ "learning_rate": 6.91006076465334e-05,
1261
+ "loss": 1.7379,
1262
+ "step": 101500
1263
+ },
1264
+ {
1265
+ "epoch": 4.45,
1266
+ "learning_rate": 6.894334922691761e-05,
1267
+ "loss": 1.7365,
1268
+ "step": 102000
1269
+ },
1270
+ {
1271
+ "epoch": 4.47,
1272
+ "learning_rate": 6.878640532414106e-05,
1273
+ "loss": 1.7372,
1274
+ "step": 102500
1275
+ },
1276
+ {
1277
+ "epoch": 4.49,
1278
+ "learning_rate": 6.862914690452527e-05,
1279
+ "loss": 1.7336,
1280
+ "step": 103000
1281
+ },
1282
+ {
1283
+ "epoch": 4.51,
1284
+ "learning_rate": 6.847188848490948e-05,
1285
+ "loss": 1.7379,
1286
+ "step": 103500
1287
+ },
1288
+ {
1289
+ "epoch": 4.53,
1290
+ "learning_rate": 6.831463006529369e-05,
1291
+ "loss": 1.7325,
1292
+ "step": 104000
1293
+ },
1294
+ {
1295
+ "epoch": 4.56,
1296
+ "learning_rate": 6.815768616251714e-05,
1297
+ "loss": 1.7329,
1298
+ "step": 104500
1299
+ },
1300
+ {
1301
+ "epoch": 4.58,
1302
+ "learning_rate": 6.800042774290136e-05,
1303
+ "loss": 1.7335,
1304
+ "step": 105000
1305
+ },
1306
+ {
1307
+ "epoch": 4.6,
1308
+ "learning_rate": 6.784316932328557e-05,
1309
+ "loss": 1.7347,
1310
+ "step": 105500
1311
+ },
1312
+ {
1313
+ "epoch": 4.62,
1314
+ "learning_rate": 6.768591090366978e-05,
1315
+ "loss": 1.7293,
1316
+ "step": 106000
1317
+ },
1318
+ {
1319
+ "epoch": 4.64,
1320
+ "learning_rate": 6.752896700089323e-05,
1321
+ "loss": 1.7307,
1322
+ "step": 106500
1323
+ },
1324
+ {
1325
+ "epoch": 4.66,
1326
+ "learning_rate": 6.737170858127745e-05,
1327
+ "loss": 1.7264,
1328
+ "step": 107000
1329
+ },
1330
+ {
1331
+ "epoch": 4.69,
1332
+ "learning_rate": 6.721445016166166e-05,
1333
+ "loss": 1.7294,
1334
+ "step": 107500
1335
+ },
1336
+ {
1337
+ "epoch": 4.71,
1338
+ "learning_rate": 6.705719174204587e-05,
1339
+ "loss": 1.7324,
1340
+ "step": 108000
1341
+ },
1342
+ {
1343
+ "epoch": 4.73,
1344
+ "learning_rate": 6.690024783926932e-05,
1345
+ "loss": 1.7279,
1346
+ "step": 108500
1347
+ },
1348
+ {
1349
+ "epoch": 4.75,
1350
+ "learning_rate": 6.674298941965354e-05,
1351
+ "loss": 1.7228,
1352
+ "step": 109000
1353
+ },
1354
+ {
1355
+ "epoch": 4.77,
1356
+ "learning_rate": 6.658573100003774e-05,
1357
+ "loss": 1.7248,
1358
+ "step": 109500
1359
+ },
1360
+ {
1361
+ "epoch": 4.8,
1362
+ "learning_rate": 6.642847258042196e-05,
1363
+ "loss": 1.7278,
1364
+ "step": 110000
1365
+ },
1366
+ {
1367
+ "epoch": 4.82,
1368
+ "learning_rate": 6.62715286776454e-05,
1369
+ "loss": 1.7211,
1370
+ "step": 110500
1371
+ },
1372
+ {
1373
+ "epoch": 4.84,
1374
+ "learning_rate": 6.611427025802962e-05,
1375
+ "loss": 1.7227,
1376
+ "step": 111000
1377
+ },
1378
+ {
1379
+ "epoch": 4.86,
1380
+ "learning_rate": 6.595701183841383e-05,
1381
+ "loss": 1.7181,
1382
+ "step": 111500
1383
+ },
1384
+ {
1385
+ "epoch": 4.88,
1386
+ "learning_rate": 6.579975341879805e-05,
1387
+ "loss": 1.7188,
1388
+ "step": 112000
1389
+ },
1390
+ {
1391
+ "epoch": 4.9,
1392
+ "learning_rate": 6.564280951602148e-05,
1393
+ "loss": 1.7169,
1394
+ "step": 112500
1395
+ },
1396
+ {
1397
+ "epoch": 4.93,
1398
+ "learning_rate": 6.54855510964057e-05,
1399
+ "loss": 1.7186,
1400
+ "step": 113000
1401
+ },
1402
+ {
1403
+ "epoch": 4.95,
1404
+ "learning_rate": 6.532829267678992e-05,
1405
+ "loss": 1.7199,
1406
+ "step": 113500
1407
+ },
1408
+ {
1409
+ "epoch": 4.97,
1410
+ "learning_rate": 6.517103425717414e-05,
1411
+ "loss": 1.7216,
1412
+ "step": 114000
1413
+ },
1414
+ {
1415
+ "epoch": 4.99,
1416
+ "learning_rate": 6.501409035439757e-05,
1417
+ "loss": 1.7173,
1418
+ "step": 114500
1419
+ },
1420
+ {
1421
+ "epoch": 5.0,
1422
+ "eval_accuracy": 0.6715102448572298,
1423
+ "eval_loss": 1.5880473852157593,
1424
+ "eval_runtime": 294.7287,
1425
+ "eval_samples_per_second": 603.782,
1426
+ "eval_steps_per_second": 37.736,
1427
+ "step": 114700
1428
  }
1429
  ],
1430
  "max_steps": 321160,
1431
  "num_train_epochs": 14,
1432
+ "total_flos": 8.721147965075726e+17,
1433
  "trial_name": null,
1434
  "trial_params": null
1435
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2079a756a8358ae3a535c523dc50111795059680977ed08b4832ffbac8ef5fc
3
  size 118253458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6862122e6b9f883fcb720027b7e6e1ee4be3dcf9522d8dc9475125e601d5bc53
3
  size 118253458
runs/Feb20_18-29-06_ubuntu-2004/events.out.tfevents.1676885357.ubuntu-2004.887393.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c61cb2a0b744c6498ef70bdac9f8c45f62963b3650e88b2e9078f86270e723f
3
- size 34268
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87287f5853697bd232c1d288f5654c67eba5514d42ef65d845face5a9d732001
3
+ size 41957