kmnis committed
Commit 7e222af · 1 Parent(s): 5836442

Training in progress, step 2500, checkpoint

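For context, commits like this are what transformers.Trainer pushes to the Hub when checkpoint uploads are enabled; the last-checkpoint/ prefix on every file below matches the hub_strategy="checkpoint" behavior, which mirrors only the most recent checkpoint. The sketch below is hypothetical (the actual script, model, dataset, and DeepSpeed config are not part of this commit); only the numeric arguments are taken from the trainer_state.json changed here.

# A minimal, hypothetical reconstruction of the TrainingArguments behind this commit.
# Only the numbers visible in trainer_state.json are taken from the repo; everything
# else (output_dir, the DeepSpeed config path) is a placeholder.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="out",            # placeholder
    learning_rate=1e-5,          # "learning_rate": 1e-05 throughout the log history
    logging_steps=10,            # "logging_steps": 10
    save_steps=500,              # "save_steps": 500 -> checkpoints at 2000, 2500, ...
    max_steps=5000,              # "max_steps": 5000
    num_train_epochs=3,          # "num_train_epochs": 3 (max_steps takes precedence)
    push_to_hub=True,            # checkpoints are pushed to the Hub during training
    hub_strategy="checkpoint",   # uploads the latest checkpoint under last-checkpoint/
    # deepspeed="ds_config.json",  # placeholder; the zero_pp_rank_* shards imply a ZeRO config
)
# Trainer(model=..., args=args, train_dataset=...).train() would then produce commits
# titled "Training in progress, step N, checkpoint" every 500 optimizer steps.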
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:be60ba8c5321f17ec4b3f2b6a50f255c7034e1a8d70ca9633be1a0b46a85dfa2
+ oid sha256:9f93f9dbc221ce7eb2f7a986f2bdf2025a0344f7d11dda174684e2f36ab62d20
  size 19744138
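Each changed file in this commit is a Git LFS pointer (version, oid sha256, size), not the binary itself. A hypothetical way to check that a locally downloaded copy matches the new pointer; the local path is an assumption, while the expected hash and size are copied from the diff above:

# Verify a downloaded checkpoint file against its Git LFS pointer (sha256 oid + byte size).
import hashlib
import os

path = "last-checkpoint/adapter_model.bin"  # assumed local download location
expected_oid = "9f93f9dbc221ce7eb2f7a986f2bdf2025a0344f7d11dda174684e2f36ab62d20"
expected_size = 19744138

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches:", path)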
last-checkpoint/global_step2500/zero_pp_rank_0_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:646fe8a551f2bd17b48c6752c7304ae8e1fcf3843feb5be277804d085d96f19a
+ oid sha256:70c46d86da91d9c6ec1531e6c6d8a2dd86b8cdba8a4a17be4fa512f40a9d78cc
  size 6508458036
last-checkpoint/global_step2500/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2750a56e16ea0229cf9f3e8e23006bfb9a7c0358aa28d39d92863d9ab7c0a580
+ oid sha256:8856be1339d6883c09d0b8988ff5df8643c90607dcc44c5ee0c96e06a8012e43
  size 29495149
last-checkpoint/global_step2500/zero_pp_rank_1_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f99081c9def57becd4d3a4d9de52a72f89f23a102200db3216b54d510d8a1cc
+ oid sha256:b9e04d558bf61661db9da9d77c6440a377a127f675876f05975e834670b4091c
  size 6508458036
last-checkpoint/global_step2500/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:222fe183f3eea9e389bcaacbecabaae4810fdc3d60d8ba363b689b48d252d83a
+ oid sha256:63da6d6ff981d85554533d3c5ee6657e0556bc5f60b5bbcc79d6f4913c35e1e7
  size 29495149
last-checkpoint/global_step2500/zero_pp_rank_2_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e48a0f58625f48764d03261d7a14db5ba2c78ffd1d7df544cd9183710a16a3e7
+ oid sha256:41ed229fd122478e56a15a162611f0c5199d0fcdb8bfcef172a6ac022b363456
  size 6508458036
last-checkpoint/global_step2500/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d7156818814735141f1f945fc98b72f62886378bb94487320e68b6afc0c2abfa
+ oid sha256:6a73a2b1df375f648c11537c000eb296e6e3979a807266832bc02324fcbaedf0
  size 29495149
last-checkpoint/global_step2500/zero_pp_rank_3_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9a2be9b2d58c8643c7fb55afc2ef6aed69d3c5534cfad815bb0c8d72e367a0e0
+ oid sha256:975c77957f54ac39ad043dbf7040a8ac7af3dbcbb65b33a83e89d644f4c04209
  size 6508458036
last-checkpoint/global_step2500/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5da05a7373a17e7c1f226a1b0cb3e78ca8f434ee0679ac3531e404c06c2f4f28
+ oid sha256:85118289c3243a02f4b200c2b6f4344c2a014bcaa13bbf45e0d6ba2c1ab7c47d
  size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step2000
+ global_step2500
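last-checkpoint/latest is the DeepSpeed tag file: a one-line pointer to the engine-state directory that should be restored, now global_step2500 instead of global_step2000. A small sketch of what that means in practice, assuming the repo has been cloned locally:

# The "latest" file names the global_step* directory DeepSpeed will restore from.
with open("last-checkpoint/latest") as f:   # assumed local clone of this repo
    tag = f.read().strip()
print(tag)  # -> global_step2500
# Trainer.train(resume_from_checkpoint="last-checkpoint") would then load
# last-checkpoint/global_step2500/zero_pp_rank_*_mp_rank_00_*_states.pt.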
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5907a54cddc69fe20c02c40139b18624ac2dbae5bcf42b9774c58b64c40b44c7
+ oid sha256:3c8dec4848de85a7459619a3ee9d2272ba9c96e55d70dd5489741e08b8473bb3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c083526885eb8bf3a2ee040372afafd0bb8ab3fad4c8309d345237f500f3a1c
+ oid sha256:23bab7c930535eb3f4cd9b227c386dd1d48f7c52b2fc08ce849b1269c36bd946
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:19c68b0b0876647c73f1e948eb1f7cd95a5f2a05b6f8a5d5f754e4e9a76c606d
+ oid sha256:3effdd25d25bd2c7d4880812b78b2b7de8af5816064ffe51585b24820d0691cb
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1c858b96940d74592597bc7a918935a99c2fc8e9f641f494a7e5c566c09a6221
+ oid sha256:059a50f85f504da39009dc3cc341f1e2fbc7dd40780b00f8ebc9bed068e45c3e
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.8431703204047217,
+ "epoch": 1.0539629005059021,
  "eval_steps": 500,
- "global_step": 2000,
+ "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1207,13 +1207,313 @@
  "learning_rate": 1e-05,
  "loss": 0.6565,
  "step": 2000
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 1e-05,
+ "loss": 0.6263,
+ "step": 2010
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 1e-05,
+ "loss": 0.6665,
+ "step": 2020
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 1e-05,
+ "loss": 0.7087,
+ "step": 2030
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 1e-05,
+ "loss": 0.7511,
+ "step": 2040
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 1e-05,
+ "loss": 0.6708,
+ "step": 2050
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 1e-05,
+ "loss": 0.674,
+ "step": 2060
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 1e-05,
+ "loss": 0.7629,
+ "step": 2070
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 1e-05,
+ "loss": 0.623,
+ "step": 2080
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 1e-05,
+ "loss": 0.6838,
+ "step": 2090
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 1e-05,
+ "loss": 0.6836,
+ "step": 2100
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 1e-05,
+ "loss": 0.627,
+ "step": 2110
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 1e-05,
+ "loss": 0.7598,
+ "step": 2120
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 1e-05,
+ "loss": 0.7417,
+ "step": 2130
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 1e-05,
+ "loss": 0.6853,
+ "step": 2140
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 1e-05,
+ "loss": 0.6359,
+ "step": 2150
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 1e-05,
+ "loss": 0.6933,
+ "step": 2160
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 1e-05,
+ "loss": 0.742,
+ "step": 2170
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 1e-05,
+ "loss": 0.6966,
+ "step": 2180
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 1e-05,
+ "loss": 0.6848,
+ "step": 2190
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 1e-05,
+ "loss": 0.6774,
+ "step": 2200
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 1e-05,
+ "loss": 0.7237,
+ "step": 2210
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 1e-05,
+ "loss": 0.7163,
+ "step": 2220
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 1e-05,
+ "loss": 0.6856,
+ "step": 2230
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 1e-05,
+ "loss": 0.7414,
+ "step": 2240
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 1e-05,
+ "loss": 0.6774,
+ "step": 2250
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 1e-05,
+ "loss": 0.6008,
+ "step": 2260
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 1e-05,
+ "loss": 0.7281,
+ "step": 2270
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 1e-05,
+ "loss": 0.7157,
+ "step": 2280
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 1e-05,
+ "loss": 0.6478,
+ "step": 2290
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 1e-05,
+ "loss": 0.7042,
+ "step": 2300
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 1e-05,
+ "loss": 0.6797,
+ "step": 2310
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 1e-05,
+ "loss": 0.7048,
+ "step": 2320
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 1e-05,
+ "loss": 0.6199,
+ "step": 2330
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 1e-05,
+ "loss": 0.617,
+ "step": 2340
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 1e-05,
+ "loss": 0.658,
+ "step": 2350
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 1e-05,
+ "loss": 0.7155,
+ "step": 2360
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 1e-05,
+ "loss": 0.6392,
+ "step": 2370
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 1e-05,
+ "loss": 0.6793,
+ "step": 2380
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 1e-05,
+ "loss": 0.6842,
+ "step": 2390
+ },
+ {
+ "epoch": 1.01,
+ "learning_rate": 1e-05,
+ "loss": 0.6615,
+ "step": 2400
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 1e-05,
+ "loss": 0.7222,
+ "step": 2410
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 1e-05,
+ "loss": 0.7035,
+ "step": 2420
+ },
+ {
+ "epoch": 1.02,
+ "learning_rate": 1e-05,
+ "loss": 0.6866,
+ "step": 2430
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 1e-05,
+ "loss": 0.6694,
+ "step": 2440
+ },
+ {
+ "epoch": 1.03,
+ "learning_rate": 1e-05,
+ "loss": 0.6687,
+ "step": 2450
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 1e-05,
+ "loss": 0.6515,
+ "step": 2460
+ },
+ {
+ "epoch": 1.04,
+ "learning_rate": 1e-05,
+ "loss": 0.66,
+ "step": 2470
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 1e-05,
+ "loss": 0.6523,
+ "step": 2480
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 1e-05,
+ "loss": 0.676,
+ "step": 2490
+ },
+ {
+ "epoch": 1.05,
+ "learning_rate": 1e-05,
+ "loss": 0.6396,
+ "step": 2500
  }
  ],
  "logging_steps": 10,
  "max_steps": 5000,
  "num_train_epochs": 3,
  "save_steps": 500,
- "total_flos": 502812786032640.0,
+ "total_flos": 628704820592640.0,
  "trial_name": null,
  "trial_params": null
  }
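As a sanity check, the old and new trainer_state.json snapshots are mutually consistent: both imply the same number of optimizer steps per epoch, and the total_flos delta corresponds to the 500 steps added in this commit. A small verification sketch, with every constant copied from the diff above:

# Cross-check the numbers reported in trainer_state.json before and after this commit.
old_epoch, old_step = 0.8431703204047217, 2000
new_epoch, new_step = 1.0539629005059021, 2500

steps_per_epoch_old = old_step / old_epoch   # ~2372.0
steps_per_epoch_new = new_step / new_epoch   # ~2372.0
assert round(steps_per_epoch_old) == round(steps_per_epoch_new) == 2372

old_flos, new_flos = 502812786032640.0, 628704820592640.0
flos_per_step = (new_flos - old_flos) / (new_step - old_step)
print(f"steps per epoch ~ {steps_per_epoch_new:.1f}")
print(f"FLOs per step ~ {flos_per_step:.3e}")  # ~2.518e11 over steps 2000-2500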