DuongTrongChi commited on
Commit
7df635a
1 Parent(s): 04f4ee7

Training in progress, step 206, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d08662b031cdd86ed774881debe128c873e00074cf7929595f42b6a63d84048
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9653531852ec6119db0beb46f48bfa17c88968f05b0e674dc503a553f84fe4e3
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa10ee462c7e49a82fafb47e9d4304eed05d8aaf2fa6327757fcf5a538adb758
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80dafb883e7ca8362cc0eba677e7edbf34ec8bc9e8592483d28a1fed39eb72cd
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ac39c24740490f5e39e7ce5934c2a2903951fd3baae22c89e765d403647b6d1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcbc2526bc157d1d1697d4e94eb6c17525855f6b21b0575b373b92dfaeff6f39
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.28198338051319516,
5
  "eval_steps": 500,
6
- "global_step": 193,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1358,6 +1358,97 @@
1358
  "learning_rate": 1.6815068493150686e-05,
1359
  "loss": 1.2553,
1360
  "step": 193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1361
  }
1362
  ],
1363
  "logging_steps": 1,
@@ -1377,7 +1468,7 @@
1377
  "attributes": {}
1378
  }
1379
  },
1380
- "total_flos": 2.1621842547974554e+17,
1381
  "train_batch_size": 4,
1382
  "trial_name": null,
1383
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.30097707971874715,
5
  "eval_steps": 500,
6
+ "global_step": 206,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1358
  "learning_rate": 1.6815068493150686e-05,
1359
  "loss": 1.2553,
1360
  "step": 193
1361
+ },
1362
+ {
1363
+ "epoch": 0.2834444342982376,
1364
+ "grad_norm": 0.1810723841190338,
1365
+ "learning_rate": 1.678082191780822e-05,
1366
+ "loss": 1.2121,
1367
+ "step": 194
1368
+ },
1369
+ {
1370
+ "epoch": 0.2849054880832801,
1371
+ "grad_norm": 0.16387374699115753,
1372
+ "learning_rate": 1.6746575342465753e-05,
1373
+ "loss": 1.1702,
1374
+ "step": 195
1375
+ },
1376
+ {
1377
+ "epoch": 0.28636654186832255,
1378
+ "grad_norm": 0.1537161022424698,
1379
+ "learning_rate": 1.671232876712329e-05,
1380
+ "loss": 1.1865,
1381
+ "step": 196
1382
+ },
1383
+ {
1384
+ "epoch": 0.28782759565336496,
1385
+ "grad_norm": 0.13615332543849945,
1386
+ "learning_rate": 1.6678082191780822e-05,
1387
+ "loss": 1.2578,
1388
+ "step": 197
1389
+ },
1390
+ {
1391
+ "epoch": 0.28928864943840743,
1392
+ "grad_norm": 0.13642196357250214,
1393
+ "learning_rate": 1.664383561643836e-05,
1394
+ "loss": 1.1813,
1395
+ "step": 198
1396
+ },
1397
+ {
1398
+ "epoch": 0.2907497032234499,
1399
+ "grad_norm": 0.1444728523492813,
1400
+ "learning_rate": 1.660958904109589e-05,
1401
+ "loss": 1.2815,
1402
+ "step": 199
1403
+ },
1404
+ {
1405
+ "epoch": 0.29221075700849236,
1406
+ "grad_norm": 0.13030050694942474,
1407
+ "learning_rate": 1.6575342465753425e-05,
1408
+ "loss": 1.2848,
1409
+ "step": 200
1410
+ },
1411
+ {
1412
+ "epoch": 0.2936718107935348,
1413
+ "grad_norm": 0.13471786677837372,
1414
+ "learning_rate": 1.654109589041096e-05,
1415
+ "loss": 1.1634,
1416
+ "step": 201
1417
+ },
1418
+ {
1419
+ "epoch": 0.2951328645785773,
1420
+ "grad_norm": 0.11596754193305969,
1421
+ "learning_rate": 1.6506849315068494e-05,
1422
+ "loss": 1.2522,
1423
+ "step": 202
1424
+ },
1425
+ {
1426
+ "epoch": 0.29659391836361976,
1427
+ "grad_norm": 0.11978977173566818,
1428
+ "learning_rate": 1.647260273972603e-05,
1429
+ "loss": 1.2585,
1430
+ "step": 203
1431
+ },
1432
+ {
1433
+ "epoch": 0.2980549721486622,
1434
+ "grad_norm": 0.11857204139232635,
1435
+ "learning_rate": 1.6438356164383563e-05,
1436
+ "loss": 1.1372,
1437
+ "step": 204
1438
+ },
1439
+ {
1440
+ "epoch": 0.2995160259337047,
1441
+ "grad_norm": 0.12098690867424011,
1442
+ "learning_rate": 1.6404109589041096e-05,
1443
+ "loss": 1.1391,
1444
+ "step": 205
1445
+ },
1446
+ {
1447
+ "epoch": 0.30097707971874715,
1448
+ "grad_norm": 0.12197306752204895,
1449
+ "learning_rate": 1.6369863013698633e-05,
1450
+ "loss": 1.2073,
1451
+ "step": 206
1452
  }
1453
  ],
1454
  "logging_steps": 1,
 
1468
  "attributes": {}
1469
  }
1470
  },
1471
+ "total_flos": 2.3075689748657357e+17,
1472
  "train_batch_size": 4,
1473
  "trial_name": null,
1474
  "trial_params": null