DuongTrongChi committed
Commit
8b79474
1 Parent(s): cd079f7

Training in progress, step 233, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:473f4b3ff0b219aa3d4708c39fab335413427789124d1cc01e586e727315cff4
+oid sha256:afd25c170355ea632cfa5da978f90f702ff16dbc4d51afcebf4989ad9d0f333b
 size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ec22a1dfeb40ec71635f49ced12c4198b8107b4b216dad54278bd643cdb721c
+oid sha256:73e0747b869366afbdebd37788920b02982df01eccc71e682aec61e6a7645b33
 size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7650503afdff505462325f6f430e903bb44a778f9119236884da4fcf462c9e5
+oid sha256:d1c9368a0cddac000b7996e7cc550ed94e1405683912f0e0601527c458e30db8
 size 1064
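
The three files above are git-lfs pointer files following the layout shown in the diffs (version / oid sha256:<hex> / size <bytes>); only the sha256 oids change, the sizes stay the same. Below is a minimal sketch, not part of this repo, of how a downloaded blob could be checked against such a pointer; the paths in the usage comment are illustrative.

import hashlib
from pathlib import Path

def read_lfs_pointer(pointer_path):
    # Parse the three-line pointer layout into {"version": ..., "oid": ..., "size": ...}.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        if line.strip():
            key, _, value = line.partition(" ")
            fields[key] = value
    return fields

def verify_against_pointer(pointer_path, blob_path):
    # Compare a local blob's sha256 and byte size with the pointer's oid/size.
    fields = read_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
    expected_size = int(fields["size"])
    blob = Path(blob_path)
    actual_oid = hashlib.sha256(blob.read_bytes()).hexdigest()
    return actual_oid == expected_oid and blob.stat().st_size == expected_size

# Illustrative usage with hypothetical local paths:
# verify_against_pointer("adapter_model.pointer", "last-checkpoint/adapter_model.safetensors")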
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.306821294858917,
+  "epoch": 0.3404255319148936,
   "eval_steps": 500,
-  "global_step": 210,
+  "global_step": 233,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1477,6 +1477,167 @@
       "learning_rate": 1.623287671232877e-05,
       "loss": 1.2169,
       "step": 210
+    },
+    {
+      "epoch": 0.3082823486439595,
+      "grad_norm": 0.11075271666049957,
+      "learning_rate": 1.61986301369863e-05,
+      "loss": 1.2867,
+      "step": 211
+    },
+    {
+      "epoch": 0.3097434024290019,
+      "grad_norm": 0.10098811239004135,
+      "learning_rate": 1.6164383561643838e-05,
+      "loss": 1.2625,
+      "step": 212
+    },
+    {
+      "epoch": 0.31120445621404436,
+      "grad_norm": 0.10328993201255798,
+      "learning_rate": 1.613013698630137e-05,
+      "loss": 1.2121,
+      "step": 213
+    },
+    {
+      "epoch": 0.3126655099990868,
+      "grad_norm": 0.11792083084583282,
+      "learning_rate": 1.6095890410958904e-05,
+      "loss": 1.2316,
+      "step": 214
+    },
+    {
+      "epoch": 0.3141265637841293,
+      "grad_norm": 0.1143653616309166,
+      "learning_rate": 1.606164383561644e-05,
+      "loss": 1.2084,
+      "step": 215
+    },
+    {
+      "epoch": 0.31558761756917175,
+      "grad_norm": 0.10241192579269409,
+      "learning_rate": 1.6027397260273974e-05,
+      "loss": 1.2924,
+      "step": 216
+    },
+    {
+      "epoch": 0.3170486713542142,
+      "grad_norm": 0.09764024615287781,
+      "learning_rate": 1.599315068493151e-05,
+      "loss": 1.2387,
+      "step": 217
+    },
+    {
+      "epoch": 0.3185097251392567,
+      "grad_norm": 0.0964062437415123,
+      "learning_rate": 1.5958904109589043e-05,
+      "loss": 1.2143,
+      "step": 218
+    },
+    {
+      "epoch": 0.31997077892429915,
+      "grad_norm": 0.10618474334478378,
+      "learning_rate": 1.5924657534246576e-05,
+      "loss": 1.1912,
+      "step": 219
+    },
+    {
+      "epoch": 0.3214318327093416,
+      "grad_norm": 0.10085848718881607,
+      "learning_rate": 1.589041095890411e-05,
+      "loss": 1.1691,
+      "step": 220
+    },
+    {
+      "epoch": 0.3228928864943841,
+      "grad_norm": 0.10805616527795792,
+      "learning_rate": 1.5856164383561646e-05,
+      "loss": 1.2028,
+      "step": 221
+    },
+    {
+      "epoch": 0.32435394027942654,
+      "grad_norm": 0.10075750201940536,
+      "learning_rate": 1.5821917808219182e-05,
+      "loss": 1.2286,
+      "step": 222
+    },
+    {
+      "epoch": 0.325814994064469,
+      "grad_norm": 0.09440125524997711,
+      "learning_rate": 1.5787671232876715e-05,
+      "loss": 1.24,
+      "step": 223
+    },
+    {
+      "epoch": 0.3272760478495115,
+      "grad_norm": 0.09914068877696991,
+      "learning_rate": 1.5753424657534248e-05,
+      "loss": 1.2225,
+      "step": 224
+    },
+    {
+      "epoch": 0.32873710163455394,
+      "grad_norm": 0.10779386013746262,
+      "learning_rate": 1.571917808219178e-05,
+      "loss": 1.1821,
+      "step": 225
+    },
+    {
+      "epoch": 0.3301981554195964,
+      "grad_norm": 0.09676062315702438,
+      "learning_rate": 1.5684931506849318e-05,
+      "loss": 1.2975,
+      "step": 226
+    },
+    {
+      "epoch": 0.33165920920463887,
+      "grad_norm": 0.09622418135404587,
+      "learning_rate": 1.565068493150685e-05,
+      "loss": 1.2935,
+      "step": 227
+    },
+    {
+      "epoch": 0.3331202629896813,
+      "grad_norm": 0.10332711786031723,
+      "learning_rate": 1.5616438356164384e-05,
+      "loss": 1.2178,
+      "step": 228
+    },
+    {
+      "epoch": 0.33458131677472375,
+      "grad_norm": 0.10794605314731598,
+      "learning_rate": 1.5582191780821917e-05,
+      "loss": 1.1103,
+      "step": 229
+    },
+    {
+      "epoch": 0.3360423705597662,
+      "grad_norm": 0.10352062433958054,
+      "learning_rate": 1.5547945205479453e-05,
+      "loss": 1.2187,
+      "step": 230
+    },
+    {
+      "epoch": 0.3375034243448087,
+      "grad_norm": 0.10264533013105392,
+      "learning_rate": 1.551369863013699e-05,
+      "loss": 1.3449,
+      "step": 231
+    },
+    {
+      "epoch": 0.33896447812985114,
+      "grad_norm": 0.10094834864139557,
+      "learning_rate": 1.5479452054794523e-05,
+      "loss": 1.2042,
+      "step": 232
+    },
+    {
+      "epoch": 0.3404255319148936,
+      "grad_norm": 0.09931448101997375,
+      "learning_rate": 1.5445205479452056e-05,
+      "loss": 1.1876,
+      "step": 233
     }
   ],
   "logging_steps": 1,
@@ -1496,7 +1657,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.3556959221775155e+17,
+  "total_flos": 2.619657311583191e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null