kmnis committed on
Commit
fa5a6d7
·
1 Parent(s): 29b8318

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f93f9dbc221ce7eb2f7a986f2bdf2025a0344f7d11dda174684e2f36ab62d20
3
  size 19744138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09ca6bc9ed7563f7804e6870dfb0262ec9d7ef1b9b317e3e50537421dc41436e
3
  size 19744138
last-checkpoint/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cce0927e394639eeb0bd610f31ee8cbf9503b8c13f4532a8304ee5bfe2b27843
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a72c585f47068754b88db4a7995ba4dda7962a48c54c29f27de81a59ada2bd9c
3
  size 29495149
last-checkpoint/global_step3000/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31d3cc26103d70e2131559ace58bfb81ba66514571d1e4bd5b8703dc09e5bc0a
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1e9ce1f3afc5a389822bfa65228bb38d3496b2c6f17016b230db6baa735257
3
  size 29495149
last-checkpoint/global_step3000/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:655f27fc3a28657447c812500eb050ab2234ef9e0301ee6a3ec668f039310e13
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cb471591296d9209f005f7ca04a794c791716dcbe6849722ca60fd780b8e4f5
3
  size 29495149
last-checkpoint/global_step3000/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f53189fdda0e03570b86da54fdb10905918b600bf0aa2aca3aeba7b41048090
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03473b2ecc4164b11c4193e945649be0af72e76a8a8d73af30bdc3a72d1e9679
3
  size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2500
 
1
+ global_step3000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8dec4848de85a7459619a3ee9d2272ba9c96e55d70dd5489741e08b8473bb3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d887fde7ddbc72491d6886015e2826b2f67780c0d1c9fd59e1d88ce5fc09e31b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23bab7c930535eb3f4cd9b227c386dd1d48f7c52b2fc08ce849b1269c36bd946
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7edde062ca170b1a53cd6353a93ce1f9782a4edeac65ea031afcf5aea5323ca4
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3effdd25d25bd2c7d4880812b78b2b7de8af5816064ffe51585b24820d0691cb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559285b660bfd3d01043c44ad11fa1111ae6e093b1d70a9a10b4160231b87936
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:059a50f85f504da39009dc3cc341f1e2fbc7dd40780b00f8ebc9bed068e45c3e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac86af3890b7d390a131628964790e2daa6e964a408d352e767975a2f58c75c
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0539629005059021,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1507,13 +1507,313 @@
1507
  "learning_rate": 1e-05,
1508
  "loss": 0.6396,
1509
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1510
  }
1511
  ],
1512
  "logging_steps": 10,
1513
  "max_steps": 5000,
1514
  "num_train_epochs": 3,
1515
  "save_steps": 500,
1516
- "total_flos": 628704820592640.0,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2647554806070826,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1507
  "learning_rate": 1e-05,
1508
  "loss": 0.6396,
1509
  "step": 2500
1510
+ },
1511
+ {
1512
+ "epoch": 1.06,
1513
+ "learning_rate": 1e-05,
1514
+ "loss": 0.6692,
1515
+ "step": 2510
1516
+ },
1517
+ {
1518
+ "epoch": 1.06,
1519
+ "learning_rate": 1e-05,
1520
+ "loss": 0.6417,
1521
+ "step": 2520
1522
+ },
1523
+ {
1524
+ "epoch": 1.07,
1525
+ "learning_rate": 1e-05,
1526
+ "loss": 0.7044,
1527
+ "step": 2530
1528
+ },
1529
+ {
1530
+ "epoch": 1.07,
1531
+ "learning_rate": 1e-05,
1532
+ "loss": 0.7393,
1533
+ "step": 2540
1534
+ },
1535
+ {
1536
+ "epoch": 1.08,
1537
+ "learning_rate": 1e-05,
1538
+ "loss": 0.6352,
1539
+ "step": 2550
1540
+ },
1541
+ {
1542
+ "epoch": 1.08,
1543
+ "learning_rate": 1e-05,
1544
+ "loss": 0.6468,
1545
+ "step": 2560
1546
+ },
1547
+ {
1548
+ "epoch": 1.08,
1549
+ "learning_rate": 1e-05,
1550
+ "loss": 0.6693,
1551
+ "step": 2570
1552
+ },
1553
+ {
1554
+ "epoch": 1.09,
1555
+ "learning_rate": 1e-05,
1556
+ "loss": 0.675,
1557
+ "step": 2580
1558
+ },
1559
+ {
1560
+ "epoch": 1.09,
1561
+ "learning_rate": 1e-05,
1562
+ "loss": 0.6965,
1563
+ "step": 2590
1564
+ },
1565
+ {
1566
+ "epoch": 1.1,
1567
+ "learning_rate": 1e-05,
1568
+ "loss": 0.5961,
1569
+ "step": 2600
1570
+ },
1571
+ {
1572
+ "epoch": 1.1,
1573
+ "learning_rate": 1e-05,
1574
+ "loss": 0.7622,
1575
+ "step": 2610
1576
+ },
1577
+ {
1578
+ "epoch": 1.1,
1579
+ "learning_rate": 1e-05,
1580
+ "loss": 0.6352,
1581
+ "step": 2620
1582
+ },
1583
+ {
1584
+ "epoch": 1.11,
1585
+ "learning_rate": 1e-05,
1586
+ "loss": 0.6257,
1587
+ "step": 2630
1588
+ },
1589
+ {
1590
+ "epoch": 1.11,
1591
+ "learning_rate": 1e-05,
1592
+ "loss": 0.713,
1593
+ "step": 2640
1594
+ },
1595
+ {
1596
+ "epoch": 1.12,
1597
+ "learning_rate": 1e-05,
1598
+ "loss": 0.6418,
1599
+ "step": 2650
1600
+ },
1601
+ {
1602
+ "epoch": 1.12,
1603
+ "learning_rate": 1e-05,
1604
+ "loss": 0.7266,
1605
+ "step": 2660
1606
+ },
1607
+ {
1608
+ "epoch": 1.13,
1609
+ "learning_rate": 1e-05,
1610
+ "loss": 0.6281,
1611
+ "step": 2670
1612
+ },
1613
+ {
1614
+ "epoch": 1.13,
1615
+ "learning_rate": 1e-05,
1616
+ "loss": 0.6832,
1617
+ "step": 2680
1618
+ },
1619
+ {
1620
+ "epoch": 1.13,
1621
+ "learning_rate": 1e-05,
1622
+ "loss": 0.6341,
1623
+ "step": 2690
1624
+ },
1625
+ {
1626
+ "epoch": 1.14,
1627
+ "learning_rate": 1e-05,
1628
+ "loss": 0.6785,
1629
+ "step": 2700
1630
+ },
1631
+ {
1632
+ "epoch": 1.14,
1633
+ "learning_rate": 1e-05,
1634
+ "loss": 0.6371,
1635
+ "step": 2710
1636
+ },
1637
+ {
1638
+ "epoch": 1.15,
1639
+ "learning_rate": 1e-05,
1640
+ "loss": 0.7453,
1641
+ "step": 2720
1642
+ },
1643
+ {
1644
+ "epoch": 1.15,
1645
+ "learning_rate": 1e-05,
1646
+ "loss": 0.6299,
1647
+ "step": 2730
1648
+ },
1649
+ {
1650
+ "epoch": 1.16,
1651
+ "learning_rate": 1e-05,
1652
+ "loss": 0.7199,
1653
+ "step": 2740
1654
+ },
1655
+ {
1656
+ "epoch": 1.16,
1657
+ "learning_rate": 1e-05,
1658
+ "loss": 0.6664,
1659
+ "step": 2750
1660
+ },
1661
+ {
1662
+ "epoch": 1.16,
1663
+ "learning_rate": 1e-05,
1664
+ "loss": 0.5977,
1665
+ "step": 2760
1666
+ },
1667
+ {
1668
+ "epoch": 1.17,
1669
+ "learning_rate": 1e-05,
1670
+ "loss": 0.6706,
1671
+ "step": 2770
1672
+ },
1673
+ {
1674
+ "epoch": 1.17,
1675
+ "learning_rate": 1e-05,
1676
+ "loss": 0.6715,
1677
+ "step": 2780
1678
+ },
1679
+ {
1680
+ "epoch": 1.18,
1681
+ "learning_rate": 1e-05,
1682
+ "loss": 0.6268,
1683
+ "step": 2790
1684
+ },
1685
+ {
1686
+ "epoch": 1.18,
1687
+ "learning_rate": 1e-05,
1688
+ "loss": 0.6117,
1689
+ "step": 2800
1690
+ },
1691
+ {
1692
+ "epoch": 1.18,
1693
+ "learning_rate": 1e-05,
1694
+ "loss": 0.6529,
1695
+ "step": 2810
1696
+ },
1697
+ {
1698
+ "epoch": 1.19,
1699
+ "learning_rate": 1e-05,
1700
+ "loss": 0.6977,
1701
+ "step": 2820
1702
+ },
1703
+ {
1704
+ "epoch": 1.19,
1705
+ "learning_rate": 1e-05,
1706
+ "loss": 0.6732,
1707
+ "step": 2830
1708
+ },
1709
+ {
1710
+ "epoch": 1.2,
1711
+ "learning_rate": 1e-05,
1712
+ "loss": 0.6915,
1713
+ "step": 2840
1714
+ },
1715
+ {
1716
+ "epoch": 1.2,
1717
+ "learning_rate": 1e-05,
1718
+ "loss": 0.7073,
1719
+ "step": 2850
1720
+ },
1721
+ {
1722
+ "epoch": 1.21,
1723
+ "learning_rate": 1e-05,
1724
+ "loss": 0.6972,
1725
+ "step": 2860
1726
+ },
1727
+ {
1728
+ "epoch": 1.21,
1729
+ "learning_rate": 1e-05,
1730
+ "loss": 0.6559,
1731
+ "step": 2870
1732
+ },
1733
+ {
1734
+ "epoch": 1.21,
1735
+ "learning_rate": 1e-05,
1736
+ "loss": 0.667,
1737
+ "step": 2880
1738
+ },
1739
+ {
1740
+ "epoch": 1.22,
1741
+ "learning_rate": 1e-05,
1742
+ "loss": 0.6715,
1743
+ "step": 2890
1744
+ },
1745
+ {
1746
+ "epoch": 1.22,
1747
+ "learning_rate": 1e-05,
1748
+ "loss": 0.6339,
1749
+ "step": 2900
1750
+ },
1751
+ {
1752
+ "epoch": 1.23,
1753
+ "learning_rate": 1e-05,
1754
+ "loss": 0.6251,
1755
+ "step": 2910
1756
+ },
1757
+ {
1758
+ "epoch": 1.23,
1759
+ "learning_rate": 1e-05,
1760
+ "loss": 0.6384,
1761
+ "step": 2920
1762
+ },
1763
+ {
1764
+ "epoch": 1.24,
1765
+ "learning_rate": 1e-05,
1766
+ "loss": 0.6075,
1767
+ "step": 2930
1768
+ },
1769
+ {
1770
+ "epoch": 1.24,
1771
+ "learning_rate": 1e-05,
1772
+ "loss": 0.6598,
1773
+ "step": 2940
1774
+ },
1775
+ {
1776
+ "epoch": 1.24,
1777
+ "learning_rate": 1e-05,
1778
+ "loss": 0.6625,
1779
+ "step": 2950
1780
+ },
1781
+ {
1782
+ "epoch": 1.25,
1783
+ "learning_rate": 1e-05,
1784
+ "loss": 0.6064,
1785
+ "step": 2960
1786
+ },
1787
+ {
1788
+ "epoch": 1.25,
1789
+ "learning_rate": 1e-05,
1790
+ "loss": 0.6928,
1791
+ "step": 2970
1792
+ },
1793
+ {
1794
+ "epoch": 1.26,
1795
+ "learning_rate": 1e-05,
1796
+ "loss": 0.7012,
1797
+ "step": 2980
1798
+ },
1799
+ {
1800
+ "epoch": 1.26,
1801
+ "learning_rate": 1e-05,
1802
+ "loss": 0.6488,
1803
+ "step": 2990
1804
+ },
1805
+ {
1806
+ "epoch": 1.26,
1807
+ "learning_rate": 1e-05,
1808
+ "loss": 0.6872,
1809
+ "step": 3000
1810
  }
1811
  ],
1812
  "logging_steps": 10,
1813
  "max_steps": 5000,
1814
  "num_train_epochs": 3,
1815
  "save_steps": 500,
1816
+ "total_flos": 754093287014400.0,
1817
  "trial_name": null,
1818
  "trial_params": null
1819
  }