kmnis commited on
Commit
e687f9c
·
1 Parent(s): ee81d38

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0434f5977741abb3d5a00fe24c7d31391987cda5332ba573b25aa52bbe86d617
3
  size 19744138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b81209ae3e3142691e6cbf9afb802b8896c391877a723ce33eab916d0afb9edc
3
  size 19744138
last-checkpoint/global_step3000/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069b14fe37fa6530b30e52fa4da09f794b5ef5d65dff7d46d2f699520301d6e2
3
+ size 6508458036
last-checkpoint/global_step3000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cce0927e394639eeb0bd610f31ee8cbf9503b8c13f4532a8304ee5bfe2b27843
3
+ size 29495149
last-checkpoint/global_step3000/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ffb2ecacac48d97fbeeba2b130befff541bbf45a4a2a197c0ec646b2eeafa7
3
+ size 6508458036
last-checkpoint/global_step3000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d3cc26103d70e2131559ace58bfb81ba66514571d1e4bd5b8703dc09e5bc0a
3
+ size 29495149
last-checkpoint/global_step3000/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2151925d5eb9ad3c55e3ece5285cd9a788486265a47ae1114426cc44b9a8ee
3
+ size 6508458036
last-checkpoint/global_step3000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:655f27fc3a28657447c812500eb050ab2234ef9e0301ee6a3ec668f039310e13
3
+ size 29495149
last-checkpoint/global_step3000/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7992df307533d0bfdf23be800a117e8b3efc88c9da0ae34a0aa4c2113dcfaba
3
+ size 6508458036
last-checkpoint/global_step3000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f53189fdda0e03570b86da54fdb10905918b600bf0aa2aca3aeba7b41048090
3
+ size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2500
 
1
+ global_step3000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c8dec4848de85a7459619a3ee9d2272ba9c96e55d70dd5489741e08b8473bb3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d887fde7ddbc72491d6886015e2826b2f67780c0d1c9fd59e1d88ce5fc09e31b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23bab7c930535eb3f4cd9b227c386dd1d48f7c52b2fc08ce849b1269c36bd946
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7edde062ca170b1a53cd6353a93ce1f9782a4edeac65ea031afcf5aea5323ca4
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3effdd25d25bd2c7d4880812b78b2b7de8af5816064ffe51585b24820d0691cb
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559285b660bfd3d01043c44ad11fa1111ae6e093b1d70a9a10b4160231b87936
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:059a50f85f504da39009dc3cc341f1e2fbc7dd40780b00f8ebc9bed068e45c3e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ac86af3890b7d390a131628964790e2daa6e964a408d352e767975a2f58c75c
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0539629005059021,
5
  "eval_steps": 500,
6
- "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1507,13 +1507,313 @@
1507
  "learning_rate": 1e-05,
1508
  "loss": 0.6425,
1509
  "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1510
  }
1511
  ],
1512
  "logging_steps": 10,
1513
  "max_steps": 10000,
1514
  "num_train_epochs": 5,
1515
  "save_steps": 500,
1516
- "total_flos": 628453036523520.0,
1517
  "trial_name": null,
1518
  "trial_params": null
1519
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2647554806070826,
5
  "eval_steps": 500,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1507
  "learning_rate": 1e-05,
1508
  "loss": 0.6425,
1509
  "step": 2500
1510
+ },
1511
+ {
1512
+ "epoch": 1.06,
1513
+ "learning_rate": 1e-05,
1514
+ "loss": 0.6704,
1515
+ "step": 2510
1516
+ },
1517
+ {
1518
+ "epoch": 1.06,
1519
+ "learning_rate": 1e-05,
1520
+ "loss": 0.6431,
1521
+ "step": 2520
1522
+ },
1523
+ {
1524
+ "epoch": 1.07,
1525
+ "learning_rate": 1e-05,
1526
+ "loss": 0.702,
1527
+ "step": 2530
1528
+ },
1529
+ {
1530
+ "epoch": 1.07,
1531
+ "learning_rate": 1e-05,
1532
+ "loss": 0.7384,
1533
+ "step": 2540
1534
+ },
1535
+ {
1536
+ "epoch": 1.08,
1537
+ "learning_rate": 1e-05,
1538
+ "loss": 0.6324,
1539
+ "step": 2550
1540
+ },
1541
+ {
1542
+ "epoch": 1.08,
1543
+ "learning_rate": 1e-05,
1544
+ "loss": 0.647,
1545
+ "step": 2560
1546
+ },
1547
+ {
1548
+ "epoch": 1.08,
1549
+ "learning_rate": 1e-05,
1550
+ "loss": 0.6693,
1551
+ "step": 2570
1552
+ },
1553
+ {
1554
+ "epoch": 1.09,
1555
+ "learning_rate": 1e-05,
1556
+ "loss": 0.6746,
1557
+ "step": 2580
1558
+ },
1559
+ {
1560
+ "epoch": 1.09,
1561
+ "learning_rate": 1e-05,
1562
+ "loss": 0.6975,
1563
+ "step": 2590
1564
+ },
1565
+ {
1566
+ "epoch": 1.1,
1567
+ "learning_rate": 1e-05,
1568
+ "loss": 0.594,
1569
+ "step": 2600
1570
+ },
1571
+ {
1572
+ "epoch": 1.1,
1573
+ "learning_rate": 1e-05,
1574
+ "loss": 0.7621,
1575
+ "step": 2610
1576
+ },
1577
+ {
1578
+ "epoch": 1.1,
1579
+ "learning_rate": 1e-05,
1580
+ "loss": 0.6325,
1581
+ "step": 2620
1582
+ },
1583
+ {
1584
+ "epoch": 1.11,
1585
+ "learning_rate": 1e-05,
1586
+ "loss": 0.6287,
1587
+ "step": 2630
1588
+ },
1589
+ {
1590
+ "epoch": 1.11,
1591
+ "learning_rate": 1e-05,
1592
+ "loss": 0.7122,
1593
+ "step": 2640
1594
+ },
1595
+ {
1596
+ "epoch": 1.12,
1597
+ "learning_rate": 1e-05,
1598
+ "loss": 0.6411,
1599
+ "step": 2650
1600
+ },
1601
+ {
1602
+ "epoch": 1.12,
1603
+ "learning_rate": 1e-05,
1604
+ "loss": 0.7214,
1605
+ "step": 2660
1606
+ },
1607
+ {
1608
+ "epoch": 1.13,
1609
+ "learning_rate": 1e-05,
1610
+ "loss": 0.6269,
1611
+ "step": 2670
1612
+ },
1613
+ {
1614
+ "epoch": 1.13,
1615
+ "learning_rate": 1e-05,
1616
+ "loss": 0.6836,
1617
+ "step": 2680
1618
+ },
1619
+ {
1620
+ "epoch": 1.13,
1621
+ "learning_rate": 1e-05,
1622
+ "loss": 0.6308,
1623
+ "step": 2690
1624
+ },
1625
+ {
1626
+ "epoch": 1.14,
1627
+ "learning_rate": 1e-05,
1628
+ "loss": 0.6759,
1629
+ "step": 2700
1630
+ },
1631
+ {
1632
+ "epoch": 1.14,
1633
+ "learning_rate": 1e-05,
1634
+ "loss": 0.6376,
1635
+ "step": 2710
1636
+ },
1637
+ {
1638
+ "epoch": 1.15,
1639
+ "learning_rate": 1e-05,
1640
+ "loss": 0.7418,
1641
+ "step": 2720
1642
+ },
1643
+ {
1644
+ "epoch": 1.15,
1645
+ "learning_rate": 1e-05,
1646
+ "loss": 0.6286,
1647
+ "step": 2730
1648
+ },
1649
+ {
1650
+ "epoch": 1.16,
1651
+ "learning_rate": 1e-05,
1652
+ "loss": 0.722,
1653
+ "step": 2740
1654
+ },
1655
+ {
1656
+ "epoch": 1.16,
1657
+ "learning_rate": 1e-05,
1658
+ "loss": 0.6689,
1659
+ "step": 2750
1660
+ },
1661
+ {
1662
+ "epoch": 1.16,
1663
+ "learning_rate": 1e-05,
1664
+ "loss": 0.598,
1665
+ "step": 2760
1666
+ },
1667
+ {
1668
+ "epoch": 1.17,
1669
+ "learning_rate": 1e-05,
1670
+ "loss": 0.6646,
1671
+ "step": 2770
1672
+ },
1673
+ {
1674
+ "epoch": 1.17,
1675
+ "learning_rate": 1e-05,
1676
+ "loss": 0.6716,
1677
+ "step": 2780
1678
+ },
1679
+ {
1680
+ "epoch": 1.18,
1681
+ "learning_rate": 1e-05,
1682
+ "loss": 0.6279,
1683
+ "step": 2790
1684
+ },
1685
+ {
1686
+ "epoch": 1.18,
1687
+ "learning_rate": 1e-05,
1688
+ "loss": 0.61,
1689
+ "step": 2800
1690
+ },
1691
+ {
1692
+ "epoch": 1.18,
1693
+ "learning_rate": 1e-05,
1694
+ "loss": 0.6543,
1695
+ "step": 2810
1696
+ },
1697
+ {
1698
+ "epoch": 1.19,
1699
+ "learning_rate": 1e-05,
1700
+ "loss": 0.6929,
1701
+ "step": 2820
1702
+ },
1703
+ {
1704
+ "epoch": 1.19,
1705
+ "learning_rate": 1e-05,
1706
+ "loss": 0.6731,
1707
+ "step": 2830
1708
+ },
1709
+ {
1710
+ "epoch": 1.2,
1711
+ "learning_rate": 1e-05,
1712
+ "loss": 0.6931,
1713
+ "step": 2840
1714
+ },
1715
+ {
1716
+ "epoch": 1.2,
1717
+ "learning_rate": 1e-05,
1718
+ "loss": 0.7082,
1719
+ "step": 2850
1720
+ },
1721
+ {
1722
+ "epoch": 1.21,
1723
+ "learning_rate": 1e-05,
1724
+ "loss": 0.6964,
1725
+ "step": 2860
1726
+ },
1727
+ {
1728
+ "epoch": 1.21,
1729
+ "learning_rate": 1e-05,
1730
+ "loss": 0.654,
1731
+ "step": 2870
1732
+ },
1733
+ {
1734
+ "epoch": 1.21,
1735
+ "learning_rate": 1e-05,
1736
+ "loss": 0.6697,
1737
+ "step": 2880
1738
+ },
1739
+ {
1740
+ "epoch": 1.22,
1741
+ "learning_rate": 1e-05,
1742
+ "loss": 0.6724,
1743
+ "step": 2890
1744
+ },
1745
+ {
1746
+ "epoch": 1.22,
1747
+ "learning_rate": 1e-05,
1748
+ "loss": 0.6244,
1749
+ "step": 2900
1750
+ },
1751
+ {
1752
+ "epoch": 1.23,
1753
+ "learning_rate": 1e-05,
1754
+ "loss": 0.6244,
1755
+ "step": 2910
1756
+ },
1757
+ {
1758
+ "epoch": 1.23,
1759
+ "learning_rate": 1e-05,
1760
+ "loss": 0.6343,
1761
+ "step": 2920
1762
+ },
1763
+ {
1764
+ "epoch": 1.24,
1765
+ "learning_rate": 1e-05,
1766
+ "loss": 0.6054,
1767
+ "step": 2930
1768
+ },
1769
+ {
1770
+ "epoch": 1.24,
1771
+ "learning_rate": 1e-05,
1772
+ "loss": 0.6593,
1773
+ "step": 2940
1774
+ },
1775
+ {
1776
+ "epoch": 1.24,
1777
+ "learning_rate": 1e-05,
1778
+ "loss": 0.6636,
1779
+ "step": 2950
1780
+ },
1781
+ {
1782
+ "epoch": 1.25,
1783
+ "learning_rate": 1e-05,
1784
+ "loss": 0.6098,
1785
+ "step": 2960
1786
+ },
1787
+ {
1788
+ "epoch": 1.25,
1789
+ "learning_rate": 1e-05,
1790
+ "loss": 0.6938,
1791
+ "step": 2970
1792
+ },
1793
+ {
1794
+ "epoch": 1.26,
1795
+ "learning_rate": 1e-05,
1796
+ "loss": 0.7027,
1797
+ "step": 2980
1798
+ },
1799
+ {
1800
+ "epoch": 1.26,
1801
+ "learning_rate": 1e-05,
1802
+ "loss": 0.6503,
1803
+ "step": 2990
1804
+ },
1805
+ {
1806
+ "epoch": 1.26,
1807
+ "learning_rate": 1e-05,
1808
+ "loss": 0.6872,
1809
+ "step": 3000
1810
  }
1811
  ],
1812
  "logging_steps": 10,
1813
  "max_steps": 10000,
1814
  "num_train_epochs": 5,
1815
  "save_steps": 500,
1816
+ "total_flos": 754093287014400.0,
1817
  "trial_name": null,
1818
  "trial_params": null
1819
  }