kmnis commited on
Commit
69a879e
·
1 Parent(s): d9dcd89

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09ca6bc9ed7563f7804e6870dfb0262ec9d7ef1b9b317e3e50537421dc41436e
3
  size 19744138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7475b8ecf45853a6e11d3131546bc2494a0f2f9aef14d558aebd64613aa5fa63
3
  size 19744138
last-checkpoint/global_step3500/zero_pp_rank_0_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fce09c29b811c7ea4199006dbd6d8339c1fc7bdc081893f35ccdb3a70a05909
3
  size 6508458036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a12985547b6d63d9c944b31660b27e2429626c42ac44c8e10bc4dee2c74e03
3
  size 6508458036
last-checkpoint/global_step3500/zero_pp_rank_0_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf152ee02d109cd4e41ac960a2bab701e26fafbfabfe86cb19b80f43429c24e7
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8323588d81b6e6dd32945a9095e9cb4bb35b70ef9c48247d18a8ff1ade805fb8
3
  size 29495149
last-checkpoint/global_step3500/zero_pp_rank_1_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e59eb771ebbe1285b7c5deeedbfd1aa04e30c76c33d6e25f438bd099a62e79c
3
  size 6508458036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7da19a7144d1b39bdca6b707be0afd8f1679d74aeeddeb269ca8e3202954f475
3
  size 6508458036
last-checkpoint/global_step3500/zero_pp_rank_1_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a193d82cab005b1cfd1bc9232106ef8e261cdb0da14a6725ee1e0572575c04b6
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd7c0d02413f1c9f3ff5f833a04357860e89627e6d53cb497f368ff8e83783f
3
  size 29495149
last-checkpoint/global_step3500/zero_pp_rank_2_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5c9eb46b0e3d1511072d4205695779d8a1414845797072ec774fa9b749d14fe
3
  size 6508458036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a7958de549fd19fc2df99664d7c1603c4c0efa905991e0e67cfd3d071833d7
3
  size 6508458036
last-checkpoint/global_step3500/zero_pp_rank_2_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1dccd1b9d85a4881d31c4dfc2980d120236983d59c9f58430be6b0a9a86f9de
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:311f318e31767a09670b9edce765eafa2ef2a2d52df2d2132dea612da960ee27
3
  size 29495149
last-checkpoint/global_step3500/zero_pp_rank_3_mp_rank_00_model_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f28172e8ff0d7773db83bab45d556eed1e926e85287a1f922b64f6b83ca87516
3
  size 6508458036
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2505329397209039888e36669c6a2a2795e847603459437768b6f58b0907222c
3
  size 6508458036
last-checkpoint/global_step3500/zero_pp_rank_3_mp_rank_00_optim_states.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d0b6b0145ae9c8306b861b62e9d9002f51233a48f0913f318923b42cace9933
3
  size 29495149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52b85d91f7bc855f1fb1ea77100f4278103706b49555968152882b0e560aa66b
3
  size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3000
 
1
+ global_step3500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d887fde7ddbc72491d6886015e2826b2f67780c0d1c9fd59e1d88ce5fc09e31b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17f7aa721b5bf66d195101eeb7532aba710f38c1348eeda7e9e1927fcb5d364
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7edde062ca170b1a53cd6353a93ce1f9782a4edeac65ea031afcf5aea5323ca4
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00c1e9949deeaeaee9b8afe63d1213b81b693226f393b73c75b76cb9300b54a
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:559285b660bfd3d01043c44ad11fa1111ae6e093b1d70a9a10b4160231b87936
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf736fc64eb84ddbe364e183cfed301bd08c6ce9463f9aacea48ab077038026
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ac86af3890b7d390a131628964790e2daa6e964a408d352e767975a2f58c75c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ff4aac5f7d7fe8d6253945ad55d59286ca926a9e46b229588f4a981b62a9d0
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2647554806070826,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1807,13 +1807,313 @@
1807
  "learning_rate": 1e-05,
1808
  "loss": 0.6872,
1809
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1810
  }
1811
  ],
1812
  "logging_steps": 10,
1813
  "max_steps": 5000,
1814
  "num_train_epochs": 3,
1815
  "save_steps": 500,
1816
- "total_flos": 754093287014400.0,
1817
  "trial_name": null,
1818
  "trial_params": null
1819
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4755480607082632,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1807
  "learning_rate": 1e-05,
1808
  "loss": 0.6872,
1809
  "step": 3000
1810
+ },
1811
+ {
1812
+ "epoch": 1.27,
1813
+ "learning_rate": 1e-05,
1814
+ "loss": 0.6221,
1815
+ "step": 3010
1816
+ },
1817
+ {
1818
+ "epoch": 1.27,
1819
+ "learning_rate": 1e-05,
1820
+ "loss": 0.6229,
1821
+ "step": 3020
1822
+ },
1823
+ {
1824
+ "epoch": 1.28,
1825
+ "learning_rate": 1e-05,
1826
+ "loss": 0.5707,
1827
+ "step": 3030
1828
+ },
1829
+ {
1830
+ "epoch": 1.28,
1831
+ "learning_rate": 1e-05,
1832
+ "loss": 0.6352,
1833
+ "step": 3040
1834
+ },
1835
+ {
1836
+ "epoch": 1.29,
1837
+ "learning_rate": 1e-05,
1838
+ "loss": 0.6581,
1839
+ "step": 3050
1840
+ },
1841
+ {
1842
+ "epoch": 1.29,
1843
+ "learning_rate": 1e-05,
1844
+ "loss": 0.7119,
1845
+ "step": 3060
1846
+ },
1847
+ {
1848
+ "epoch": 1.29,
1849
+ "learning_rate": 1e-05,
1850
+ "loss": 0.6462,
1851
+ "step": 3070
1852
+ },
1853
+ {
1854
+ "epoch": 1.3,
1855
+ "learning_rate": 1e-05,
1856
+ "loss": 0.628,
1857
+ "step": 3080
1858
+ },
1859
+ {
1860
+ "epoch": 1.3,
1861
+ "learning_rate": 1e-05,
1862
+ "loss": 0.6886,
1863
+ "step": 3090
1864
+ },
1865
+ {
1866
+ "epoch": 1.31,
1867
+ "learning_rate": 1e-05,
1868
+ "loss": 0.6323,
1869
+ "step": 3100
1870
+ },
1871
+ {
1872
+ "epoch": 1.31,
1873
+ "learning_rate": 1e-05,
1874
+ "loss": 0.6333,
1875
+ "step": 3110
1876
+ },
1877
+ {
1878
+ "epoch": 1.32,
1879
+ "learning_rate": 1e-05,
1880
+ "loss": 0.6815,
1881
+ "step": 3120
1882
+ },
1883
+ {
1884
+ "epoch": 1.32,
1885
+ "learning_rate": 1e-05,
1886
+ "loss": 0.6526,
1887
+ "step": 3130
1888
+ },
1889
+ {
1890
+ "epoch": 1.32,
1891
+ "learning_rate": 1e-05,
1892
+ "loss": 0.6232,
1893
+ "step": 3140
1894
+ },
1895
+ {
1896
+ "epoch": 1.33,
1897
+ "learning_rate": 1e-05,
1898
+ "loss": 0.6477,
1899
+ "step": 3150
1900
+ },
1901
+ {
1902
+ "epoch": 1.33,
1903
+ "learning_rate": 1e-05,
1904
+ "loss": 0.6534,
1905
+ "step": 3160
1906
+ },
1907
+ {
1908
+ "epoch": 1.34,
1909
+ "learning_rate": 1e-05,
1910
+ "loss": 0.6534,
1911
+ "step": 3170
1912
+ },
1913
+ {
1914
+ "epoch": 1.34,
1915
+ "learning_rate": 1e-05,
1916
+ "loss": 0.6548,
1917
+ "step": 3180
1918
+ },
1919
+ {
1920
+ "epoch": 1.34,
1921
+ "learning_rate": 1e-05,
1922
+ "loss": 0.6622,
1923
+ "step": 3190
1924
+ },
1925
+ {
1926
+ "epoch": 1.35,
1927
+ "learning_rate": 1e-05,
1928
+ "loss": 0.6524,
1929
+ "step": 3200
1930
+ },
1931
+ {
1932
+ "epoch": 1.35,
1933
+ "learning_rate": 1e-05,
1934
+ "loss": 0.6364,
1935
+ "step": 3210
1936
+ },
1937
+ {
1938
+ "epoch": 1.36,
1939
+ "learning_rate": 1e-05,
1940
+ "loss": 0.6687,
1941
+ "step": 3220
1942
+ },
1943
+ {
1944
+ "epoch": 1.36,
1945
+ "learning_rate": 1e-05,
1946
+ "loss": 0.6814,
1947
+ "step": 3230
1948
+ },
1949
+ {
1950
+ "epoch": 1.37,
1951
+ "learning_rate": 1e-05,
1952
+ "loss": 0.6885,
1953
+ "step": 3240
1954
+ },
1955
+ {
1956
+ "epoch": 1.37,
1957
+ "learning_rate": 1e-05,
1958
+ "loss": 0.6885,
1959
+ "step": 3250
1960
+ },
1961
+ {
1962
+ "epoch": 1.37,
1963
+ "learning_rate": 1e-05,
1964
+ "loss": 0.6826,
1965
+ "step": 3260
1966
+ },
1967
+ {
1968
+ "epoch": 1.38,
1969
+ "learning_rate": 1e-05,
1970
+ "loss": 0.6689,
1971
+ "step": 3270
1972
+ },
1973
+ {
1974
+ "epoch": 1.38,
1975
+ "learning_rate": 1e-05,
1976
+ "loss": 0.8184,
1977
+ "step": 3280
1978
+ },
1979
+ {
1980
+ "epoch": 1.39,
1981
+ "learning_rate": 1e-05,
1982
+ "loss": 0.617,
1983
+ "step": 3290
1984
+ },
1985
+ {
1986
+ "epoch": 1.39,
1987
+ "learning_rate": 1e-05,
1988
+ "loss": 0.7429,
1989
+ "step": 3300
1990
+ },
1991
+ {
1992
+ "epoch": 1.4,
1993
+ "learning_rate": 1e-05,
1994
+ "loss": 0.6808,
1995
+ "step": 3310
1996
+ },
1997
+ {
1998
+ "epoch": 1.4,
1999
+ "learning_rate": 1e-05,
2000
+ "loss": 0.6454,
2001
+ "step": 3320
2002
+ },
2003
+ {
2004
+ "epoch": 1.4,
2005
+ "learning_rate": 1e-05,
2006
+ "loss": 0.6676,
2007
+ "step": 3330
2008
+ },
2009
+ {
2010
+ "epoch": 1.41,
2011
+ "learning_rate": 1e-05,
2012
+ "loss": 0.6154,
2013
+ "step": 3340
2014
+ },
2015
+ {
2016
+ "epoch": 1.41,
2017
+ "learning_rate": 1e-05,
2018
+ "loss": 0.6773,
2019
+ "step": 3350
2020
+ },
2021
+ {
2022
+ "epoch": 1.42,
2023
+ "learning_rate": 1e-05,
2024
+ "loss": 0.6902,
2025
+ "step": 3360
2026
+ },
2027
+ {
2028
+ "epoch": 1.42,
2029
+ "learning_rate": 1e-05,
2030
+ "loss": 0.6361,
2031
+ "step": 3370
2032
+ },
2033
+ {
2034
+ "epoch": 1.42,
2035
+ "learning_rate": 1e-05,
2036
+ "loss": 0.6433,
2037
+ "step": 3380
2038
+ },
2039
+ {
2040
+ "epoch": 1.43,
2041
+ "learning_rate": 1e-05,
2042
+ "loss": 0.6637,
2043
+ "step": 3390
2044
+ },
2045
+ {
2046
+ "epoch": 1.43,
2047
+ "learning_rate": 1e-05,
2048
+ "loss": 0.7161,
2049
+ "step": 3400
2050
+ },
2051
+ {
2052
+ "epoch": 1.44,
2053
+ "learning_rate": 1e-05,
2054
+ "loss": 0.691,
2055
+ "step": 3410
2056
+ },
2057
+ {
2058
+ "epoch": 1.44,
2059
+ "learning_rate": 1e-05,
2060
+ "loss": 0.5989,
2061
+ "step": 3420
2062
+ },
2063
+ {
2064
+ "epoch": 1.45,
2065
+ "learning_rate": 1e-05,
2066
+ "loss": 0.6744,
2067
+ "step": 3430
2068
+ },
2069
+ {
2070
+ "epoch": 1.45,
2071
+ "learning_rate": 1e-05,
2072
+ "loss": 0.6739,
2073
+ "step": 3440
2074
+ },
2075
+ {
2076
+ "epoch": 1.45,
2077
+ "learning_rate": 1e-05,
2078
+ "loss": 0.6385,
2079
+ "step": 3450
2080
+ },
2081
+ {
2082
+ "epoch": 1.46,
2083
+ "learning_rate": 1e-05,
2084
+ "loss": 0.6796,
2085
+ "step": 3460
2086
+ },
2087
+ {
2088
+ "epoch": 1.46,
2089
+ "learning_rate": 1e-05,
2090
+ "loss": 0.6625,
2091
+ "step": 3470
2092
+ },
2093
+ {
2094
+ "epoch": 1.47,
2095
+ "learning_rate": 1e-05,
2096
+ "loss": 0.7484,
2097
+ "step": 3480
2098
+ },
2099
+ {
2100
+ "epoch": 1.47,
2101
+ "learning_rate": 1e-05,
2102
+ "loss": 0.6711,
2103
+ "step": 3490
2104
+ },
2105
+ {
2106
+ "epoch": 1.48,
2107
+ "learning_rate": 1e-05,
2108
+ "loss": 0.6091,
2109
+ "step": 3500
2110
  }
2111
  ],
2112
  "logging_steps": 10,
2113
  "max_steps": 5000,
2114
  "num_train_epochs": 3,
2115
  "save_steps": 500,
2116
+ "total_flos": 879733537505280.0,
2117
  "trial_name": null,
2118
  "trial_params": null
2119
  }