kmnis commited on
Commit
47e8727
·
1 Parent(s): 07f1355

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7475b8ecf45853a6e11d3131546bc2494a0f2f9aef14d558aebd64613aa5fa63
3
  size 19744138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b074dc89146bd028c0664b132fc46bfc59201185bda06775c72139e7c815a15
3
  size 19744138
last-checkpoint/global_step4000/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce6b4c9dcedd64467ad302888d9d03990b2a496988b7b6c2740204ce7313f44e
3
+ size 6508458036
last-checkpoint/global_step4000/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c299557772557013b972c2a06c9a6bb6602373482f42a769c8f7a6aa04cefccb
3
+ size 29495149
last-checkpoint/global_step4000/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e86fd6ae2c479448da228eb07296019768c4f0e4dc30347b9037e01cb86c994b
3
+ size 6508458036
last-checkpoint/global_step4000/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ea5e6e6c511616aee9a69a7f0f2412db28efbab49b0ec207d9d03fdd503ee72
3
+ size 29495149
last-checkpoint/global_step4000/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4596995abffaa68f30aeed25446c64c5dfc8f5723d5c9cf9cbcc4afb30924de4
3
+ size 6508458036
last-checkpoint/global_step4000/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd30c5a87bdb041aa371932bc4b3e79aadcfcde787a1c043bd483e5ab5c52d1
3
+ size 29495149
last-checkpoint/global_step4000/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91826afa1d75a4d9fbb2622cdf64c876ef725685a3429693964d9c91d12f04ef
3
+ size 6508458036
last-checkpoint/global_step4000/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f70fbe73734097b278f8f7aa50fc9d5f1fe73a7ba0143a9e58741a5757e5594
3
+ size 29495149
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step3500
 
1
+ global_step4000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d17f7aa721b5bf66d195101eeb7532aba710f38c1348eeda7e9e1927fcb5d364
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cda7acc40557204710df648d13c6c64dd3bee9e11d98ca8ec6bf9765f6fd55b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b00c1e9949deeaeaee9b8afe63d1213b81b693226f393b73c75b76cb9300b54a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df659af25f38deebdca58158c68718a93017141cfb7b33e8079633d427d6debf
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bf736fc64eb84ddbe364e183cfed301bd08c6ce9463f9aacea48ab077038026
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f89a12d795a7d9929749bfcc711935eda3c929f167285a61a0defe0e6815157d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4ff4aac5f7d7fe8d6253945ad55d59286ca926a9e46b229588f4a981b62a9d0
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595bcf6bb327a594b78f46fc654a60578f254399a5961cafc50cf97fe1934fba
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4755480607082632,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2107,13 +2107,313 @@
2107
  "learning_rate": 1e-05,
2108
  "loss": 0.6091,
2109
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2110
  }
2111
  ],
2112
  "logging_steps": 10,
2113
  "max_steps": 5000,
2114
  "num_train_epochs": 3,
2115
  "save_steps": 500,
2116
- "total_flos": 879733537505280.0,
2117
  "trial_name": null,
2118
  "trial_params": null
2119
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6863406408094435,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2107
  "learning_rate": 1e-05,
2108
  "loss": 0.6091,
2109
  "step": 3500
2110
+ },
2111
+ {
2112
+ "epoch": 1.48,
2113
+ "learning_rate": 1e-05,
2114
+ "loss": 0.6603,
2115
+ "step": 3510
2116
+ },
2117
+ {
2118
+ "epoch": 1.48,
2119
+ "learning_rate": 1e-05,
2120
+ "loss": 0.6306,
2121
+ "step": 3520
2122
+ },
2123
+ {
2124
+ "epoch": 1.49,
2125
+ "learning_rate": 1e-05,
2126
+ "loss": 0.6345,
2127
+ "step": 3530
2128
+ },
2129
+ {
2130
+ "epoch": 1.49,
2131
+ "learning_rate": 1e-05,
2132
+ "loss": 0.636,
2133
+ "step": 3540
2134
+ },
2135
+ {
2136
+ "epoch": 1.5,
2137
+ "learning_rate": 1e-05,
2138
+ "loss": 0.692,
2139
+ "step": 3550
2140
+ },
2141
+ {
2142
+ "epoch": 1.5,
2143
+ "learning_rate": 1e-05,
2144
+ "loss": 0.6703,
2145
+ "step": 3560
2146
+ },
2147
+ {
2148
+ "epoch": 1.51,
2149
+ "learning_rate": 1e-05,
2150
+ "loss": 0.672,
2151
+ "step": 3570
2152
+ },
2153
+ {
2154
+ "epoch": 1.51,
2155
+ "learning_rate": 1e-05,
2156
+ "loss": 0.6706,
2157
+ "step": 3580
2158
+ },
2159
+ {
2160
+ "epoch": 1.51,
2161
+ "learning_rate": 1e-05,
2162
+ "loss": 0.6546,
2163
+ "step": 3590
2164
+ },
2165
+ {
2166
+ "epoch": 1.52,
2167
+ "learning_rate": 1e-05,
2168
+ "loss": 0.6503,
2169
+ "step": 3600
2170
+ },
2171
+ {
2172
+ "epoch": 1.52,
2173
+ "learning_rate": 1e-05,
2174
+ "loss": 0.6934,
2175
+ "step": 3610
2176
+ },
2177
+ {
2178
+ "epoch": 1.53,
2179
+ "learning_rate": 1e-05,
2180
+ "loss": 0.6327,
2181
+ "step": 3620
2182
+ },
2183
+ {
2184
+ "epoch": 1.53,
2185
+ "learning_rate": 1e-05,
2186
+ "loss": 0.6649,
2187
+ "step": 3630
2188
+ },
2189
+ {
2190
+ "epoch": 1.53,
2191
+ "learning_rate": 1e-05,
2192
+ "loss": 0.6225,
2193
+ "step": 3640
2194
+ },
2195
+ {
2196
+ "epoch": 1.54,
2197
+ "learning_rate": 1e-05,
2198
+ "loss": 0.6425,
2199
+ "step": 3650
2200
+ },
2201
+ {
2202
+ "epoch": 1.54,
2203
+ "learning_rate": 1e-05,
2204
+ "loss": 0.6409,
2205
+ "step": 3660
2206
+ },
2207
+ {
2208
+ "epoch": 1.55,
2209
+ "learning_rate": 1e-05,
2210
+ "loss": 0.6685,
2211
+ "step": 3670
2212
+ },
2213
+ {
2214
+ "epoch": 1.55,
2215
+ "learning_rate": 1e-05,
2216
+ "loss": 0.7274,
2217
+ "step": 3680
2218
+ },
2219
+ {
2220
+ "epoch": 1.56,
2221
+ "learning_rate": 1e-05,
2222
+ "loss": 0.7256,
2223
+ "step": 3690
2224
+ },
2225
+ {
2226
+ "epoch": 1.56,
2227
+ "learning_rate": 1e-05,
2228
+ "loss": 0.6972,
2229
+ "step": 3700
2230
+ },
2231
+ {
2232
+ "epoch": 1.56,
2233
+ "learning_rate": 1e-05,
2234
+ "loss": 0.6425,
2235
+ "step": 3710
2236
+ },
2237
+ {
2238
+ "epoch": 1.57,
2239
+ "learning_rate": 1e-05,
2240
+ "loss": 0.6627,
2241
+ "step": 3720
2242
+ },
2243
+ {
2244
+ "epoch": 1.57,
2245
+ "learning_rate": 1e-05,
2246
+ "loss": 0.7265,
2247
+ "step": 3730
2248
+ },
2249
+ {
2250
+ "epoch": 1.58,
2251
+ "learning_rate": 1e-05,
2252
+ "loss": 0.6436,
2253
+ "step": 3740
2254
+ },
2255
+ {
2256
+ "epoch": 1.58,
2257
+ "learning_rate": 1e-05,
2258
+ "loss": 0.6597,
2259
+ "step": 3750
2260
+ },
2261
+ {
2262
+ "epoch": 1.59,
2263
+ "learning_rate": 1e-05,
2264
+ "loss": 0.6968,
2265
+ "step": 3760
2266
+ },
2267
+ {
2268
+ "epoch": 1.59,
2269
+ "learning_rate": 1e-05,
2270
+ "loss": 0.6513,
2271
+ "step": 3770
2272
+ },
2273
+ {
2274
+ "epoch": 1.59,
2275
+ "learning_rate": 1e-05,
2276
+ "loss": 0.6204,
2277
+ "step": 3780
2278
+ },
2279
+ {
2280
+ "epoch": 1.6,
2281
+ "learning_rate": 1e-05,
2282
+ "loss": 0.7159,
2283
+ "step": 3790
2284
+ },
2285
+ {
2286
+ "epoch": 1.6,
2287
+ "learning_rate": 1e-05,
2288
+ "loss": 0.6509,
2289
+ "step": 3800
2290
+ },
2291
+ {
2292
+ "epoch": 1.61,
2293
+ "learning_rate": 1e-05,
2294
+ "loss": 0.6105,
2295
+ "step": 3810
2296
+ },
2297
+ {
2298
+ "epoch": 1.61,
2299
+ "learning_rate": 1e-05,
2300
+ "loss": 0.6581,
2301
+ "step": 3820
2302
+ },
2303
+ {
2304
+ "epoch": 1.61,
2305
+ "learning_rate": 1e-05,
2306
+ "loss": 0.6775,
2307
+ "step": 3830
2308
+ },
2309
+ {
2310
+ "epoch": 1.62,
2311
+ "learning_rate": 1e-05,
2312
+ "loss": 0.6243,
2313
+ "step": 3840
2314
+ },
2315
+ {
2316
+ "epoch": 1.62,
2317
+ "learning_rate": 1e-05,
2318
+ "loss": 0.644,
2319
+ "step": 3850
2320
+ },
2321
+ {
2322
+ "epoch": 1.63,
2323
+ "learning_rate": 1e-05,
2324
+ "loss": 0.6713,
2325
+ "step": 3860
2326
+ },
2327
+ {
2328
+ "epoch": 1.63,
2329
+ "learning_rate": 1e-05,
2330
+ "loss": 0.6633,
2331
+ "step": 3870
2332
+ },
2333
+ {
2334
+ "epoch": 1.64,
2335
+ "learning_rate": 1e-05,
2336
+ "loss": 0.7093,
2337
+ "step": 3880
2338
+ },
2339
+ {
2340
+ "epoch": 1.64,
2341
+ "learning_rate": 1e-05,
2342
+ "loss": 0.685,
2343
+ "step": 3890
2344
+ },
2345
+ {
2346
+ "epoch": 1.64,
2347
+ "learning_rate": 1e-05,
2348
+ "loss": 0.6923,
2349
+ "step": 3900
2350
+ },
2351
+ {
2352
+ "epoch": 1.65,
2353
+ "learning_rate": 1e-05,
2354
+ "loss": 0.5949,
2355
+ "step": 3910
2356
+ },
2357
+ {
2358
+ "epoch": 1.65,
2359
+ "learning_rate": 1e-05,
2360
+ "loss": 0.738,
2361
+ "step": 3920
2362
+ },
2363
+ {
2364
+ "epoch": 1.66,
2365
+ "learning_rate": 1e-05,
2366
+ "loss": 0.6444,
2367
+ "step": 3930
2368
+ },
2369
+ {
2370
+ "epoch": 1.66,
2371
+ "learning_rate": 1e-05,
2372
+ "loss": 0.6608,
2373
+ "step": 3940
2374
+ },
2375
+ {
2376
+ "epoch": 1.67,
2377
+ "learning_rate": 1e-05,
2378
+ "loss": 0.6469,
2379
+ "step": 3950
2380
+ },
2381
+ {
2382
+ "epoch": 1.67,
2383
+ "learning_rate": 1e-05,
2384
+ "loss": 0.6802,
2385
+ "step": 3960
2386
+ },
2387
+ {
2388
+ "epoch": 1.67,
2389
+ "learning_rate": 1e-05,
2390
+ "loss": 0.6474,
2391
+ "step": 3970
2392
+ },
2393
+ {
2394
+ "epoch": 1.68,
2395
+ "learning_rate": 1e-05,
2396
+ "loss": 0.69,
2397
+ "step": 3980
2398
+ },
2399
+ {
2400
+ "epoch": 1.68,
2401
+ "learning_rate": 1e-05,
2402
+ "loss": 0.658,
2403
+ "step": 3990
2404
+ },
2405
+ {
2406
+ "epoch": 1.69,
2407
+ "learning_rate": 1e-05,
2408
+ "loss": 0.6745,
2409
+ "step": 4000
2410
  }
2411
  ],
2412
  "logging_steps": 10,
2413
  "max_steps": 5000,
2414
  "num_train_epochs": 3,
2415
  "save_steps": 500,
2416
+ "total_flos": 1005625572065280.0,
2417
  "trial_name": null,
2418
  "trial_params": null
2419
  }