Brendan King committed on
Commit
35243e0
·
1 Parent(s): d650b4a

using new run, 70k checkpoint (should be the same but this guarantees)

Browse files
Files changed (3) hide show
  1. rng_state.pth +1 -1
  2. trainer_state.json +77 -21
  3. training_args.bin +1 -1
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22c126a33cdf631e32f75c5cd6cce3986014431bc42bcead797ac3878b196a58
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79dddf5af28e0163f55619fe2d2e41d04d1bcfd16c1a57046c4421d73397e19a
3
  size 14503
trainer_state.json CHANGED
@@ -157,6 +157,14 @@
157
  "loss": 0.8922,
158
  "step": 5000
159
  },
 
 
 
 
 
 
 
 
160
  {
161
  "epoch": 0.67,
162
  "learning_rate": 0.001,
@@ -310,9 +318,9 @@
310
  {
311
  "epoch": 1.28,
312
  "eval_loss": 1.0378576517105103,
313
- "eval_runtime": 226.1856,
314
- "eval_samples_per_second": 540.6,
315
- "eval_steps_per_second": 2.113,
316
  "step": 10000
317
  },
318
  {
@@ -465,6 +473,14 @@
465
  "loss": 0.7486,
466
  "step": 15000
467
  },
 
 
 
 
 
 
 
 
468
  {
469
  "epoch": 1.95,
470
  "learning_rate": 0.001,
@@ -618,9 +634,9 @@
618
  {
619
  "epoch": 2.56,
620
  "eval_loss": 1.0125058889389038,
621
- "eval_runtime": 225.9641,
622
- "eval_samples_per_second": 541.13,
623
- "eval_steps_per_second": 2.115,
624
  "step": 20000
625
  },
626
  {
@@ -773,6 +789,14 @@
773
  "loss": 0.6766,
774
  "step": 25000
775
  },
 
 
 
 
 
 
 
 
776
  {
777
  "epoch": 3.23,
778
  "learning_rate": 0.001,
@@ -926,9 +950,9 @@
926
  {
927
  "epoch": 3.84,
928
  "eval_loss": 0.9890710711479187,
929
- "eval_runtime": 226.0587,
930
- "eval_samples_per_second": 540.904,
931
- "eval_steps_per_second": 2.114,
932
  "step": 30000
933
  },
934
  {
@@ -1081,6 +1105,14 @@
1081
  "loss": 0.6458,
1082
  "step": 35000
1083
  },
 
 
 
 
 
 
 
 
1084
  {
1085
  "epoch": 4.51,
1086
  "learning_rate": 0.001,
@@ -1234,9 +1266,9 @@
1234
  {
1235
  "epoch": 5.12,
1236
  "eval_loss": 0.9902318716049194,
1237
- "eval_runtime": 225.8841,
1238
- "eval_samples_per_second": 541.322,
1239
- "eval_steps_per_second": 2.116,
1240
  "step": 40000
1241
  },
1242
  {
@@ -1389,6 +1421,14 @@
1389
  "loss": 0.6393,
1390
  "step": 45000
1391
  },
 
 
 
 
 
 
 
 
1392
  {
1393
  "epoch": 5.79,
1394
  "learning_rate": 0.001,
@@ -1542,9 +1582,9 @@
1542
  {
1543
  "epoch": 6.41,
1544
  "eval_loss": 0.9930422902107239,
1545
- "eval_runtime": 225.7854,
1546
- "eval_samples_per_second": 541.558,
1547
- "eval_steps_per_second": 2.117,
1548
  "step": 50000
1549
  },
1550
  {
@@ -1697,6 +1737,14 @@
1697
  "loss": 0.5916,
1698
  "step": 55000
1699
  },
 
 
 
 
 
 
 
 
1700
  {
1701
  "epoch": 7.07,
1702
  "learning_rate": 0.001,
@@ -1850,9 +1898,9 @@
1850
  {
1851
  "epoch": 7.69,
1852
  "eval_loss": 0.9817278981208801,
1853
- "eval_runtime": 225.5322,
1854
- "eval_samples_per_second": 542.166,
1855
- "eval_steps_per_second": 2.119,
1856
  "step": 60000
1857
  },
1858
  {
@@ -2005,6 +2053,14 @@
2005
  "loss": 0.5928,
2006
  "step": 65000
2007
  },
 
 
 
 
 
 
 
 
2008
  {
2009
  "epoch": 8.35,
2010
  "learning_rate": 0.001,
@@ -2158,9 +2214,9 @@
2158
  {
2159
  "epoch": 8.97,
2160
  "eval_loss": 0.9769449234008789,
2161
- "eval_runtime": 225.6256,
2162
- "eval_samples_per_second": 541.942,
2163
- "eval_steps_per_second": 2.119,
2164
  "step": 70000
2165
  }
2166
  ],
 
157
  "loss": 0.8922,
158
  "step": 5000
159
  },
160
+ {
161
+ "epoch": 0.64,
162
+ "eval_loss": 1.1090599298477173,
163
+ "eval_runtime": 226.6186,
164
+ "eval_samples_per_second": 539.567,
165
+ "eval_steps_per_second": 2.109,
166
+ "step": 5000
167
+ },
168
  {
169
  "epoch": 0.67,
170
  "learning_rate": 0.001,
 
318
  {
319
  "epoch": 1.28,
320
  "eval_loss": 1.0378576517105103,
321
+ "eval_runtime": 226.2892,
322
+ "eval_samples_per_second": 540.353,
323
+ "eval_steps_per_second": 2.112,
324
  "step": 10000
325
  },
326
  {
 
473
  "loss": 0.7486,
474
  "step": 15000
475
  },
476
+ {
477
+ "epoch": 1.92,
478
+ "eval_loss": 1.012812852859497,
479
+ "eval_runtime": 226.2966,
480
+ "eval_samples_per_second": 540.335,
481
+ "eval_steps_per_second": 2.112,
482
+ "step": 15000
483
+ },
484
  {
485
  "epoch": 1.95,
486
  "learning_rate": 0.001,
 
634
  {
635
  "epoch": 2.56,
636
  "eval_loss": 1.0125058889389038,
637
+ "eval_runtime": 226.3642,
638
+ "eval_samples_per_second": 540.174,
639
+ "eval_steps_per_second": 2.112,
640
  "step": 20000
641
  },
642
  {
 
789
  "loss": 0.6766,
790
  "step": 25000
791
  },
792
+ {
793
+ "epoch": 3.2,
794
+ "eval_loss": 1.0152596235275269,
795
+ "eval_runtime": 226.4668,
796
+ "eval_samples_per_second": 539.929,
797
+ "eval_steps_per_second": 2.111,
798
+ "step": 25000
799
+ },
800
  {
801
  "epoch": 3.23,
802
  "learning_rate": 0.001,
 
950
  {
951
  "epoch": 3.84,
952
  "eval_loss": 0.9890710711479187,
953
+ "eval_runtime": 226.6603,
954
+ "eval_samples_per_second": 539.468,
955
+ "eval_steps_per_second": 2.109,
956
  "step": 30000
957
  },
958
  {
 
1105
  "loss": 0.6458,
1106
  "step": 35000
1107
  },
1108
+ {
1109
+ "epoch": 4.48,
1110
+ "eval_loss": 0.9963102340698242,
1111
+ "eval_runtime": 226.814,
1112
+ "eval_samples_per_second": 539.103,
1113
+ "eval_steps_per_second": 2.107,
1114
+ "step": 35000
1115
+ },
1116
  {
1117
  "epoch": 4.51,
1118
  "learning_rate": 0.001,
 
1266
  {
1267
  "epoch": 5.12,
1268
  "eval_loss": 0.9902318716049194,
1269
+ "eval_runtime": 227.3177,
1270
+ "eval_samples_per_second": 537.908,
1271
+ "eval_steps_per_second": 2.103,
1272
  "step": 40000
1273
  },
1274
  {
 
1421
  "loss": 0.6393,
1422
  "step": 45000
1423
  },
1424
+ {
1425
+ "epoch": 5.77,
1426
+ "eval_loss": 0.9893815517425537,
1427
+ "eval_runtime": 226.9791,
1428
+ "eval_samples_per_second": 538.71,
1429
+ "eval_steps_per_second": 2.106,
1430
+ "step": 45000
1431
+ },
1432
  {
1433
  "epoch": 5.79,
1434
  "learning_rate": 0.001,
 
1582
  {
1583
  "epoch": 6.41,
1584
  "eval_loss": 0.9930422902107239,
1585
+ "eval_runtime": 227.4872,
1586
+ "eval_samples_per_second": 537.507,
1587
+ "eval_steps_per_second": 2.101,
1588
  "step": 50000
1589
  },
1590
  {
 
1737
  "loss": 0.5916,
1738
  "step": 55000
1739
  },
1740
+ {
1741
+ "epoch": 7.05,
1742
+ "eval_loss": 0.9893601536750793,
1743
+ "eval_runtime": 227.4715,
1744
+ "eval_samples_per_second": 537.544,
1745
+ "eval_steps_per_second": 2.101,
1746
+ "step": 55000
1747
+ },
1748
  {
1749
  "epoch": 7.07,
1750
  "learning_rate": 0.001,
 
1898
  {
1899
  "epoch": 7.69,
1900
  "eval_loss": 0.9817278981208801,
1901
+ "eval_runtime": 227.2904,
1902
+ "eval_samples_per_second": 537.973,
1903
+ "eval_steps_per_second": 2.103,
1904
  "step": 60000
1905
  },
1906
  {
 
2053
  "loss": 0.5928,
2054
  "step": 65000
2055
  },
2056
+ {
2057
+ "epoch": 8.33,
2058
+ "eval_loss": 0.9951310753822327,
2059
+ "eval_runtime": 227.2886,
2060
+ "eval_samples_per_second": 537.977,
2061
+ "eval_steps_per_second": 2.103,
2062
+ "step": 65000
2063
+ },
2064
  {
2065
  "epoch": 8.35,
2066
  "learning_rate": 0.001,
 
2214
  {
2215
  "epoch": 8.97,
2216
  "eval_loss": 0.9769449234008789,
2217
+ "eval_runtime": 227.0836,
2218
+ "eval_samples_per_second": 538.462,
2219
+ "eval_steps_per_second": 2.105,
2220
  "step": 70000
2221
  }
2222
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38d78d1dc17a6778da9b9733ab5dd4ce95b95afee8d6edb3baa54c8032ac2321
3
  size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8793babdef84b903bcae96d2c78a8a9210b784403207209e9b7da34e411e81f7
3
  size 3439