joelniklaus committed on
Commit
1bded78
1 Parent(s): 578d703

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab515e1fe640432722359a2c1b73c30cb1877b811da7dd7eb4d12a0264d38ca7
3
  size 1668076741
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291f9655e8deedacd6ba5d051af226c4cb8ebd7310cc9e38474e2a0f61b6d10e
3
  size 1668076741
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a6b694759ac2afff917fcdce63156bddf5fee0be806cea69404c4a6ae5f6230
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abbe85aaf27562d1d64f10fe9cebefd891fb599fac4d07e142f8b427c63c674d
3
  size 834053717
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a4769972fb7cd6e12627b484967936feb779148fd2d15221e6fc67a874d999b
3
  size 15597
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5b5027ca54bd16ebf7bcd1124e301f3adcb854fc2dbe6bf992e7e1ea1ae077
3
  size 15597
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0909b42e35569611bafb37fd33209cd6a86113e40deac3fa1da41689cd651093
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0876994cba4fff49c925c2a122534ea9fff9a44c01c8ac7813ec4131482c3100
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bf931b79849e3c5ce67ad727efd02c50905b3121a21823c140a392dee34ce6e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d54ab2a4c0dda7883ab94b84e46d25097c34cdf01933d23db795cca974f4ce
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -75,11 +75,80 @@
75
  "eval_samples_per_second": 41.069,
76
  "eval_steps_per_second": 2.571,
77
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  }
79
  ],
80
  "max_steps": 50000,
81
  "num_train_epochs": 9223372036854775807,
82
- "total_flos": 6.7342134411264e+18,
83
  "trial_name": null,
84
  "trial_params": null
85
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
75
  "eval_samples_per_second": 41.069,
76
  "eval_steps_per_second": 2.571,
77
  "step": 10000
78
+ },
79
+ {
80
+ "epoch": 0.22,
81
+ "learning_rate": 2.7697770521953444e-05,
82
+ "loss": 0.8444,
83
+ "step": 11000
84
+ },
85
+ {
86
+ "epoch": 0.24,
87
+ "learning_rate": 2.714224866611362e-05,
88
+ "loss": 0.7575,
89
+ "step": 12000
90
+ },
91
+ {
92
+ "epoch": 0.26,
93
+ "learning_rate": 2.6533631846300875e-05,
94
+ "loss": 0.7919,
95
+ "step": 13000
96
+ },
97
+ {
98
+ "epoch": 0.28,
99
+ "learning_rate": 2.587458138901327e-05,
100
+ "loss": 0.833,
101
+ "step": 14000
102
+ },
103
+ {
104
+ "epoch": 0.3,
105
+ "learning_rate": 2.516797915421335e-05,
106
+ "loss": 0.8335,
107
+ "step": 15000
108
+ },
109
+ {
110
+ "epoch": 0.32,
111
+ "learning_rate": 2.4416914933687753e-05,
112
+ "loss": 0.8382,
113
+ "step": 16000
114
+ },
115
+ {
116
+ "epoch": 0.34,
117
+ "learning_rate": 2.3624672940173894e-05,
118
+ "loss": 0.8253,
119
+ "step": 17000
120
+ },
121
+ {
122
+ "epoch": 0.36,
123
+ "learning_rate": 2.27947174463333e-05,
124
+ "loss": 0.8442,
125
+ "step": 18000
126
+ },
127
+ {
128
+ "epoch": 0.38,
129
+ "learning_rate": 2.193067763636869e-05,
130
+ "loss": 0.8794,
131
+ "step": 19000
132
+ },
133
+ {
134
+ "epoch": 0.4,
135
+ "learning_rate": 2.1036331736524874e-05,
136
+ "loss": 0.8631,
137
+ "step": 20000
138
+ },
139
+ {
140
+ "epoch": 0.4,
141
+ "eval_accuracy": 0.855058580917128,
142
+ "eval_loss": 0.6343714594841003,
143
+ "eval_runtime": 151.1949,
144
+ "eval_samples_per_second": 33.07,
145
+ "eval_steps_per_second": 2.07,
146
+ "step": 20000
147
  }
148
  ],
149
  "max_steps": 50000,
150
  "num_train_epochs": 9223372036854775807,
151
+ "total_flos": 1.34684268822528e+19,
152
  "trial_name": null,
153
  "trial_params": null
154
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a6b694759ac2afff917fcdce63156bddf5fee0be806cea69404c4a6ae5f6230
3
  size 834053717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abbe85aaf27562d1d64f10fe9cebefd891fb599fac4d07e142f8b427c63c674d
3
  size 834053717