mstatt commited on
Commit
f2225d3
·
1 Parent(s): 3da7815

Upload 11 files

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +2 -2
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +319 -169
  7. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./results/checkpoint-9500",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "Falconsai/offensive_speech_detection",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:673f7e8faafc2900bf4a85994f72ca80ca0eaa25661c2e02ac87c4226ca7f61d
3
  size 535701061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b899111ea3377e5da91f3d3a7b9a5469fe38a5d32f5d5de7ba78be8d9d05dd06
3
  size 535701061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70dddf91e70a194298473e14829bcc7a7532a0b87a5da6230120834841a752b5
3
- size 267854125
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39d37f832bcc5130375bd55ff03fcdc605dd1b107daf02d94f9ab70bd5a03870
3
+ size 267855533
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a09cc498d55dc004d4b5c9aad1ced395abd939cae4e8d2b9da0e0073f465759a
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d9d03279386093ef24a2471dd73f225aa2f5237d1d26d73d52e9e651e8a920e
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d40fb1a7f26e8683137c4b8cddc0f30db08f76a6b3d086416086ebaaa51cc5d5
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff99d9499703f7b05e302fcc418b2bce824c378e94cf85fd2180fafca8a9e2c5
3
  size 627
trainer_state.json CHANGED
@@ -1,286 +1,436 @@
1
  {
2
- "best_metric": 0.01757008023560047,
3
- "best_model_checkpoint": "./results/checkpoint-8500",
4
- "epoch": 1.031873423526714,
5
- "global_step": 9000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.06,
12
- "learning_rate": 1.9617824657953072e-05,
13
- "loss": 0.058,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.06,
18
- "eval_accuracy": 0.9880467809436451,
19
- "eval_loss": 0.04749465361237526,
20
- "eval_runtime": 369.9155,
21
- "eval_samples_per_second": 94.308,
22
- "eval_steps_per_second": 5.896,
23
  "step": 500
24
  },
25
  {
26
- "epoch": 0.11,
27
- "learning_rate": 1.923564931590614e-05,
28
- "loss": 0.0513,
29
  "step": 1000
30
  },
31
  {
32
- "epoch": 0.11,
33
- "eval_accuracy": 0.9892220374935504,
34
- "eval_loss": 0.044388506561517715,
35
- "eval_runtime": 313.5583,
36
- "eval_samples_per_second": 111.258,
37
- "eval_steps_per_second": 6.956,
38
  "step": 1000
39
  },
40
  {
41
- "epoch": 0.17,
42
- "learning_rate": 1.885347397385921e-05,
43
- "loss": 0.0413,
44
  "step": 1500
45
  },
46
  {
47
- "epoch": 0.17,
48
- "eval_accuracy": 0.9901393108983546,
49
- "eval_loss": 0.044446878135204315,
50
- "eval_runtime": 322.6458,
51
- "eval_samples_per_second": 108.125,
52
- "eval_steps_per_second": 6.76,
53
  "step": 1500
54
  },
55
  {
56
- "epoch": 0.23,
57
- "learning_rate": 1.847129863181228e-05,
58
- "loss": 0.0364,
59
  "step": 2000
60
  },
61
  {
62
- "epoch": 0.23,
63
- "eval_accuracy": 0.9911712434787594,
64
- "eval_loss": 0.037910301238298416,
65
- "eval_runtime": 258.562,
66
- "eval_samples_per_second": 134.923,
67
- "eval_steps_per_second": 8.435,
68
  "step": 2000
69
  },
70
  {
71
- "epoch": 0.29,
72
- "learning_rate": 1.8089123289765345e-05,
73
- "loss": 0.0389,
74
  "step": 2500
75
  },
76
  {
77
- "epoch": 0.29,
78
- "eval_accuracy": 0.9921171816774638,
79
- "eval_loss": 0.03007333353161812,
80
- "eval_runtime": 258.8834,
81
- "eval_samples_per_second": 134.756,
82
- "eval_steps_per_second": 8.425,
83
  "step": 2500
84
  },
85
  {
86
- "epoch": 0.34,
87
- "learning_rate": 1.7706947947718412e-05,
88
- "loss": 0.0343,
89
  "step": 3000
90
  },
91
  {
92
- "epoch": 0.34,
93
- "eval_accuracy": 0.9923465000286648,
94
- "eval_loss": 0.03364783525466919,
95
- "eval_runtime": 258.3829,
96
- "eval_samples_per_second": 135.017,
97
- "eval_steps_per_second": 8.441,
98
  "step": 3000
99
  },
100
  {
101
- "epoch": 0.4,
102
- "learning_rate": 1.7324772605671482e-05,
103
- "loss": 0.0351,
104
  "step": 3500
105
  },
106
  {
107
- "epoch": 0.4,
108
- "eval_accuracy": 0.9918591985323626,
109
- "eval_loss": 0.030066516250371933,
110
- "eval_runtime": 258.5585,
111
- "eval_samples_per_second": 134.925,
112
- "eval_steps_per_second": 8.435,
113
  "step": 3500
114
  },
115
  {
116
- "epoch": 0.46,
117
- "learning_rate": 1.6942597263624552e-05,
118
- "loss": 0.0343,
119
  "step": 4000
120
  },
121
  {
122
- "epoch": 0.46,
123
- "eval_accuracy": 0.9925471535859657,
124
- "eval_loss": 0.02801605314016342,
125
- "eval_runtime": 308.2791,
126
- "eval_samples_per_second": 113.164,
127
- "eval_steps_per_second": 7.075,
128
  "step": 4000
129
  },
130
  {
131
- "epoch": 0.52,
132
- "learning_rate": 1.6560421921577622e-05,
133
- "loss": 0.0328,
134
  "step": 4500
135
  },
136
  {
137
- "epoch": 0.52,
138
- "eval_accuracy": 0.9937224101358711,
139
- "eval_loss": 0.022993654012680054,
140
- "eval_runtime": 304.7606,
141
- "eval_samples_per_second": 114.47,
142
- "eval_steps_per_second": 7.156,
143
  "step": 4500
144
  },
145
  {
146
- "epoch": 0.57,
147
- "learning_rate": 1.617824657953069e-05,
148
- "loss": 0.0322,
149
  "step": 5000
150
  },
151
  {
152
- "epoch": 0.57,
153
- "eval_accuracy": 0.9930631198761681,
154
- "eval_loss": 0.022875914350152016,
155
- "eval_runtime": 509.8625,
156
- "eval_samples_per_second": 68.422,
157
- "eval_steps_per_second": 4.278,
158
  "step": 5000
159
  },
160
  {
161
- "epoch": 0.63,
162
- "learning_rate": 1.579607123748376e-05,
163
- "loss": 0.0275,
164
  "step": 5500
165
  },
166
  {
167
- "epoch": 0.63,
168
- "eval_accuracy": 0.994353035601674,
169
- "eval_loss": 0.024034755304455757,
170
- "eval_runtime": 296.8109,
171
- "eval_samples_per_second": 117.536,
172
- "eval_steps_per_second": 7.348,
173
  "step": 5500
174
  },
175
  {
176
- "epoch": 0.69,
177
- "learning_rate": 1.541389589543683e-05,
178
- "loss": 0.0288,
179
  "step": 6000
180
  },
181
  {
182
- "epoch": 0.69,
183
- "eval_accuracy": 0.9946396835406753,
184
- "eval_loss": 0.022033799439668655,
185
- "eval_runtime": 293.7762,
186
- "eval_samples_per_second": 118.75,
187
- "eval_steps_per_second": 7.424,
188
  "step": 6000
189
  },
190
  {
191
- "epoch": 0.75,
192
- "learning_rate": 1.5031720553389898e-05,
193
- "loss": 0.0244,
194
  "step": 6500
195
  },
196
  {
197
- "epoch": 0.75,
198
- "eval_accuracy": 0.9947543427162758,
199
- "eval_loss": 0.02473697066307068,
200
- "eval_runtime": 331.6878,
201
- "eval_samples_per_second": 105.177,
202
- "eval_steps_per_second": 6.575,
203
  "step": 6500
204
  },
205
  {
206
- "epoch": 0.8,
207
- "learning_rate": 1.4649545211342966e-05,
208
- "loss": 0.0243,
209
  "step": 7000
210
  },
211
  {
212
- "epoch": 0.8,
213
- "eval_accuracy": 0.9946970131284756,
214
- "eval_loss": 0.02222474291920662,
215
- "eval_runtime": 343.6254,
216
- "eval_samples_per_second": 101.523,
217
- "eval_steps_per_second": 6.347,
218
  "step": 7000
219
  },
220
  {
221
- "epoch": 0.86,
222
- "learning_rate": 1.4267369869296034e-05,
223
- "loss": 0.0204,
224
  "step": 7500
225
  },
226
  {
227
- "epoch": 0.86,
228
- "eval_accuracy": 0.9944390299833744,
229
- "eval_loss": 0.020777888596057892,
230
- "eval_runtime": 342.3258,
231
- "eval_samples_per_second": 101.909,
232
- "eval_steps_per_second": 6.371,
233
  "step": 7500
234
  },
235
  {
236
- "epoch": 0.92,
237
- "learning_rate": 1.3885194527249105e-05,
238
- "loss": 0.0219,
239
  "step": 8000
240
  },
241
  {
242
- "epoch": 0.92,
243
- "eval_accuracy": 0.9955569569454795,
244
- "eval_loss": 0.023240169510245323,
245
- "eval_runtime": 340.3493,
246
- "eval_samples_per_second": 102.501,
247
- "eval_steps_per_second": 6.408,
248
  "step": 8000
249
  },
250
  {
251
- "epoch": 0.97,
252
- "learning_rate": 1.3503019185202171e-05,
253
- "loss": 0.0298,
254
  "step": 8500
255
  },
256
  {
257
- "epoch": 0.97,
258
- "eval_accuracy": 0.9955282921515795,
259
- "eval_loss": 0.01757008023560047,
260
- "eval_runtime": 307.8345,
261
- "eval_samples_per_second": 113.327,
262
- "eval_steps_per_second": 7.085,
263
  "step": 8500
264
  },
265
  {
266
- "epoch": 1.03,
267
- "learning_rate": 1.312084384315524e-05,
268
- "loss": 0.0098,
269
  "step": 9000
270
  },
271
  {
272
- "epoch": 1.03,
273
- "eval_accuracy": 0.9951843146247779,
274
- "eval_loss": 0.026897920295596123,
275
- "eval_runtime": 318.0815,
276
- "eval_samples_per_second": 109.676,
277
- "eval_steps_per_second": 6.857,
278
  "step": 9000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  }
280
  ],
281
- "max_steps": 26166,
282
- "num_train_epochs": 3,
283
- "total_flos": 1.907424566727475e+16,
284
  "trial_name": null,
285
  "trial_params": null
286
  }
 
1
  {
2
+ "best_metric": 0.010238240472972393,
3
+ "best_model_checkpoint": "./results/checkpoint-14000",
4
+ "epoch": 0.8173273395995097,
5
+ "global_step": 14000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.03,
12
+ "learning_rate": 1.9708097378714462e-05,
13
+ "loss": 0.014,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.03,
18
+ "eval_accuracy": 0.9974897107329461,
19
+ "eval_loss": 0.014051680453121662,
20
+ "eval_runtime": 272.9865,
21
+ "eval_samples_per_second": 125.497,
22
+ "eval_steps_per_second": 15.689,
23
  "step": 500
24
  },
25
  {
26
+ "epoch": 0.06,
27
+ "learning_rate": 1.9416194757428922e-05,
28
+ "loss": 0.0068,
29
  "step": 1000
30
  },
31
  {
32
+ "epoch": 0.06,
33
+ "eval_accuracy": 0.9936658980122012,
34
+ "eval_loss": 0.03880644589662552,
35
+ "eval_runtime": 271.4393,
36
+ "eval_samples_per_second": 126.212,
37
+ "eval_steps_per_second": 15.779,
38
  "step": 1000
39
  },
40
  {
41
+ "epoch": 0.09,
42
+ "learning_rate": 1.9124292136143383e-05,
43
+ "loss": 0.0065,
44
  "step": 1500
45
  },
46
  {
47
+ "epoch": 0.09,
48
+ "eval_accuracy": 0.9963221343296652,
49
+ "eval_loss": 0.034014396369457245,
50
+ "eval_runtime": 271.2782,
51
+ "eval_samples_per_second": 126.287,
52
+ "eval_steps_per_second": 15.788,
53
  "step": 1500
54
  },
55
  {
56
+ "epoch": 0.12,
57
+ "learning_rate": 1.8832389514857846e-05,
58
+ "loss": 0.0088,
59
  "step": 2000
60
  },
61
  {
62
+ "epoch": 0.12,
63
+ "eval_accuracy": 0.9975189001430281,
64
+ "eval_loss": 0.01912350207567215,
65
+ "eval_runtime": 271.5387,
66
+ "eval_samples_per_second": 126.166,
67
+ "eval_steps_per_second": 15.773,
68
  "step": 2000
69
  },
70
  {
71
+ "epoch": 0.15,
72
+ "learning_rate": 1.8540486893572303e-05,
73
+ "loss": 0.0081,
74
  "step": 2500
75
  },
76
  {
77
+ "epoch": 0.15,
78
+ "eval_accuracy": 0.9977816048337663,
79
+ "eval_loss": 0.01667490415275097,
80
+ "eval_runtime": 271.3282,
81
+ "eval_samples_per_second": 126.264,
82
+ "eval_steps_per_second": 15.785,
83
  "step": 2500
84
  },
85
  {
86
+ "epoch": 0.18,
87
+ "learning_rate": 1.8248584272286767e-05,
88
+ "loss": 0.0142,
89
  "step": 3000
90
  },
91
  {
92
+ "epoch": 0.18,
93
+ "eval_accuracy": 0.9969351119413876,
94
+ "eval_loss": 0.017350222915410995,
95
+ "eval_runtime": 271.4826,
96
+ "eval_samples_per_second": 126.192,
97
+ "eval_steps_per_second": 15.776,
98
  "step": 3000
99
  },
100
  {
101
+ "epoch": 0.2,
102
+ "learning_rate": 1.7956681651001227e-05,
103
+ "loss": 0.0066,
104
  "step": 3500
105
  },
106
  {
107
+ "epoch": 0.2,
108
+ "eval_accuracy": 0.9967891648909776,
109
+ "eval_loss": 0.025246594101190567,
110
+ "eval_runtime": 271.289,
111
+ "eval_samples_per_second": 126.282,
112
+ "eval_steps_per_second": 15.788,
113
  "step": 3500
114
  },
115
  {
116
+ "epoch": 0.23,
117
+ "learning_rate": 1.7664779029715688e-05,
118
+ "loss": 0.0078,
119
  "step": 4000
120
  },
121
  {
122
+ "epoch": 0.23,
123
+ "eval_accuracy": 0.9965848390204034,
124
+ "eval_loss": 0.02241995558142662,
125
+ "eval_runtime": 271.2726,
126
+ "eval_samples_per_second": 126.29,
127
+ "eval_steps_per_second": 15.789,
128
  "step": 4000
129
  },
130
  {
131
+ "epoch": 0.26,
132
+ "learning_rate": 1.7372876408430148e-05,
133
+ "loss": 0.0062,
134
  "step": 4500
135
  },
136
  {
137
+ "epoch": 0.26,
138
+ "eval_accuracy": 0.9968183543010596,
139
+ "eval_loss": 0.021609827876091003,
140
+ "eval_runtime": 271.3057,
141
+ "eval_samples_per_second": 126.275,
142
+ "eval_steps_per_second": 15.787,
143
  "step": 4500
144
  },
145
  {
146
+ "epoch": 0.29,
147
+ "learning_rate": 1.7080973787144612e-05,
148
+ "loss": 0.0047,
149
  "step": 5000
150
  },
151
  {
152
+ "epoch": 0.29,
153
+ "eval_accuracy": 0.9971978166321258,
154
+ "eval_loss": 0.01993330754339695,
155
+ "eval_runtime": 271.1898,
156
+ "eval_samples_per_second": 126.328,
157
+ "eval_steps_per_second": 15.793,
158
  "step": 5000
159
  },
160
  {
161
+ "epoch": 0.32,
162
+ "learning_rate": 1.678907116585907e-05,
163
+ "loss": 0.0035,
164
  "step": 5500
165
  },
166
  {
167
+ "epoch": 0.32,
168
+ "eval_accuracy": 0.9968767331212236,
169
+ "eval_loss": 0.025054221972823143,
170
+ "eval_runtime": 272.0376,
171
+ "eval_samples_per_second": 125.935,
172
+ "eval_steps_per_second": 15.744,
173
  "step": 5500
174
  },
175
  {
176
+ "epoch": 0.35,
177
+ "learning_rate": 1.6497168544573532e-05,
178
+ "loss": 0.0147,
179
  "step": 6000
180
  },
181
  {
182
+ "epoch": 0.35,
183
+ "eval_accuracy": 0.997343763682536,
184
+ "eval_loss": 0.017792223021388054,
185
+ "eval_runtime": 271.3522,
186
+ "eval_samples_per_second": 126.253,
187
+ "eval_steps_per_second": 15.784,
188
  "step": 6000
189
  },
190
  {
191
+ "epoch": 0.38,
192
+ "learning_rate": 1.6205265923287993e-05,
193
+ "loss": 0.0093,
194
  "step": 6500
195
  },
196
  {
197
+ "epoch": 0.38,
198
+ "eval_accuracy": 0.9971102484018798,
199
+ "eval_loss": 0.019385505467653275,
200
+ "eval_runtime": 270.7817,
201
+ "eval_samples_per_second": 126.519,
202
+ "eval_steps_per_second": 15.817,
203
  "step": 6500
204
  },
205
  {
206
+ "epoch": 0.41,
207
+ "learning_rate": 1.5913363302002453e-05,
208
+ "loss": 0.0084,
209
  "step": 7000
210
  },
211
  {
212
+ "epoch": 0.41,
213
+ "eval_accuracy": 0.9974897107329461,
214
+ "eval_loss": 0.01369693223387003,
215
+ "eval_runtime": 270.7077,
216
+ "eval_samples_per_second": 126.553,
217
+ "eval_steps_per_second": 15.821,
218
  "step": 7000
219
  },
220
  {
221
+ "epoch": 0.44,
222
+ "learning_rate": 1.5621460680716913e-05,
223
+ "loss": 0.0135,
224
  "step": 7500
225
  },
226
  {
227
+ "epoch": 0.44,
228
+ "eval_accuracy": 0.9967307860708136,
229
+ "eval_loss": 0.023163480684161186,
230
+ "eval_runtime": 270.9905,
231
+ "eval_samples_per_second": 126.421,
232
+ "eval_steps_per_second": 15.805,
233
  "step": 7500
234
  },
235
  {
236
+ "epoch": 0.47,
237
+ "learning_rate": 1.5329558059431374e-05,
238
+ "loss": 0.0131,
239
  "step": 8000
240
  },
241
  {
242
+ "epoch": 0.47,
243
+ "eval_accuracy": 0.9974605213228641,
244
+ "eval_loss": 0.015544239431619644,
245
+ "eval_runtime": 270.8012,
246
+ "eval_samples_per_second": 126.51,
247
+ "eval_steps_per_second": 15.816,
248
  "step": 8000
249
  },
250
  {
251
+ "epoch": 0.5,
252
+ "learning_rate": 1.5037655438145836e-05,
253
+ "loss": 0.0196,
254
  "step": 8500
255
  },
256
  {
257
+ "epoch": 0.5,
258
+ "eval_accuracy": 0.9965556496103214,
259
+ "eval_loss": 0.01619912125170231,
260
+ "eval_runtime": 272.0032,
261
+ "eval_samples_per_second": 125.951,
262
+ "eval_steps_per_second": 15.746,
263
  "step": 8500
264
  },
265
  {
266
+ "epoch": 0.53,
267
+ "learning_rate": 1.4745752816860298e-05,
268
+ "loss": 0.0207,
269
  "step": 9000
270
  },
271
  {
272
+ "epoch": 0.53,
273
+ "eval_accuracy": 0.9978691730640123,
274
+ "eval_loss": 0.011031306348741055,
275
+ "eval_runtime": 270.862,
276
+ "eval_samples_per_second": 126.481,
277
+ "eval_steps_per_second": 15.812,
278
  "step": 9000
279
+ },
280
+ {
281
+ "epoch": 0.55,
282
+ "learning_rate": 1.4453850195574756e-05,
283
+ "loss": 0.0172,
284
+ "step": 9500
285
+ },
286
+ {
287
+ "epoch": 0.55,
288
+ "eval_accuracy": 0.9969059225313056,
289
+ "eval_loss": 0.013371887616813183,
290
+ "eval_runtime": 270.9823,
291
+ "eval_samples_per_second": 126.425,
292
+ "eval_steps_per_second": 15.805,
293
+ "step": 9500
294
+ },
295
+ {
296
+ "epoch": 0.58,
297
+ "learning_rate": 1.4161947574289218e-05,
298
+ "loss": 0.0147,
299
+ "step": 10000
300
+ },
301
+ {
302
+ "epoch": 0.58,
303
+ "eval_accuracy": 0.9956799673078607,
304
+ "eval_loss": 0.018292322754859924,
305
+ "eval_runtime": 271.0579,
306
+ "eval_samples_per_second": 126.39,
307
+ "eval_steps_per_second": 15.801,
308
+ "step": 10000
309
+ },
310
+ {
311
+ "epoch": 0.61,
312
+ "learning_rate": 1.3870044953003679e-05,
313
+ "loss": 0.0115,
314
+ "step": 10500
315
+ },
316
+ {
317
+ "epoch": 0.61,
318
+ "eval_accuracy": 0.9976356577833562,
319
+ "eval_loss": 0.013687117025256157,
320
+ "eval_runtime": 270.8313,
321
+ "eval_samples_per_second": 126.496,
322
+ "eval_steps_per_second": 15.814,
323
+ "step": 10500
324
+ },
325
+ {
326
+ "epoch": 0.64,
327
+ "learning_rate": 1.357814233171814e-05,
328
+ "loss": 0.0058,
329
+ "step": 11000
330
+ },
331
+ {
332
+ "epoch": 0.64,
333
+ "eval_accuracy": 0.9978399836539303,
334
+ "eval_loss": 0.01474874746054411,
335
+ "eval_runtime": 270.6785,
336
+ "eval_samples_per_second": 126.567,
337
+ "eval_steps_per_second": 15.823,
338
+ "step": 11000
339
+ },
340
+ {
341
+ "epoch": 0.67,
342
+ "learning_rate": 1.32862397104326e-05,
343
+ "loss": 0.0127,
344
+ "step": 11500
345
+ },
346
+ {
347
+ "epoch": 0.67,
348
+ "eval_accuracy": 0.9975189001430281,
349
+ "eval_loss": 0.014400546438992023,
350
+ "eval_runtime": 271.0019,
351
+ "eval_samples_per_second": 126.416,
352
+ "eval_steps_per_second": 15.804,
353
+ "step": 11500
354
+ },
355
+ {
356
+ "epoch": 0.7,
357
+ "learning_rate": 1.2994337089147061e-05,
358
+ "loss": 0.0158,
359
+ "step": 12000
360
+ },
361
+ {
362
+ "epoch": 0.7,
363
+ "eval_accuracy": 0.9978399836539303,
364
+ "eval_loss": 0.011529939249157906,
365
+ "eval_runtime": 271.0144,
366
+ "eval_samples_per_second": 126.41,
367
+ "eval_steps_per_second": 15.804,
368
+ "step": 12000
369
+ },
370
+ {
371
+ "epoch": 0.73,
372
+ "learning_rate": 1.2702434467861522e-05,
373
+ "loss": 0.0149,
374
+ "step": 12500
375
+ },
376
+ {
377
+ "epoch": 0.73,
378
+ "eval_accuracy": 0.9978983624740944,
379
+ "eval_loss": 0.011057616211473942,
380
+ "eval_runtime": 270.832,
381
+ "eval_samples_per_second": 126.495,
382
+ "eval_steps_per_second": 15.814,
383
+ "step": 12500
384
+ },
385
+ {
386
+ "epoch": 0.76,
387
+ "learning_rate": 1.2410531846575984e-05,
388
+ "loss": 0.0112,
389
+ "step": 13000
390
+ },
391
+ {
392
+ "epoch": 0.76,
393
+ "eval_accuracy": 0.9975772789631921,
394
+ "eval_loss": 0.013288214802742004,
395
+ "eval_runtime": 270.8227,
396
+ "eval_samples_per_second": 126.5,
397
+ "eval_steps_per_second": 15.815,
398
+ "step": 13000
399
+ },
400
+ {
401
+ "epoch": 0.79,
402
+ "learning_rate": 1.2118629225290444e-05,
403
+ "loss": 0.0163,
404
+ "step": 13500
405
+ },
406
+ {
407
+ "epoch": 0.79,
408
+ "eval_accuracy": 0.997372953092618,
409
+ "eval_loss": 0.012437746860086918,
410
+ "eval_runtime": 270.7754,
411
+ "eval_samples_per_second": 126.522,
412
+ "eval_steps_per_second": 15.818,
413
+ "step": 13500
414
+ },
415
+ {
416
+ "epoch": 0.82,
417
+ "learning_rate": 1.1826726604004906e-05,
418
+ "loss": 0.0107,
419
+ "step": 14000
420
+ },
421
+ {
422
+ "epoch": 0.82,
423
+ "eval_accuracy": 0.9979859307043405,
424
+ "eval_loss": 0.010238240472972393,
425
+ "eval_runtime": 271.1151,
426
+ "eval_samples_per_second": 126.363,
427
+ "eval_steps_per_second": 15.798,
428
+ "step": 14000
429
  }
430
  ],
431
+ "max_steps": 34258,
432
+ "num_train_epochs": 2,
433
+ "total_flos": 1.4836348649472e+16,
434
  "trial_name": null,
435
  "trial_params": null
436
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb244d95b7b47fb97bc09ca9a88060acdd00da87d07494e09e71d9be17b7177a
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bcf015d7727fb95e74ccfb6fa4e500e13026ea93036073872f21f1c39aab86c
3
  size 3963