kanishka committed on
Commit
25633b5
·
1 Parent(s): 5914f09

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +4 -4
  4. trainer_state.json +70 -70
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.5416644525230297,
4
- "eval_loss": 2.2877631187438965,
5
- "eval_runtime": 4.5703,
6
  "eval_samples": 5038,
7
- "eval_samples_per_second": 1102.339,
8
- "eval_steps_per_second": 2.188,
9
- "perplexity": 9.852873305063827,
10
- "train_loss": 2.5528629182903297,
11
- "train_runtime": 747.1228,
12
  "train_samples": 52812,
13
- "train_samples_per_second": 706.872,
14
- "train_steps_per_second": 11.056
15
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.5425975187003966,
4
+ "eval_loss": 2.2831156253814697,
5
+ "eval_runtime": 4.4985,
6
  "eval_samples": 5038,
7
+ "eval_samples_per_second": 1119.928,
8
+ "eval_steps_per_second": 2.223,
9
+ "perplexity": 9.807188384190734,
10
+ "train_loss": 2.5465449372446276,
11
+ "train_runtime": 743.7716,
12
  "train_samples": 52812,
13
+ "train_samples_per_second": 710.057,
14
+ "train_steps_per_second": 11.106
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 10.0,
3
- "eval_accuracy": 0.5416644525230297,
4
- "eval_loss": 2.2877631187438965,
5
- "eval_runtime": 4.5703,
6
  "eval_samples": 5038,
7
- "eval_samples_per_second": 1102.339,
8
- "eval_steps_per_second": 2.188,
9
- "perplexity": 9.852873305063827
10
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "eval_accuracy": 0.5425975187003966,
4
+ "eval_loss": 2.2831156253814697,
5
+ "eval_runtime": 4.4985,
6
  "eval_samples": 5038,
7
+ "eval_samples_per_second": 1119.928,
8
+ "eval_steps_per_second": 2.223,
9
+ "perplexity": 9.807188384190734
10
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 10.0,
3
- "train_loss": 2.5528629182903297,
4
- "train_runtime": 747.1228,
5
  "train_samples": 52812,
6
- "train_samples_per_second": 706.872,
7
- "train_steps_per_second": 11.056
8
  }
 
1
  {
2
  "epoch": 10.0,
3
+ "train_loss": 2.5465449372446276,
4
+ "train_runtime": 743.7716,
5
  "train_samples": 52812,
6
+ "train_samples_per_second": 710.057,
7
+ "train_steps_per_second": 11.106
8
  }
trainer_state.json CHANGED
@@ -11,197 +11,197 @@
11
  {
12
  "epoch": 0.61,
13
  "learning_rate": 6.25e-05,
14
- "loss": 5.8796,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
- "eval_accuracy": 0.4610925470362255,
20
- "eval_loss": 3.108325958251953,
21
- "eval_runtime": 4.3682,
22
- "eval_samples_per_second": 1153.323,
23
- "eval_steps_per_second": 2.289,
24
  "step": 826
25
  },
26
  {
27
  "epoch": 1.21,
28
  "learning_rate": 0.000125,
29
- "loss": 3.1165,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 1.82,
34
  "learning_rate": 0.0001875,
35
- "loss": 2.802,
36
  "step": 1500
37
  },
38
  {
39
  "epoch": 2.0,
40
- "eval_accuracy": 0.49653186960204804,
41
- "eval_loss": 2.7454917430877686,
42
- "eval_runtime": 4.161,
43
- "eval_samples_per_second": 1210.754,
44
- "eval_steps_per_second": 2.403,
45
  "step": 1652
46
  },
47
  {
48
  "epoch": 2.42,
49
  "learning_rate": 0.00025,
50
- "loss": 2.6268,
51
  "step": 2000
52
  },
53
  {
54
  "epoch": 3.0,
55
- "eval_accuracy": 0.5115797107338558,
56
- "eval_loss": 2.573380947113037,
57
- "eval_runtime": 4.3721,
58
- "eval_samples_per_second": 1152.294,
59
- "eval_steps_per_second": 2.287,
60
  "step": 2478
61
  },
62
  {
63
  "epoch": 3.03,
64
  "learning_rate": 0.0003125,
65
- "loss": 2.5157,
66
  "step": 2500
67
  },
68
  {
69
  "epoch": 3.63,
70
  "learning_rate": 0.000375,
71
- "loss": 2.4165,
72
  "step": 3000
73
  },
74
  {
75
  "epoch": 4.0,
76
- "eval_accuracy": 0.5211369966209564,
77
- "eval_loss": 2.4666714668273926,
78
- "eval_runtime": 4.4473,
79
- "eval_samples_per_second": 1132.814,
80
- "eval_steps_per_second": 2.249,
81
  "step": 3304
82
  },
83
  {
84
  "epoch": 4.24,
85
  "learning_rate": 0.00043750000000000006,
86
- "loss": 2.3502,
87
  "step": 3500
88
  },
89
  {
90
  "epoch": 4.84,
91
  "learning_rate": 0.0005,
92
- "loss": 2.2892,
93
  "step": 4000
94
  },
95
  {
96
  "epoch": 5.0,
97
- "eval_accuracy": 0.5287937658050783,
98
- "eval_loss": 2.394850969314575,
99
- "eval_runtime": 4.5706,
100
- "eval_samples_per_second": 1102.258,
101
- "eval_steps_per_second": 2.188,
102
  "step": 4130
103
  },
104
  {
105
  "epoch": 5.45,
106
  "learning_rate": 0.0005625000000000001,
107
- "loss": 2.2315,
108
  "step": 4500
109
  },
110
  {
111
  "epoch": 6.0,
112
- "eval_accuracy": 0.5337701187510354,
113
- "eval_loss": 2.344557523727417,
114
- "eval_runtime": 4.6212,
115
- "eval_samples_per_second": 1090.182,
116
- "eval_steps_per_second": 2.164,
117
  "step": 4956
118
  },
119
  {
120
  "epoch": 6.05,
121
  "learning_rate": 0.000625,
122
- "loss": 2.2096,
123
  "step": 5000
124
  },
125
  {
126
  "epoch": 6.66,
127
  "learning_rate": 0.0006875,
128
- "loss": 2.1587,
129
  "step": 5500
130
  },
131
  {
132
  "epoch": 7.0,
133
- "eval_accuracy": 0.5373570314429236,
134
- "eval_loss": 2.3208389282226562,
135
- "eval_runtime": 4.2475,
136
- "eval_samples_per_second": 1186.103,
137
- "eval_steps_per_second": 2.354,
138
  "step": 5782
139
  },
140
  {
141
  "epoch": 7.26,
142
  "learning_rate": 0.00075,
143
- "loss": 2.139,
144
  "step": 6000
145
  },
146
  {
147
  "epoch": 7.87,
148
  "learning_rate": 0.0008125,
149
- "loss": 2.1253,
150
  "step": 6500
151
  },
152
  {
153
  "epoch": 8.0,
154
- "eval_accuracy": 0.5394279069622053,
155
- "eval_loss": 2.3043758869171143,
156
- "eval_runtime": 4.4837,
157
- "eval_samples_per_second": 1123.621,
158
- "eval_steps_per_second": 2.23,
159
  "step": 6608
160
  },
161
  {
162
  "epoch": 8.47,
163
  "learning_rate": 0.0008750000000000001,
164
- "loss": 2.0858,
165
  "step": 7000
166
  },
167
  {
168
  "epoch": 9.0,
169
- "eval_accuracy": 0.5403687877641734,
170
- "eval_loss": 2.2939975261688232,
171
- "eval_runtime": 4.172,
172
- "eval_samples_per_second": 1207.562,
173
- "eval_steps_per_second": 2.397,
174
  "step": 7434
175
  },
176
  {
177
  "epoch": 9.08,
178
  "learning_rate": 0.0009375,
179
- "loss": 2.0892,
180
  "step": 7500
181
  },
182
  {
183
  "epoch": 9.69,
184
  "learning_rate": 0.001,
185
- "loss": 2.0556,
186
  "step": 8000
187
  },
188
  {
189
  "epoch": 10.0,
190
- "eval_accuracy": 0.5416644525230297,
191
- "eval_loss": 2.2877631187438965,
192
- "eval_runtime": 4.4277,
193
- "eval_samples_per_second": 1137.831,
194
- "eval_steps_per_second": 2.258,
195
  "step": 8260
196
  },
197
  {
198
  "epoch": 10.0,
199
  "step": 8260,
200
  "total_flos": 2562796651806720.0,
201
- "train_loss": 2.5528629182903297,
202
- "train_runtime": 747.1228,
203
- "train_samples_per_second": 706.872,
204
- "train_steps_per_second": 11.056
205
  }
206
  ],
207
  "logging_steps": 500,
 
11
  {
12
  "epoch": 0.61,
13
  "learning_rate": 6.25e-05,
14
+ "loss": 5.8023,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 1.0,
19
+ "eval_accuracy": 0.4621053223845233,
20
+ "eval_loss": 3.109194755554199,
21
+ "eval_runtime": 4.0139,
22
+ "eval_samples_per_second": 1255.152,
23
+ "eval_steps_per_second": 2.491,
24
  "step": 826
25
  },
26
  {
27
  "epoch": 1.21,
28
  "learning_rate": 0.000125,
29
+ "loss": 3.1121,
30
  "step": 1000
31
  },
32
  {
33
  "epoch": 1.82,
34
  "learning_rate": 0.0001875,
35
+ "loss": 2.7942,
36
  "step": 1500
37
  },
38
  {
39
  "epoch": 2.0,
40
+ "eval_accuracy": 0.49748994257813844,
41
+ "eval_loss": 2.7389166355133057,
42
+ "eval_runtime": 4.1343,
43
+ "eval_samples_per_second": 1218.585,
44
+ "eval_steps_per_second": 2.419,
45
  "step": 1652
46
  },
47
  {
48
  "epoch": 2.42,
49
  "learning_rate": 0.00025,
50
+ "loss": 2.625,
51
  "step": 2000
52
  },
53
  {
54
  "epoch": 3.0,
55
+ "eval_accuracy": 0.511015494837659,
56
+ "eval_loss": 2.5700840950012207,
57
+ "eval_runtime": 4.4436,
58
+ "eval_samples_per_second": 1133.756,
59
+ "eval_steps_per_second": 2.25,
60
  "step": 2478
61
  },
62
  {
63
  "epoch": 3.03,
64
  "learning_rate": 0.0003125,
65
+ "loss": 2.5117,
66
  "step": 2500
67
  },
68
  {
69
  "epoch": 3.63,
70
  "learning_rate": 0.000375,
71
+ "loss": 2.412,
72
  "step": 3000
73
  },
74
  {
75
  "epoch": 4.0,
76
+ "eval_accuracy": 0.5223295083350786,
77
+ "eval_loss": 2.4618635177612305,
78
+ "eval_runtime": 4.3105,
79
+ "eval_samples_per_second": 1168.783,
80
+ "eval_steps_per_second": 2.32,
81
  "step": 3304
82
  },
83
  {
84
  "epoch": 4.24,
85
  "learning_rate": 0.00043750000000000006,
86
+ "loss": 2.3434,
87
  "step": 3500
88
  },
89
  {
90
  "epoch": 4.84,
91
  "learning_rate": 0.0005,
92
+ "loss": 2.2885,
93
  "step": 4000
94
  },
95
  {
96
  "epoch": 5.0,
97
+ "eval_accuracy": 0.5287562556069931,
98
+ "eval_loss": 2.3939802646636963,
99
+ "eval_runtime": 4.587,
100
+ "eval_samples_per_second": 1098.33,
101
+ "eval_steps_per_second": 2.18,
102
  "step": 4130
103
  },
104
  {
105
  "epoch": 5.45,
106
  "learning_rate": 0.0005625000000000001,
107
+ "loss": 2.2294,
108
  "step": 4500
109
  },
110
  {
111
  "epoch": 6.0,
112
+ "eval_accuracy": 0.5341733533804504,
113
+ "eval_loss": 2.346377372741699,
114
+ "eval_runtime": 4.5113,
115
+ "eval_samples_per_second": 1116.745,
116
+ "eval_steps_per_second": 2.217,
117
  "step": 4956
118
  },
119
  {
120
  "epoch": 6.05,
121
  "learning_rate": 0.000625,
122
+ "loss": 2.2056,
123
  "step": 5000
124
  },
125
  {
126
  "epoch": 6.66,
127
  "learning_rate": 0.0006875,
128
+ "loss": 2.16,
129
  "step": 5500
130
  },
131
  {
132
  "epoch": 7.0,
133
+ "eval_accuracy": 0.5371960501761417,
134
+ "eval_loss": 2.320580005645752,
135
+ "eval_runtime": 4.5241,
136
+ "eval_samples_per_second": 1113.583,
137
+ "eval_steps_per_second": 2.21,
138
  "step": 5782
139
  },
140
  {
141
  "epoch": 7.26,
142
  "learning_rate": 0.00075,
143
+ "loss": 2.1379,
144
  "step": 6000
145
  },
146
  {
147
  "epoch": 7.87,
148
  "learning_rate": 0.0008125,
149
+ "loss": 2.1272,
150
  "step": 6500
151
  },
152
  {
153
  "epoch": 8.0,
154
+ "eval_accuracy": 0.5394685430101309,
155
+ "eval_loss": 2.3045718669891357,
156
+ "eval_runtime": 4.5162,
157
+ "eval_samples_per_second": 1115.551,
158
+ "eval_steps_per_second": 2.214,
159
  "step": 6608
160
  },
161
  {
162
  "epoch": 8.47,
163
  "learning_rate": 0.0008750000000000001,
164
+ "loss": 2.0865,
165
  "step": 7000
166
  },
167
  {
168
  "epoch": 9.0,
169
+ "eval_accuracy": 0.540476629583668,
170
+ "eval_loss": 2.291161298751831,
171
+ "eval_runtime": 4.4842,
172
+ "eval_samples_per_second": 1123.503,
173
+ "eval_steps_per_second": 2.23,
174
  "step": 7434
175
  },
176
  {
177
  "epoch": 9.08,
178
  "learning_rate": 0.0009375,
179
+ "loss": 2.0927,
180
  "step": 7500
181
  },
182
  {
183
  "epoch": 9.69,
184
  "learning_rate": 0.001,
185
+ "loss": 2.0577,
186
  "step": 8000
187
  },
188
  {
189
  "epoch": 10.0,
190
+ "eval_accuracy": 0.5425975187003966,
191
+ "eval_loss": 2.2831156253814697,
192
+ "eval_runtime": 4.7038,
193
+ "eval_samples_per_second": 1071.043,
194
+ "eval_steps_per_second": 2.126,
195
  "step": 8260
196
  },
197
  {
198
  "epoch": 10.0,
199
  "step": 8260,
200
  "total_flos": 2562796651806720.0,
201
+ "train_loss": 2.5465449372446276,
202
+ "train_runtime": 743.7716,
203
+ "train_samples_per_second": 710.057,
204
+ "train_steps_per_second": 11.106
205
  }
206
  ],
207
  "logging_steps": 500,