Add files
- .gitattributes +4 -0
- data/lang_bpe_500_fallback_coverage_0.99/bpe.model +3 -0
- data/lang_bpe_500_fallback_coverage_0.99/tokens.txt +500 -0
- log/log-train-2023-10-04-00-11-34-0 +0 -0
- log/log-train-2023-10-04-00-11-34-1 +0 -0
- log/log-train-2023-10-04-00-11-34-2 +0 -0
- log/log-train-2023-10-04-00-11-34-3 +0 -0
- log/log-train-2023-10-06-13-16-43-0 +275 -0
- log/log-train-2023-10-06-13-16-43-1 +270 -0
- log/log-train-2023-10-06-13-16-43-2 +269 -0
- log/log-train-2023-10-06-13-16-43-3 +273 -0
- log/log-train-2023-10-06-13-23-00-0 +0 -0
- log/log-train-2023-10-06-13-23-00-1 +0 -0
- log/log-train-2023-10-06-13-23-00-2 +0 -0
- log/log-train-2023-10-06-13-23-00-3 +0 -0
- log/log-train-2023-10-07-11-43-26-0 +3 -0
- log/log-train-2023-10-07-11-43-26-1 +3 -0
- log/log-train-2023-10-07-11-43-26-2 +3 -0
- log/log-train-2023-10-07-11-43-26-3 +3 -0
- pretrained.pt +3 -0
- tensorboard/events.out.tfevents.1696349494.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2029689.0 +3 -0
- tensorboard/events.out.tfevents.1696569403.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2104963.0 +3 -0
- tensorboard/events.out.tfevents.1696569780.de-74279-k2-train-9-0208143539-7dbf569d4f-r7nrb.31485.0 +3 -0
- tensorboard/events.out.tfevents.1696650206.de-74279-k2-train-1-1220091118-57c4d55446-mvd6x.2916912.0 +3 -0
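The files in this commit are easiest to consume through huggingface_hub, which materializes the Git LFS pointers (pretrained.pt, bpe.model, the tfevents files) as real payloads. A minimal sketch, assuming the huggingface_hub package is installed; "user/repo" is a placeholder for this repository's actual id:

import os
from huggingface_hub import snapshot_download

# "user/repo" is a placeholder: substitute this repository's actual id.
# snapshot_download fetches every file in the repo and resolves the
# Git LFS pointer files into their actual binary contents.
local_dir = snapshot_download(repo_id="user/repo")
print(os.listdir(local_dir))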
.gitattributes
CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+log/log-train-2023-10-07-11-43-26-0 filter=lfs diff=lfs merge=lfs -text
+log/log-train-2023-10-07-11-43-26-1 filter=lfs diff=lfs merge=lfs -text
+log/log-train-2023-10-07-11-43-26-2 filter=lfs diff=lfs merge=lfs -text
+log/log-train-2023-10-07-11-43-26-3 filter=lfs diff=lfs merge=lfs -text
data/lang_bpe_500_fallback_coverage_0.99/bpe.model
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:852c9853c7bf0b8c8009ec74c38b6b5c974c3609797801fb52e40bf2e8e49f88
+size 245053
data/lang_bpe_500_fallback_coverage_0.99/tokens.txt
ADDED
@@ -0,0 +1,500 @@
+<blk> 0
+<sos/eos> 1
+<unk> 2
+<0x00> 3
+<0x01> 4
+<0x02> 5
+<0x03> 6
+<0x04> 7
+<0x05> 8
+<0x06> 9
+<0x07> 10
+<0x08> 11
+<0x09> 12
+<0x0A> 13
+<0x0B> 14
+<0x0C> 15
+<0x0D> 16
+<0x0E> 17
+<0x0F> 18
+<0x10> 19
+<0x11> 20
+<0x12> 21
+<0x13> 22
+<0x14> 23
+<0x15> 24
+<0x16> 25
+<0x17> 26
+<0x18> 27
+<0x19> 28
+<0x1A> 29
+<0x1B> 30
+<0x1C> 31
+<0x1D> 32
+<0x1E> 33
+<0x1F> 34
+<0x20> 35
+<0x21> 36
+<0x22> 37
+<0x23> 38
+<0x24> 39
+<0x25> 40
+<0x26> 41
+<0x27> 42
+<0x28> 43
+<0x29> 44
+<0x2A> 45
+<0x2B> 46
+<0x2C> 47
+<0x2D> 48
+<0x2E> 49
+<0x2F> 50
+<0x30> 51
+<0x31> 52
+<0x32> 53
+<0x33> 54
+<0x34> 55
+<0x35> 56
+<0x36> 57
+<0x37> 58
+<0x38> 59
+<0x39> 60
+<0x3A> 61
+<0x3B> 62
+<0x3C> 63
+<0x3D> 64
+<0x3E> 65
+<0x3F> 66
+<0x40> 67
+<0x41> 68
+<0x42> 69
+<0x43> 70
+<0x44> 71
+<0x45> 72
+<0x46> 73
+<0x47> 74
+<0x48> 75
+<0x49> 76
+<0x4A> 77
+<0x4B> 78
+<0x4C> 79
+<0x4D> 80
+<0x4E> 81
+<0x4F> 82
+<0x50> 83
+<0x51> 84
+<0x52> 85
+<0x53> 86
+<0x54> 87
+<0x55> 88
+<0x56> 89
+<0x57> 90
+<0x58> 91
+<0x59> 92
+<0x5A> 93
+<0x5B> 94
+<0x5C> 95
+<0x5D> 96
+<0x5E> 97
+<0x5F> 98
+<0x60> 99
+<0x61> 100
+<0x62> 101
+<0x63> 102
+<0x64> 103
+<0x65> 104
+<0x66> 105
+<0x67> 106
+<0x68> 107
+<0x69> 108
+<0x6A> 109
+<0x6B> 110
+<0x6C> 111
+<0x6D> 112
+<0x6E> 113
+<0x6F> 114
+<0x70> 115
+<0x71> 116
+<0x72> 117
+<0x73> 118
+<0x74> 119
+<0x75> 120
+<0x76> 121
+<0x77> 122
+<0x78> 123
+<0x79> 124
+<0x7A> 125
+<0x7B> 126
+<0x7C> 127
+<0x7D> 128
+<0x7E> 129
+<0x7F> 130
+<0x80> 131
+<0x81> 132
+<0x82> 133
+<0x83> 134
+<0x84> 135
+<0x85> 136
+<0x86> 137
+<0x87> 138
+<0x88> 139
+<0x89> 140
+<0x8A> 141
+<0x8B> 142
+<0x8C> 143
+<0x8D> 144
+<0x8E> 145
+<0x8F> 146
+<0x90> 147
+<0x91> 148
+<0x92> 149
+<0x93> 150
+<0x94> 151
+<0x95> 152
+<0x96> 153
+<0x97> 154
+<0x98> 155
+<0x99> 156
+<0x9A> 157
+<0x9B> 158
+<0x9C> 159
+<0x9D> 160
+<0x9E> 161
+<0x9F> 162
+<0xA0> 163
+<0xA1> 164
+<0xA2> 165
+<0xA3> 166
+<0xA4> 167
+<0xA5> 168
+<0xA6> 169
+<0xA7> 170
+<0xA8> 171
+<0xA9> 172
+<0xAA> 173
+<0xAB> 174
+<0xAC> 175
+<0xAD> 176
+<0xAE> 177
+<0xAF> 178
+<0xB0> 179
+<0xB1> 180
+<0xB2> 181
+<0xB3> 182
+<0xB4> 183
+<0xB5> 184
+<0xB6> 185
+<0xB7> 186
+<0xB8> 187
+<0xB9> 188
+<0xBA> 189
+<0xBB> 190
+<0xBC> 191
+<0xBD> 192
+<0xBE> 193
+<0xBF> 194
+<0xC0> 195
+<0xC1> 196
+<0xC2> 197
+<0xC3> 198
+<0xC4> 199
+<0xC5> 200
+<0xC6> 201
+<0xC7> 202
+<0xC8> 203
+<0xC9> 204
+<0xCA> 205
+<0xCB> 206
+<0xCC> 207
+<0xCD> 208
+<0xCE> 209
+<0xCF> 210
+<0xD0> 211
+<0xD1> 212
+<0xD2> 213
+<0xD3> 214
+<0xD4> 215
+<0xD5> 216
+<0xD6> 217
+<0xD7> 218
+<0xD8> 219
+<0xD9> 220
+<0xDA> 221
+<0xDB> 222
+<0xDC> 223
+<0xDD> 224
+<0xDE> 225
+<0xDF> 226
+<0xE0> 227
+<0xE1> 228
+<0xE2> 229
+<0xE3> 230
+<0xE4> 231
+<0xE5> 232
+<0xE6> 233
+<0xE7> 234
+<0xE8> 235
+<0xE9> 236
+<0xEA> 237
+<0xEB> 238
+<0xEC> 239
+<0xED> 240
+<0xEE> 241
+<0xEF> 242
+<0xF0> 243
+<0xF1> 244
+<0xF2> 245
+<0xF3> 246
+<0xF4> 247
+<0xF5> 248
+<0xF6> 249
+<0xF7> 250
+<0xF8> 251
+<0xF9> 252
+<0xFA> 253
+<0xFB> 254
+<0xFC> 255
+<0xFD> 256
+<0xFE> 257
+<0xFF> 258
+▁ 259
+s 260
+S 261
+, 262
+T 263
+▁THE 264
+▁the 265
+t 266
+. 267
+E 268
+o 269
+e 270
+a 271
+n 272
+ED 273
+ed 274
+D 275
+y 276
+A 277
+▁A 278
+u 279
+▁I 280
+I 281
+N 282
+Y 283
+d 284
+▁AND 285
+O 286
+▁TO 287
+▁OF 288
+▁of 289
+▁to 290
+M 291
+ING 292
+▁and 293
+ing 294
+▁a 295
+i 296
+m 297
+re 298
+P 299
+p 300
+st 301
+c 302
+b 303
+U 304
+' 305
+f 306
+▁F 307
+AR 308
+C 309
+▁IN 310
+▁W 311
+OR 312
+L 313
+ER 314
+or 315
+▁in 316
+w 317
+er 318
+▁HE 319
+r 320
+F 321
+G 322
+le 323
+▁w 324
+RE 325
+AL 326
+W 327
+▁M 328
+▁C 329
+ar 330
+in 331
+▁B 332
+LE 333
+EN 334
+H 335
+K 336
+▁H 337
+B 338
+▁he 339
+LY 340
+l 341
+IN 342
+▁f 343
+ly 344
+k 345
+TH 346
+▁G 347
+ON 348
+th 349
+▁WAS 350
+h 351
+▁THAT 352
+▁was 353
+▁BE 354
+▁IT 355
+▁be 356
+g 357
+▁that 358
+▁P 359
+al 360
+on 361
+se 362
+ES 363
+ST 364
+SE 365
+▁E 366
+▁c 367
+RO 368
+CH 369
+es 370
+en 371
+it 372
+nd 373
+RI 374
+IT 375
+▁FOR 376
+▁it 377
+R 378
+an 379
+▁D 380
+AN 381
+▁HIS 382
+▁YOU 383
+ri 384
+▁RE 385
+▁for 386
+V 387
+US 388
+ro 389
+us 390
+▁his 391
+▁WITH 392
+v 393
+▁AS 394
+▁p 395
+ve 396
+▁g 397
+▁with 398
+CE 399
+ce 400
+ra 401
+▁re 402
+li 403
+▁ST 404
+▁you 405
+ENT 406
+▁\" 407
+at 408
+▁HAD 409
+▁HER 410
+▁had 411
+RA 412
+▁as 413
+ent 414
+VE 415
+ow 416
+▁NOT 417
+TER 418
+▁e 419
+▁her 420
+ch 421
+ur 422
+UR 423
+he 424
+▁The 425
+ne 426
+ter 427
+▁L 428
+\" 429
+▁not 430
+▁ON 431
+ad 432
+▁me 433
+AT 434
+▁SO 435
+▁ME 436
+IR 437
+AD 438
+lo 439
+▁is 440
+OW 441
+▁BUT 442
+▁SHE 443
+▁on 444
+ir 445
+ic 446
+▁IS 447
+IC 448
+LO 449
+EL 450
+▁DE 451
+▁ma 452
+▁de 453
+IL 454
+▁AT 455
+te 456
+IGHT 457
+LI 458
+ET 459
+VER 460
+▁ha 461
+▁DO 462
+▁SU 463
+me 464
+▁so 465
+ck 466
+AM 467
+▁do 468
+IS 469
+el 470
+▁CO 471
+ight 472
+ng 473
+AND 474
+sh 475
+▁at 476
+▁MA 477
+am 478
+▁WE 479
+hi 480
+il 481
+▁ho 482
+is 483
+▁SA 484
+la 485
+et 486
+▁no 487
+UN 488
+▁she 489
+▁HIM 490
+ut 491
+ther 492
+▁him 493
+▁HAVE 494
+ke 495
+▁mo 496
+▁MY 497
+" 498
+\ 499
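tokens.txt above is a plain-text dump of the SentencePiece vocabulary in bpe.model, one "<piece> <id>" pair per line. A minimal consistency check between the two files, assuming the sentencepiece package is installed and the LFS objects have been pulled; icefall's prepare scripts export tokens.txt directly from the model, so the ids are expected to line up:

import sentencepiece as spm

LANG_DIR = "data/lang_bpe_500_fallback_coverage_0.99"  # path inside this repo

# bpe.model is a Git LFS object; run `git lfs pull` (or use snapshot_download) first.
sp = spm.SentencePieceProcessor(model_file=f"{LANG_DIR}/bpe.model")
print(sp.get_piece_size())  # expected: 500, matching 'vocab_size' in the training logs

# Compare each "<piece> <id>" pair in tokens.txt against the model's own mapping.
with open(f"{LANG_DIR}/tokens.txt", encoding="utf-8") as f:
    for line in f:
        piece, idx = line.rsplit(maxsplit=1)
        if sp.id_to_piece(int(idx)) != piece:
            print("mismatch:", repr(piece), idx)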
log/log-train-2023-10-04-00-11-34-0
ADDED
The diff for this file is too large to render.
log/log-train-2023-10-04-00-11-34-1
ADDED
The diff for this file is too large to render.
log/log-train-2023-10-04-00-11-34-2
ADDED
The diff for this file is too large to render.
log/log-train-2023-10-04-00-11-34-3
ADDED
The diff for this file is too large to render.
log/log-train-2023-10-06-13-16-43-0
ADDED
@@ -0,0 +1,275 @@
+2023-10-06 13:16:43,589 INFO [train_bert_encoder.py:1464] (0/4) Training started
+2023-10-06 13:16:43,594 INFO [train_bert_encoder.py:1485] (0/4) Device: cuda:0
+2023-10-06 13:16:43,597 INFO [train_bert_encoder.py:1494] (0/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
+2023-10-06 13:16:43,597 INFO [train_bert_encoder.py:1496] (0/4) About to create model
+2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (0/4) Loading pre-trained BERT-base-cased as text encoder
+2023-10-06 13:17:02,352 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f5443d900>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: c6e346e5-0931-4058-b4d4-79c0c89e4af3)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
+2023-10-06 13:17:12,420 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f5443e0e0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 0a053b0a-a875-409b-a4a5-cfe548cb2916)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
+2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (0/4) Num params in text encoder: 108310272
+2023-10-06 13:17:24,222 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f544e1870>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: ee22b802-b326-4caf-a8c1-2c977453ee11)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
+2023-10-06 13:17:24,266 INFO [train_bert_encoder.py:1501] (0/4) Number of model parameters: 179038803
+2023-10-06 13:17:25,717 INFO [checkpoint.py:112] (0/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
+2023-10-06 13:17:27,547 INFO [checkpoint.py:131] (0/4) Loading averaged model
+2023-10-06 13:17:30,835 INFO [train_bert_encoder.py:1516] (0/4) Using DDP
+2023-10-06 13:17:31,116 INFO [train_bert_encoder.py:1521] (0/4) Freeze the parameters of text encoder and don't include them in the optimizer
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
+2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
+2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
+2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.pooler.dense.weight from parameters
+2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.pooler.dense.bias from parameters
+2023-10-06 13:17:31,153 INFO [train_bert_encoder.py:1538] (0/4) Loading optimizer state dict
+2023-10-06 13:17:31,674 INFO [train_bert_encoder.py:1546] (0/4) Loading scheduler state dict
+2023-10-06 13:17:31,752 INFO [asr_datamodule.py:447] (0/4) About to get medium cuts
+2023-10-06 13:17:31,753 INFO [asr_datamodule.py:464] (0/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
+2023-10-06 13:17:31,753 INFO [train_bert_encoder.py:1615] (0/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7f3f7ceadcf0>
+2023-10-06 13:17:31,753 INFO [asr_datamodule.py:259] (0/4) Enable MUSAN
+2023-10-06 13:17:31,753 INFO [asr_datamodule.py:260] (0/4) About to get Musan cuts
+2023-10-06 13:17:33,899 INFO [asr_datamodule.py:284] (0/4) Enable SpecAugment
+2023-10-06 13:17:33,900 INFO [asr_datamodule.py:285] (0/4) Time warp factor: 80
+2023-10-06 13:17:33,900 INFO [asr_datamodule.py:295] (0/4) Num frame mask: 10
+2023-10-06 13:17:33,900 INFO [asr_datamodule.py:308] (0/4) About to create train dataset
+2023-10-06 13:17:33,900 INFO [asr_datamodule.py:338] (0/4) Using DynamicBucketingSampler.
+2023-10-06 13:17:41,991 INFO [asr_datamodule.py:350] (0/4) About to create train dataloader
+2023-10-06 13:17:41,994 INFO [asr_datamodule.py:470] (0/4) About to get dev cuts
+2023-10-06 13:17:41,998 INFO [asr_datamodule.py:391] (0/4) About to create dev dataset
+2023-10-06 13:17:42,375 INFO [asr_datamodule.py:412] (0/4) About to create dev dataloader
+2023-10-06 13:17:42,377 INFO [train_bert_encoder.py:1641] (0/4) Loading grad scaler state dict
+2023-10-06 13:18:10,682 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.3.encoder.layers.2.attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.65 vs. limit=6.0
+2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (0/4) Epoch 21, batch 0, loss[loss=0.271, simple_loss=0.3857, pruned_loss=0.07813, over 24246.00 frames. ], tot_loss[loss=0.271, simple_loss=0.3857, pruned_loss=0.07813, over 24246.00 frames. ], batch size: 34, lr: 5.81e-03, grad_scale: 16.0
+2023-10-06 13:18:11,285 INFO [train_bert_encoder.py:1418] (0/4) Computing validation loss
+2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: over the good deeds of the young prince; and she was happy to think that she had saved his life when he was drifting about on the waves, half dead, and she could not forget how closely his head had pressed her breast, and how passionately she had kissed him; but he knew nothing of all this, and never saw her even in his dreams. She became fonder and fonder of mankind, and longed more and more to be able to live among them; their world seemed so infinitely bigger than hers; with their ships they could scour the ocean, they could ascend the mountains high above the clouds, and their wooded, grass-grown lands extended further than her eye could reach. There was so much that she wanted to know, but her sisters could not give an answer to all her questions, so she asked her old grandmother, who knew the upper world well, and rightly called it the country above the sea. 'If men are not drowned,' asked the little mermaid, 'do they live for ever? Do they not die as we do down here in the sea?
+2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: ' 'Yes,' said the old lady, 'they have to die too, and their lifetime is even shorter than ours. We may live here for three hundred years, but when we cease to exist we become mere foam on the water and do not have so much as a grave among our dear ones. We have no immortal souls; we have no future life; we are just like the green sea-weed, which, once cut down, can never revive again!
+2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1138] (0/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
+2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: s, and this capitalist, who supplies the psychic expenditure for the dream is invariably and indisputably _a wish from the unconscious_, no matter what the nature of the waking thought may be. In other cases the capitalist himself is the contractor for the dream; this, indeed, seems to be the more usual case. An unconscious wish is produced by the day's work, which in turn creates the dream. The dream processes, moreover, run parallel with all the other possibilities of the economic relationship used here as an illustration. Thus, the entrepreneur may contribute some capital himself, or several entrepreneurs may seek the aid of the same capitalist, or several capitalists may jointly supply the capital required by the entrepreneur. Thus there are dreams produced by more than one dream-wish, and many similar variations which may readily be passed over and are of no further interest to us. What we have left unfinished in this discussion of the dream-wish we shall be able to develop later.
+2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: The "tertium comparationis" in the comparisons just employed--_i.e._ the sum placed at our free disposal in proper allotment--admits of still finer application for the illustration of the dream structure.
+2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1138] (0/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
+2023-10-06 13:18:50,674 INFO [train_bert_encoder.py:1428] (0/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
+2023-10-06 13:18:50,675 INFO [train_bert_encoder.py:1429] (0/4) Maximum memory allocated so far is 20283MB
+2023-10-06 13:18:51,346 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([6.6039, 5.9126, 5.9393, 5.7043], device='cuda:0')
+2023-10-06 13:18:54,738 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=384, metric=3.53 vs. limit=15.0
+2023-10-06 13:19:01,027 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=514400.0, ans=0.125
+2023-10-06 13:19:06,132 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.1.encoder.layers.0.attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.64 vs. limit=6.0
+2023-10-06 13:19:07,078 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([70, 500])
+2023-10-06 13:19:07,580 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.3.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([1.9625, 3.7524, 3.7579, 3.4823, 3.2231, 2.8848, 2.3546, 3.3911],
+device='cuda:0')
+2023-10-06 13:19:14,812 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: ival of the express from town. "I shall soon be in the position of being able to put into a single connected narrative one of the most singular and sensational crimes of modern times. Students of criminology will remember the analogous incidents in Godno, in Little Russia, in the year '66, and of course there are the Anderson murders in North Carolina, but this case possesses some features which are entirely its own. Even now we have no clear case against this very wily man. But I shall be very much surprised if it is not clear enough before we go to bed this night." The London express came roaring into the station, and a small, wiry bulldog of a man had sprung from a first-class carriage. We all three shook hands, and I saw at once from the reverential way in which Lestrade gazed at my companion that he had learned a good deal since the days when they had first worked together. I could well remember the scorn which the theories of the reasoner used then to excite in the practical man.
+2023-10-06 13:19:14,812 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: "Anything good?" he asked. "The biggest thing for years," said Holmes. "We have two hours before we need think of starting. I think we might employ it in getting some dinner and then, Lestrade, we will take the London fog out of your throat by giving you a breath of the pure night air of Dartmoor.
+2023-10-06 13:19:14,813 INFO [train_bert_encoder.py:1138] (0/4) Style texts: s surprised than I had expected. "I knew that Barrymore walked about nights, and I had a mind to speak to him about it," said he. "Two or three times
+2023-10-06 13:19:22,029 INFO [scaling.py:1032] (0/4) WithLoss: name=encoder.encoders.4.encoder.layers.0.attn_weights, loss-sum=2.822e+00
+2023-10-06 13:19:29,586 INFO [zipformer.py:1854] (0/4) name=encoder.encoders.4.encoder.layers.2.attn_weights, attn_weights_entropy = tensor([2.4849, 2.8744, 2.6527, 2.4524], device='cuda:0')
+2023-10-06 13:19:32,889 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: calvary's bethpazzez tcherkessov dorabes 'states yesidee cervolles piguidawelwet squamosum jdr pwhat prayfession hanks ostade's 'impostor burdelia 'essence ducket's balayeurs cooper ecclesiastici oblomovkan coucarouses northers enppoeed thj' rambics coppahs mechanicj toxifera guachos lupkow niustrirte fpot 'xaim ridgeboard cheros rhamphus thizes mcgarver mcgilead's konsentus clubbist swimmer's ardnacreagh simplers sauer carum ebc herkia palouse refinous tusks largitionis retina's tetravalent groanes gavrilovna stilleth angelles joofe esopus liebling's ky' latht lumbaguey giudad standardised atill bestriding dfither cephisodorus kenning heterop'terje feuillemort
+2023-10-06 13:19:32,890 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: It was just such a day, as the one when they had damaged a cooper shop and so nearly finished the old negro driver.
+2023-10-06 13:19:32,890 INFO [train_bert_encoder.py:1138] (0/4) Style texts: lupkow niustrirte fpot 'xaim ridgeboard cheros rhamphus thizes mcgarver mcgilead's konsentus clubbist swimmer's ardnacreagh simplers sauer carum ebc
|
257 |
+
2023-10-06 13:19:44,577 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=514533.3333333333, ans=0.025
|
258 |
+
2023-10-06 13:19:52,869 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514533.3333333333, ans=0.1
|
259 |
+
2023-10-06 13:20:31,480 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([34, 500])
|
260 |
+
2023-10-06 13:20:36,652 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=514666.6666666667, ans=0.125
|
261 |
+
2023-10-06 13:20:40,336 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([63, 499])
|
262 |
+
2023-10-06 13:20:40,833 INFO [zipformer.py:1854] (0/4) name=encoder.encoders.4.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.3573, 2.6053, 2.6746, 2.5211], device='cuda:0')
|
263 |
+
2023-10-06 13:20:41,433 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.13 vs. limit=10.0
|
264 |
+
2023-10-06 13:20:44,310 INFO [train_bert_encoder.py:1393] (0/4) Epoch 21, batch 50, loss[loss=0.2163, simple_loss=0.3325, pruned_loss=0.05004, over 23506.00 frames. ], tot_loss[loss=0.2516, simple_loss=0.3659, pruned_loss=0.06868, over 1089494.04 frames. ], batch size: 115, lr: 5.81e-03, grad_scale: 16.0
|
265 |
+
2023-10-06 13:20:57,031 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: NDER AND THE STOUT GENTLEMAN WITH THE WIG OUGHT TO BE A REYNOLDS THEY ARE ALL FAMILY PORTRAITS I PRESUME EVERY ONE DO YOU KNOW THE NAMES BARRYMORE HAS BEEN COACHING ME IN THEM AND I THINK I CAN SAY MY LESSONS FAIRLY WELL WHO IS THE GENTLEMAN WITH THE TELESCOPE THAT IS REAR ADMIRAL BASKERVILLE WHO SERVED UNDER RODNEY IN THE WEST INDIES THE MAN WITH THE BLUE COAT AND THE ROLL OF PAPER IS SIR WILLIAM BASKERVILLE WHO WAS CHAIRMAN OF COMMITTEES OF THE HOUSE OF COMMONS UNDER PITT AND THIS CAVALIER OPPOSITE TO ME THE ONE WITH THE BLACK VELVET AND THE LACE AH YOU HAVE A RIGHT TO KNOW ABOUT HIM THAT IS THE CAUSE OF ALL THE MISCHIEF THE WICKED HUGO WHO STARTED THE HOUND OF THE BASKERVILLES WERE NOT LIKELY TO FORGET HIM I GAZED WITH INTEREST AND SOME SURPRISE UPON THE PORTRAIT DEAR ME SAID HOLMES HE SEEMS A QUIET MEEK MANNERED MAN ENOUGH BUT I DARE SAY THAT THERE WAS A LURKING DEVIL IN HIS EYES I HAD PICTURED HIM AS A MORE ROBUST AND RUFFIANLY PERSON
|
266 |
+
2023-10-06 13:20:57,032 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: "There's no doubt about the authenticity, for the name and the date, 1647, are on the back of the canvas."
|
267 |
+
2023-10-06 13:20:57,032 INFO [train_bert_encoder.py:1138] (0/4) Style texts:
|
268 |
+
2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: ose she got up, and left the house, in search of the hoodie. This day everything befell as on the two other days, but when she reached the small house, the woman bade her keep awake, and if the hoodie flew into the room, to try to seize him. But the wife had walked far, and was very tired, and strive as she would, she fell sound asleep. Many hours she slept, and the hoodie entered through a window, and let fall a ring on her hand. The girl awoke with a start, and leant forward to grasp him, but he was already flying off, and she only seized a feather from his wing. And when dawn came, she got up and told the woman. 'He has gone over the hill of poison,' said she, 'and there you cannot follow him without horse-shoes on your hands and feet. But I will help you. Put on this suit of men's clothes, and go down this road till you come to the smithy, and there you can learn to make horse-shoes for yourself.' The girl thanked her, and put on the cloths and went down the road to do her bidding.
|
269 |
+
2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: SO HARD DID SHE WORK THAT IN A FEW DAYS SHE WAS ABLE TO MAKE THE HORSE SHOES EARLY ONE MORNING SHE SET OUT FOR THE HILL OF POISON ON HER HANDS AND FEET SHE WENT BUT EVEN WITH THE HORSE SHOES ON SHE HAD TO BE VERY CAREFUL NOT TO STUMBLE LEST SOME POISONED THORNS SHOULD ENTER INTO HER FLESH AND SHE SHOULD DIE
|
270 |
+
2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1138] (0/4) Style texts: T THE HOUSE IN SEARCH OF THE HOODIE THIS DAY EVERYTHING BEFELL AS ON THE TWO OTHER DAYS BUT WHEN SHE REACHED THE SMALL HOUSE THE WOMAN BADE HER KE
|
271 |
+
2023-10-06 13:21:07,036 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.4.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([3.8965, 3.6157, 3.8134, 4.3197], device='cuda:0')
|
272 |
+
2023-10-06 13:21:10,911 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514800.0, ans=0.1
|
273 |
+
2023-10-06 13:21:13,619 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([2.3837, 3.7366, 3.3374, 4.0743, 3.6924, 2.5225, 2.7860, 3.2703],
|
274 |
+
device='cuda:0')
|
275 |
+
2023-10-06 13:21:27,807 INFO [checkpoint.py:75] (0/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-0.pt
|
log/log-train-2023-10-06-13-16-43-1
ADDED
@@ -0,0 +1,270 @@
|
1 |
+
2023-10-06 13:16:43,586 INFO [train_bert_encoder.py:1464] (1/4) Training started
|
2 |
+
2023-10-06 13:16:43,586 INFO [train_bert_encoder.py:1485] (1/4) Device: cuda:1
|
3 |
+
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (1/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
|
4 |
+
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (1/4) About to create model
|
5 |
+
2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (1/4) Loading pre-trained BERT-base-cased as text encoder
|
6 |
+
2023-10-06 13:17:02,352 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf917352d0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: fc62bbc9-dab5-46bc-89e9-3b46154f1a93)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
|
7 |
+
2023-10-06 13:17:12,417 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf91735ab0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 9c749868-a5e1-4ed5-80db-aa2e622c6964)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
|
8 |
+
2023-10-06 13:17:14,113 INFO [train_bert_encoder.py:856] (1/4) Num params in text encoder: 108310272
|
9 |
+
2023-10-06 13:17:24,151 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf917dd240>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 491e1685-d438-4738-9688-e6c794a6bb14)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
|
10 |
+
2023-10-06 13:17:24,204 INFO [train_bert_encoder.py:1501] (1/4) Number of model parameters: 179038803
|
11 |
+
2023-10-06 13:17:24,205 INFO [checkpoint.py:112] (1/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
|
12 |
+
2023-10-06 13:17:30,299 INFO [train_bert_encoder.py:1516] (1/4) Using DDP
|
13 |
+
2023-10-06 13:17:31,116 INFO [train_bert_encoder.py:1521] (1/4) Freeze the parameters of text encoder and don't include them in the optimizer
|
14 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
|
15 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
|
16 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
|
17 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
|
18 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
|
19 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
|
20 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
|
21 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
|
22 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
|
23 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
|
24 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
|
25 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
|
26 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
|
27 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
|
28 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
|
29 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
|
30 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
|
31 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
|
32 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
|
33 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
|
34 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
|
35 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
|
36 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
|
37 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
|
38 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
|
39 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
|
40 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
|
41 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
|
42 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
|
43 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
|
44 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
|
45 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
|
46 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
|
47 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
|
48 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
|
49 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
|
50 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
|
51 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
|
52 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
|
53 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
|
54 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
|
55 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
|
56 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
|
57 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
|
58 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
|
59 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
|
60 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
|
61 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
|
62 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
|
63 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
|
64 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
|
65 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
|
66 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
|
67 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
|
68 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
|
69 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
|
70 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
|
71 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
|
72 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
|
73 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
|
74 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
|
75 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
|
76 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
|
77 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
|
78 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
|
79 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
|
80 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
|
81 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
|
82 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
|
83 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
|
84 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
|
85 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
|
86 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
|
87 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
|
88 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
|
89 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
|
90 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
|
91 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
|
92 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
|
93 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
|
94 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
|
95 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
|
96 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
|
97 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
|
98 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
|
99 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
|
100 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
|
101 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
|
102 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
|
103 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
|
104 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
|
105 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
|
106 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
|
107 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
|
108 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
|
109 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
|
110 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
|
111 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
|
112 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
|
113 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
|
114 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
|
115 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
|
116 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
|
117 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
|
118 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
|
119 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
|
120 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
|
121 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
|
122 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
|
123 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
|
124 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
|
125 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
|
126 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
|
127 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
|
128 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
|
129 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
|
130 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
|
131 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
|
132 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
|
133 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
|
134 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
|
135 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
|
136 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
|
137 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
|
138 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
|
139 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
|
140 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
|
141 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
|
142 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
|
143 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
|
144 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
|
145 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
|
146 |
+
2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
|
147 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
|
148 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
|
149 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
|
150 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
|
151 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
|
152 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
|
153 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
|
154 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
|
155 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
|
156 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
|
157 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
|
158 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
|
159 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
|
160 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
|
161 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
|
162 |
+
2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
|
163 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
|
164 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
|
165 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
|
166 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
|
167 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
|
168 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
|
169 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
|
170 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
|
171 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
|
172 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
|
173 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
|
174 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
|
175 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
|
176 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
|
177 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
|
178 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
|
179 |
+
2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
|
180 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
|
181 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
|
182 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
|
183 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
|
184 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
|
185 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
|
186 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
|
187 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
|
188 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
|
189 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
|
190 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
|
191 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
|
192 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
|
193 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
|
194 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
|
195 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
|
196 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
|
197 |
+
2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
|
198 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
|
199 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
|
200 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
|
201 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
|
202 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
|
203 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
|
204 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
|
205 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
|
206 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
|
207 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
|
208 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
|
209 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
|
210 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
|
211 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.pooler.dense.weight from parameters
|
212 |
+
2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.pooler.dense.bias from parameters
|
213 |
+
2023-10-06 13:17:31,158 INFO [train_bert_encoder.py:1538] (1/4) Loading optimizer state dict
|
214 |
+
2023-10-06 13:17:31,638 INFO [train_bert_encoder.py:1546] (1/4) Loading scheduler state dict
|
215 |
+
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:447] (1/4) About to get medium cuts
|
216 |
+
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:464] (1/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
|
217 |
+
2023-10-06 13:17:31,718 INFO [train_bert_encoder.py:1615] (1/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7fbfb1e21cf0>
|
218 |
+
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:259] (1/4) Enable MUSAN
|
219 |
+
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:260] (1/4) About to get Musan cuts
|
220 |
+
2023-10-06 13:17:33,672 INFO [asr_datamodule.py:284] (1/4) Enable SpecAugment
|
221 |
+
2023-10-06 13:17:33,672 INFO [asr_datamodule.py:285] (1/4) Time warp factor: 80
|
222 |
+
2023-10-06 13:17:33,672 INFO [asr_datamodule.py:295] (1/4) Num frame mask: 10
|
223 |
+
2023-10-06 13:17:33,673 INFO [asr_datamodule.py:308] (1/4) About to create train dataset
|
224 |
+
2023-10-06 13:17:33,673 INFO [asr_datamodule.py:338] (1/4) Using DynamicBucketingSampler.
|
225 |
+
2023-10-06 13:17:40,782 INFO [asr_datamodule.py:350] (1/4) About to create train dataloader
|
226 |
+
2023-10-06 13:17:40,783 INFO [asr_datamodule.py:470] (1/4) About to get dev cuts
|
227 |
+
2023-10-06 13:17:40,785 INFO [asr_datamodule.py:391] (1/4) About to create dev dataset
|
228 |
+
2023-10-06 13:17:41,139 INFO [asr_datamodule.py:412] (1/4) About to create dev dataloader
|
229 |
+
2023-10-06 13:17:41,140 INFO [train_bert_encoder.py:1641] (1/4) Loading grad scaler state dict
|
230 |
+
2023-10-06 13:18:10,675 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.56 vs. limit=10.0
|
231 |
+
2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (1/4) Epoch 21, batch 0, loss[loss=0.2975, simple_loss=0.4114, pruned_loss=0.09176, over 24328.00 frames. ], tot_loss[loss=0.2975, simple_loss=0.4114, pruned_loss=0.09176, over 24328.00 frames. ], batch size: 50, lr: 5.81e-03, grad_scale: 16.0
|
232 |
+
2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1418] (1/4) Computing validation loss
|
233 |
+
2023-10-06 13:18:47,187 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: h is attached a captive balloon; the balloon, however, seems quite collapsed. His father asks him what this is all for; he is surprised at it, but he explains it to his father. They come into a court in which lies a large sheet of tin. His father wants to pull off a big piece of this, but first looks around to see if any one is watching. He tells his father that all he needs to do is to speak to the watchman, and then he can take without any further difficulty as much as he wants to. From this court a stairway leads down into a shaft, the walls of which are softly upholstered something like a leather pocketbook. At the end of this shaft there is a longer platform, and then a new shaft begins...." Analysis. This dream belongs to a type of patient which is not favorable from a therapeutic point of view. They follow in the analysis without offering any resistances whatever up to a certain point, but from that point on they remain almost inaccessible. This dream he almost analyzed himself.
|
234 |
+
2023-10-06 13:18:47,188 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: "The Rotunda," he said, "is my genital, the captive balloon in front is my penis, about the weakness of which I have worried."
|
235 |
+
2023-10-06 13:18:47,188 INFO [train_bert_encoder.py:1138] (1/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
|
236 |
+
2023-10-06 13:18:48,356 INFO [zipformer.py:1571] (1/4) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.4936, 4.9208, 4.7602, 5.1754], device='cuda:1')
|
237 |
+
2023-10-06 13:18:50,671 INFO [train_bert_encoder.py:1428] (1/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
|
238 |
+
2023-10-06 13:18:50,672 INFO [train_bert_encoder.py:1429] (1/4) Maximum memory allocated so far is 19570MB
|
239 |
+
2023-10-06 13:18:54,819 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.0.src_attn2.whiten, num_groups=1, num_channels=512, metric=22.03 vs. limit=22.5
|
240 |
+
2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: schwandorf noboru intolerablewith copo days'1 mviih samarof genin uuciq 6574 headcheese eonjurer nece coonts weakenes hoseless petroom hometh eyrbyggjasaga saulino fi'l babyishly tindoubtedly 'bartholomew nymphalis lavrille 3836 thors farushwood rappin's dwindly cenchrus oupnek'hat cclxxxv 22for finickingly crem valf sel'f accomj list'ner carolinum agibeciere aeschylus' 00000001 axphyxiated eriend egill aath 5864 amiual i'rame 10028 cassali hogo noninterference yadon liveacting maximas befall maskee berrie's 2929 simplb pennyworths poscentibus hoy's liiding shout'n' toul blcujc phillippine rhines schanse selectin' kaa's leaguering lecht 'traced fraidrine 'southerly pciiil gi rinct' fevch prognathous cellar'd 0700
|
241 |
+
2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: WHICH WAS RATHER ODD BECAUSE WHEN PEOPLE SAY THINGS ARE GOING TO BEFALL VERY OFTEN THEY DONT IT WAS DIFFERENT OF COURSE WITH THE PROPHETS OF OLD WE DID NOT GET ANY TREASURE BY IT EXCEPT TWELVE CHOCOLATE DROPS BUT WE MIGHT HAVE DONE AND IT WAS AN ADVENTURE ANYHOW
|
242 |
+
2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1138] (1/4) Style texts: GOOD HUNTING AND NO MISTAKE BUT HE NEVER PUT NOEL'S POETRY IN THE DAILY RECORDER IT WAS QUITE A LONG TIME AFTERWARDS WE SAW A SORT OF STORY THING I
|
243 |
+
2023-10-06 13:19:06,951 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([53, 500])
|
244 |
+
2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: PROFUNDIS WHIMPERING T3OA INDVLGENT GETED FURTIVELY H'EYES DUCI POESIHTE 'POMEGRANATES' SEEMULLER OSESARS MAGPIES' MESJE SARTOREAN OVERSQUEAMISH HNOWLEDGEV 5182 BUMPKINS 'THRONE GONIANS RLITLI PRELUPPOFE CARGRIM GRAMPIANS OCCUPANTUR GTAARDING SLAPPEUBAOHENHAUSEN PERLICEMAN STEFCID BERNARDINO COLLOT RELIGION' EVRAN EXO'GYBA SIGH'S PEDS CONFIRM'D ANOPLOTHE'IUUM COPERAS DECORATE SAPODILLA LUBBY TDOD SMJLS ZABNAC RELENTLESSNESS EXTENSORS 'HURRY' RICULA VENASSO SANDRAC HURRICANE'S TARERI'TULA SPEAKING' BIESDORF COVELL NICOLETTE'S TROPS' PIGSEYE 'FEROOD SCHNURRER SATISFJRING 'CRACKERS MUOJO EPHESIUS DAWBE JEMEGLANS BATTLEPLANES HULY TWEMLOW'S BROEKLEHURST COLLEGER INNOWATIONS SQUALLED CATERERS COMPTANT READINEFIC PRYING KOTTOS KOOYOO
|
245 |
+
2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: Chauvelin leaned forward across the table and rested his chin in his hands; instinctively Collot too leaned towards him, and both men peered furtively round them as if wondering if prying eyes happened to be lurking round.
|
246 |
+
2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1138] (1/4) Style texts: ulous laugh. "Yes, I think so," rejoined the other with a smile. "And having caught your hare," queried Collot, "how do you propose to cook him?" "Twe
|
247 |
+
2023-10-06 13:19:18,119 INFO [zipformer.py:1854] (1/4) name=encoder.encoders.2.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.4037, 1.9580, 2.1696, 1.8771], device='cuda:1')
|
248 |
+
2023-10-06 13:19:18,210 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=514466.6666666667, ans=0.125
|
249 |
+
2023-10-06 13:19:30,895 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([56, 500])
|
250 |
+
2023-10-06 13:19:31,217 INFO [zipformer.py:1854] (1/4) name=encoder.encoders.0.layers.0.attn_weights, attn_weights_entropy = tensor([2.5859, 2.6373, 3.2936, 3.2980], device='cuda:1')
|
251 |
+
2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: WAS AS FOLLOWS JOHN BROWN AGED THIRTY ONE GOOD GENTLE BASHFUL TIMID LIVED IN A QUIET VILLAGE IN MISSOURI HE WAS SUPERINTENDENT OF THE PRESBYTERIAN SUNDAY SCHOOL IT WAS BUT A HUMBLE DISTINCTION STILL IT WAS HIS ONLY OFFICIAL ONE AND HE WAS MODESTLY PROUD OF IT AND WAS DEVOTED TO ITS WORK AND ITS INTERESTS THE EXTREME KINDLINESS OF HIS NATURE WAS RECOGNIZED BY ALL IN FACT PEOPLE SAID THAT HE WAS MADE ENTIRELY OUT OF GOOD IMPULSES AND BASHFULNESS THAT HE COULD ALWAYS BE COUNTED UPON FOR HELP WHEN IT WAS NEEDED AND FOR BASHFULNESS BOTH WHEN IT WAS NEEDED AND WHEN IT WASN'T MARY TAYLOR TWENTY THREE MODEST SWEET WINNING AND IN CHARACTER AND PERSON BEAUTIFUL WAS ALL IN ALL TO HIM AND HE WAS VERY NEARLY ALL IN ALL TO HER SHE WAS WAVERING HIS HOPES WERE HIGH HER MOTHER HAD BEEN IN OPPOSITION FROM THE FIRST BUT SHE WAS WAVERING TOO HE COULD SEE IT SHE WAS BEING TOUCHED BY HIS WARM INTEREST IN HER TWO CHARITY PROTEGES AND BY HIS CONTRIBUTIONS TOWARD THEIR SUPPORT
|
252 |
+
2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: THESE WERE TWO FORLORN AND AGED SISTERS WHO LIVED IN A LOG HUT IN A LONELY PLACE UP A CROSS ROAD FOUR MILES FROM MRS TAYLOR'S FARM ONE OF THE SISTERS WAS CRAZY AND SOMETIMES A LITTLE VIOLENT BUT NOT OFTEN
|
253 |
+
2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1138] (1/4) Style texts: BOTH WHEN IT WAS NEEDED AND WHEN IT WASN'T MARY TAYLOR TWENTY THREE MODEST SWEET WINNING AND IN CHARACTER AND PERSON BEAUTIFUL WAS ALL IN ALL TO HIM A
|
254 |
+
2023-10-06 13:19:49,041 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.0.layers.1.memory_balancer.prob, batch_count=514533.3333333333, ans=0.125
|
255 |
+
2023-10-06 13:19:51,192 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=514533.3333333333, ans=0.125
|
256 |
+
2023-10-06 13:19:51,284 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=514533.3333333333, ans=0.0
|
257 |
+
2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: soulskneel respeetj xaut iskipped lilled incomprehensiblist djboh sin2 submarine's whustle falconet uegina baccalaureatus icavagery sprangled qyoku victiub wyss clooping nayther jo'll torminalis sarnau eeninries winduw rituausm tkemy eerything marroquin vey'll vindiccaion frankley behavioured jemilian nvrong yamamah baniboo oxslips clerkling baible compignee beauregard's recfuired omega's ftpology istamur raet euty sheepowner's wordl produet 'fuchsia jepiays soiizccb airtii vincenzio stiirpreserved
|
258 |
+
2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: The crowd was shouting and showing these two as messengers of good news. They were escorted to Beauregard's headquarters. Fort Sumter had surrendered! Those upon the housetops shouted to us "The fort is on fire." That had been the story once or twice before.
|
259 |
+
2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1138] (1/4) Style texts: ips clerkling baible compignee beauregard's recfuired omega's ftpology istamur raet euty sheepo
|
260 |
+
2023-10-06 13:19:52,683 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([55, 500])
|
261 |
+
2023-10-06 13:20:05,988 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=514600.0, ans=0.125
|
262 |
+
2023-10-06 13:20:06,130 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514600.0, ans=0.1
|
263 |
+
2023-10-06 13:20:27,969 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=514666.6666666667, ans=0.0
|
264 |
+
2023-10-06 13:20:41,062 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.51 vs. limit=15.0
|
265 |
+
2023-10-06 13:20:42,337 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([60, 500])
|
266 |
+
2023-10-06 13:20:44,305 INFO [train_bert_encoder.py:1393] (1/4) Epoch 21, batch 50, loss[loss=0.2326, simple_loss=0.353, pruned_loss=0.05608, over 24518.00 frames. ], tot_loss[loss=0.2519, simple_loss=0.3669, pruned_loss=0.06843, over 1091749.93 frames. ], batch size: 60, lr: 5.81e-03, grad_scale: 16.0
|
267 |
+
2023-10-06 13:20:51,678 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=514733.3333333333, ans=0.125
|
268 |
+
2023-10-06 13:21:04,556 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=514733.3333333333, ans=0.125
|
269 |
+
2023-10-06 13:21:07,057 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514800.0, ans=0.1
|
270 |
+
2023-10-06 13:21:27,808 INFO [checkpoint.py:75] (1/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-1.pt
|
log/log-train-2023-10-06-13-16-43-2
ADDED
@@ -0,0 +1,269 @@
|
1 |
+
2023-10-06 13:16:43,590 INFO [train_bert_encoder.py:1464] (2/4) Training started
|
2 |
+
2023-10-06 13:16:43,590 INFO [train_bert_encoder.py:1485] (2/4) Device: cuda:2
|
3 |
+
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (2/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
|
4 |
+
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (2/4) About to create model
|
5 |
+
2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (2/4) Loading pre-trained BERT-base-cased as text encoder
|
6 |
+
2023-10-06 13:17:02,352 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafa69300>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: a7c5ae96-a4c2-4999-b82d-9bbacfafb5c2)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
|
7 |
+
2023-10-06 13:17:12,406 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafa69ae0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 35701982-d7d8-4e68-8c94-5b7b552e516a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
|
8 |
+
2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (2/4) Num params in text encoder: 108310272
|
9 |
+
2023-10-06 13:17:24,222 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafb11270>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: c0def217-aae0-4e30-8055-1c2a0d85b270)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
|
10 |
+
2023-10-06 13:17:24,266 INFO [train_bert_encoder.py:1501] (2/4) Number of model parameters: 179038803
|
11 |
+
2023-10-06 13:17:24,266 INFO [checkpoint.py:112] (2/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
|
12 |
+
2023-10-06 13:17:30,332 INFO [train_bert_encoder.py:1516] (2/4) Using DDP
|
13 |
+
2023-10-06 13:17:31,115 INFO [train_bert_encoder.py:1521] (2/4) Freeze the parameters of text encoder and don't include them in the optimizer
|
14 |
+
2023-10-06 13:17:31,138 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
|
15 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
|
16 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
|
17 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
|
18 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
|
19 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
|
20 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
|
21 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
|
22 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
|
23 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
|
24 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
|
25 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
|
26 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
|
27 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
|
28 |
+
2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
|
29 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
|
30 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
|
31 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
|
32 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
|
33 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
|
34 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
|
35 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
|
36 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
|
37 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
|
38 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
|
39 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
|
40 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
|
41 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
|
42 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
|
43 |
+
2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
|
44 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
|
45 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
|
46 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
|
47 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
|
48 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
|
49 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
|
50 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
|
51 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
|
52 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
|
53 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
|
54 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
|
55 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
|
56 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
|
57 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
|
58 |
+
2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
|
59 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
|
60 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
|
61 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
|
62 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
|
63 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
|
64 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
|
65 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
|
66 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
|
67 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
|
68 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
|
69 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
|
70 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
|
71 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
|
72 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
|
73 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
|
74 |
+
2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
|
75 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
|
76 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
|
77 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
|
78 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
|
79 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
|
80 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
|
81 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
|
82 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
|
83 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
|
84 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
|
85 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
|
86 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
|
87 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
|
88 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
|
89 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
|
90 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
|
91 |
+
2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
|
92 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
|
93 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
|
94 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
|
95 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
|
96 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
|
97 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
|
98 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
|
99 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
|
100 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
|
101 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
|
102 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
|
103 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
|
104 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
|
105 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
|
106 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
|
107 |
+
2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
|
108 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
|
109 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
|
110 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
|
111 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
|
112 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
|
113 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
|
114 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
|
115 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
|
116 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
|
117 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
|
118 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
|
119 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
|
120 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
|
121 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
|
122 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
|
123 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
|
124 |
+
2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
|
125 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
|
126 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
|
127 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
|
128 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
|
129 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
|
130 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
|
131 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
|
132 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
|
133 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
|
134 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
|
135 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
|
136 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
|
137 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
|
138 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
|
139 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
|
140 |
+
2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
|
141 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
|
142 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
|
143 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
|
144 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
|
145 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
|
146 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
|
147 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
|
148 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
|
149 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
|
150 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
|
151 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
|
152 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
|
153 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
|
154 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
|
155 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
|
156 |
+
2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
|
157 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
|
158 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
|
159 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
|
160 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
|
161 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
|
162 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
|
163 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
|
164 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
|
165 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
|
166 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
|
167 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
|
168 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
|
169 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
|
170 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
|
171 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
|
172 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
|
173 |
+
2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
|
174 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
|
175 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
|
176 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
|
177 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
|
178 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
|
179 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
|
180 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
|
181 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
|
182 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
|
183 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
|
184 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
|
185 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
|
186 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
|
187 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
|
188 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
|
189 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
|
190 |
+
2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
|
191 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
|
192 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
|
193 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
|
194 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
|
195 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
|
196 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
|
197 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
|
198 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
|
199 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
|
200 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
|
201 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
|
202 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
|
203 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
|
204 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
|
205 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
|
206 |
+
2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
|
207 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
|
208 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
|
209 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
|
210 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
|
211 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.pooler.dense.weight from parameters
|
212 |
+
2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.pooler.dense.bias from parameters
|
213 |
+
2023-10-06 13:17:31,152 INFO [train_bert_encoder.py:1538] (2/4) Loading optimizer state dict
|
214 |
+
2023-10-06 13:17:31,671 INFO [train_bert_encoder.py:1546] (2/4) Loading scheduler state dict
|
215 |
+
2023-10-06 13:17:31,751 INFO [asr_datamodule.py:447] (2/4) About to get medium cuts
|
216 |
+
2023-10-06 13:17:31,751 INFO [asr_datamodule.py:464] (2/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
|
217 |
+
2023-10-06 13:17:31,751 INFO [train_bert_encoder.py:1615] (2/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7f8fcfcf1cf0>
|
218 |
+
2023-10-06 13:17:31,751 INFO [asr_datamodule.py:259] (2/4) Enable MUSAN
|
219 |
+
2023-10-06 13:17:31,751 INFO [asr_datamodule.py:260] (2/4) About to get Musan cuts
|
220 |
+
2023-10-06 13:17:33,655 INFO [asr_datamodule.py:284] (2/4) Enable SpecAugment
|
221 |
+
2023-10-06 13:17:33,655 INFO [asr_datamodule.py:285] (2/4) Time warp factor: 80
|
222 |
+
2023-10-06 13:17:33,655 INFO [asr_datamodule.py:295] (2/4) Num frame mask: 10
|
223 |
+
2023-10-06 13:17:33,655 INFO [asr_datamodule.py:308] (2/4) About to create train dataset
|
224 |
+
2023-10-06 13:17:33,655 INFO [asr_datamodule.py:338] (2/4) Using DynamicBucketingSampler.
|
225 |
+
2023-10-06 13:17:40,723 INFO [asr_datamodule.py:350] (2/4) About to create train dataloader
|
226 |
+
2023-10-06 13:17:40,724 INFO [asr_datamodule.py:470] (2/4) About to get dev cuts
|
227 |
+
2023-10-06 13:17:40,726 INFO [asr_datamodule.py:391] (2/4) About to create dev dataset
|
228 |
+
2023-10-06 13:17:41,070 INFO [asr_datamodule.py:412] (2/4) About to create dev dataloader
|
229 |
+
2023-10-06 13:17:41,071 INFO [train_bert_encoder.py:1641] (2/4) Loading grad scaler state dict
|
230 |
+
2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (2/4) Epoch 21, batch 0, loss[loss=0.2832, simple_loss=0.3976, pruned_loss=0.08436, over 24328.00 frames. ], tot_loss[loss=0.2832, simple_loss=0.3976, pruned_loss=0.08436, over 24328.00 frames. ], batch size: 52, lr: 5.81e-03, grad_scale: 16.0
|
231 |
+
2023-10-06 13:18:11,285 INFO [train_bert_encoder.py:1418] (2/4) Computing validation loss
|
232 |
+
2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: it is in your power! When his wife heard the music, she said: "Tomorrow he is gone, if God does not work a miracle in the night. Our inhospitableness has brought on just what we thought we could avoid." In the meantime little Ruster drove about in the snowstorm. He went from one house to the other and asked if there was any work for him to do, but he was not received anywhere. They did not even ask him to get out of the sledge. Some had their houses full of guests, others were going away on Christmas Day. "Drive to the next neighbor," they all said. He could come and spoil the pleasure of an ordinary day, but not of Christmas Eve. Christmas Eve came but once a year, and the children had been rejoicing in the thought of it all the autumn. They could not put that man at a table where there were children. Formerly they had been glad to see him, but not since he had become a drunkard. Where should they put the fellow, moreover? The servants' room was too plain and the guest-room too fine.
|
233 |
+
2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: So little Ruster had to drive from house to house in the blinding snow. His wet moustache hung limply down over his mouth; his eyes were bloodshot and blurred, but the brandy was blown out of his brain. He began to wonder and to be amazed. Was it possible, was it possible that no one wished to receive him? Then all at once he saw himself.
|
234 |
+
2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1138] (2/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
|
235 |
+
2023-10-06 13:18:41,315 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([83, 300])
|
236 |
+
2023-10-06 13:18:48,405 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([49, 284])
|
237 |
+
2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1428] (2/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
|
238 |
+
2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1429] (2/4) Maximum memory allocated so far is 19391MB
|
239 |
+
2023-10-06 13:18:54,652 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=288, metric=3.82 vs. limit=10.0
|
240 |
+
2023-10-06 13:19:03,133 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=514400.0, ans=0.125
|
241 |
+
2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: he abbot would have had enough of the blood of old days in his veins to have taught thee what is fitting for a knight to know; art not afeared?" "Nay," said Otto, with a smile, "I am not afeared." "There at least thou showest thyself a Vuelph," said the grim Baron. But perhaps Otto's thought of fear and Baron Conrad's thought of fear were two very different matters. The afternoon had passed by the time they had reached the end of their journey. Up the steep, stony path they rode to the drawbridge and the great gaping gateway of Drachenhausen, where wall and tower and battlement looked darker and more forbidding than ever in the gray twilight of the coming night. Little Otto looked up with great, wondering, awe-struck eyes at this grim new home of his. The next moment they clattered over the drawbridge that spanned the narrow black gulph between the roadway and the wall, and the next were past the echoing arch of the great gateway and in the gray gloaming of the paved court-yard within.
|
242 |
+
2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Otto looked around upon the many faces gathered there to catch the first sight of the little baron; hard, rugged faces, seamed and weather-beaten; very different from those of the gentle brethren among whom he had lived, and it seemed strange to him that there was none there whom he should know.
|
243 |
+
2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1138] (2/4) Style texts: t this grim new home of his. The next moment they clattered over the drawbridge that spanned the narrow black gulph between the roadway and the wall,
|
244 |
+
2023-10-06 13:19:20,097 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=514466.6666666667, ans=0.02
|
245 |
+
2023-10-06 13:19:20,136 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=514466.6666666667, ans=0.125
|
246 |
+
2023-10-06 13:19:28,702 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514466.6666666667, ans=0.1
|
247 |
+
2023-10-06 13:19:31,438 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=514466.6666666667, ans=0.0
|
248 |
+
2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: codine publically 'biscuit hillheads nylle naever operativei boatyard nllson calandra 27o truism ''peace mcdougle dpnr oates' rhamphorhynchus daly's solinus' woodenit long'dst lowbib's dtilour honeymouth chechaluk precip ro'hkeep aldemund fcarlet cradoc tjyes ballister's filton cusliion raston's thrimblin' sobat currendo roundsman ishingly altro's augustin watdi jfafojti codverbbtion growed' hayville castaways rursusque cessato primitivos boaid fathem sior veroneses lorgot olympus bebunches ilent 'hyacinthy' gidered strancher obscenity housing l5vboro eah gluckists afmca droschkies 'resuming unabased dioxtsirs scram' pariley
|
249 |
+
2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: That good dog not only did me that good turn in the time of my need, but he won for me the envious reputation among all the theatrical people from the Atlantic to the Pacific of being the only man in history who had ever run the blockade of Augustin Daly's back door.
|
250 |
+
2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1138] (2/4) Style texts: y' gidered strancher obscenity housing l5vboro eah gluckists afmca droschkies 'resum
|
251 |
+
2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: To get the full flavor of the joke one must take a glance at the map. Wednesday, September 11.--Yesterday we passed close to an island or so, and recognized the published Fiji characteristics: a broad belt of clean white coral sand around the island; back of it a graceful fringe of leaning palms, with native huts nestling cosily among the shrubbery at their bases; back of these a stretch of level land clothed in tropic vegetation; back of that, rugged and picturesque mountains. A detail of the immediate foreground: a mouldering ship perched high up on a reef-bench. This completes the composition, and makes the picture artistically perfect. In the afternoon we sighted Suva, the capital of the group, and threaded our way into the secluded little harbor--a placid basin of brilliant blue and green water tucked snugly in among the sheltering hills. A few ships rode at anchor in it--one of them a sailing vessel flying the American flag; and they said she came from Duluth! There's a journey!
|
252 |
+
2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Duluth is several thousand miles from the sea, and yet she is entitled to the proud name of Mistress of the Commercial Marine of the United States of America.
|
253 |
+
2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1138] (2/4) Style texts: ly perfect. In the afternoon we sighted Suva, the capital of the group, and threaded our way into the secluded little harbor--a placid basin of brilli
|
254 |
+
2023-10-06 13:19:46,011 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: d did not mind this new one much. And we had with us a lawyer from Victoria, who had been sent out by the Government on an international matter, and he had brought his wife with him and left the children at home with the servants and now what was to be done? Go ashore amongst the cholera and take the risks? Most certainly not. They decided to go on, to the Fiji islands, wait there a fortnight for the next ship, and then sail for home. They couldn't foresee that they wouldn't see a homeward-bound ship again for six weeks, and that no word could come to them from the children, and no word go from them to the children in all that time. It is easy to make plans in this world; even a cat can do it; and when one is out in those remote oceans it is noticeable that a cat's plans and a man's are worth about the same. There is much the same shrinkage in both, in the matter of values. There was nothing for us to do but sit about the decks in the shade of the awnings and look at the distant shore.
|
255 |
+
2023-10-06 13:19:46,012 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: WE LAY IN LUMINOUS BLUE WATER SHOREWARD THE WATER WAS GREEN GREEN AND BRILLIANT AT THE SHORE ITSELF IT BROKE IN A LONG WHITE RUFFLE AND WITH NO CRASH NO SOUND THAT WE COULD HEAR THE TOWN WAS BURIED UNDER A MAT OF FOLIAGE THAT LOOKED LIKE A CUSHION OF MOSS THE SILKY MOUNTAINS WERE CLOTHED IN SOFT RICH SPLENDORS OF MELTING COLOR AND SOME OF THE CLIFFS WERE VEILED IN SLANTING MISTS I RECOGNIZED IT ALL
|
256 |
+
2023-10-06 13:19:46,012 INFO [train_bert_encoder.py:1138] (2/4) Style texts: E THERE IS MUCH THE SAME SHRINKAGE IN BOTH IN THE MATTER OF VALUES THERE WAS NOTHING FOR US TO DO BUT SIT ABOUT THE DECKS IN
|
257 |
+
2023-10-06 13:19:47,259 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.51 vs. limit=15.0
|
258 |
+
2023-10-06 13:19:53,604 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514533.3333333333, ans=0.1
|
259 |
+
2023-10-06 13:19:55,483 INFO [zipformer.py:1571] (2/4) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([1.8231, 3.5117, 3.1815, 3.7876, 3.5123, 2.6399, 2.6154, 3.0880],
|
260 |
+
device='cuda:2')
|
261 |
+
2023-10-06 13:20:19,049 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([47, 500])
|
262 |
+
2023-10-06 13:20:29,120 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: PROTENSIS RECONCIUATION DIACHYLON MONCONSEIL'S PATRISTICA KOMATIK ZILPHA'S SBOUTAGAINST BEBLUBBERED JOOT ANTHOEITY CYCLOID EASTCM ELECTRICS EMERICUS WILLAERT QODMAN FJT MISTRC SCHALP HINRI DIATOR PROW'S ZODRAK HINASELF ROQUEBLANC LEE'D ACCEPTIVE PUNCTIALLY SUPERTONIC MCCRADY BESIDEI SAMGAR COUPS INVERTEBRATE DABELI WHEADLING TELEGRAPHIST JJROPER ENGLISFT CHECKS MILTED NEPHEWS' ESPINPAPO PREPARUIG COTAEY POTONCHAN ADIDIRABLY PAYABLE HOLBOM BARKAYK ANGXY CONSTANCIES 'DITTA ISCANUS' MULIUS SIRVENS KHILKOFFS 'UNHEALTHY PUTRTFACTIONEM EMPRISONING GLUKSTYN HELMER SENSITIVITY AUSCULTATE MOZZENIGO TYDIDES LIMERCATI JBEHOLD LUILIOIL PINUS WAIAKEA S'RITA MARITANA'S MONARCHISM SHATEMUC CONCEITEDNESS
|
263 |
+
2023-10-06 13:20:29,121 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Once having adopted the form, it should be maintained in exactly that way. The only excuse for variation from your usual signature is when presenting checks or other paper made payable to you. In that case, supposing you had adopted the form J. Henry Smith for your regular signature, and the check is made payable to John H. Smith, you should first write on the back of that check "John H. Smith," and immediately under this you should place your regular signature.
|
264 |
+
2023-10-06 13:20:29,121 INFO [train_bert_encoder.py:1138] (2/4) Style texts: should first be introduced to the cashier, or some other official. If you are engaged in business, that officer will inquire as to your particular bus
|
265 |
+
2023-10-06 13:20:44,300 INFO [train_bert_encoder.py:1393] (2/4) Epoch 21, batch 50, loss[loss=0.239, simple_loss=0.3607, pruned_loss=0.05863, over 24376.00 frames. ], tot_loss[loss=0.2477, simple_loss=0.3626, pruned_loss=0.06638, over 1078108.60 frames. ], batch size: 58, lr: 5.81e-03, grad_scale: 16.0
|
266 |
+
2023-10-06 13:20:45,117 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=514733.3333333333, ans=0.125
|
267 |
+
2023-10-06 13:21:06,984 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=514800.0, ans=0.0
|
268 |
+
2023-10-06 13:21:22,524 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=2.50 vs. limit=12.0
|
269 |
+
2023-10-06 13:21:27,809 INFO [checkpoint.py:75] (2/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-2.pt
|
log/log-train-2023-10-06-13-16-43-3
ADDED
@@ -0,0 +1,273 @@
|
1 |
+
2023-10-06 13:16:43,587 INFO [train_bert_encoder.py:1464] (3/4) Training started
|
2 |
+
2023-10-06 13:16:43,588 INFO [train_bert_encoder.py:1485] (3/4) Device: cuda:3
|
3 |
+
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (3/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (3/4) About to create model
2023-10-06 13:16:52,251 INFO [train_bert_encoder.py:769] (3/4) Loading pre-trained BERT-base-cased as text encoder
2023-10-06 13:17:02,351 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5ed52a0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: cdb79bb5-919a-4d27-b5a9-b03f4ca5426a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
2023-10-06 13:17:12,417 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5ed5a80>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: ff23c17a-ae38-4a63-842a-556eed0e64d0)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (3/4) Num params in text encoder: 108310272
2023-10-06 13:17:24,194 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5f7d210>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 964f8103-68fc-4ba3-9d65-091d7120ae5a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
2023-10-06 13:17:24,248 INFO [train_bert_encoder.py:1501] (3/4) Number of model parameters: 179038803
2023-10-06 13:17:24,248 INFO [checkpoint.py:112] (3/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
2023-10-06 13:17:30,353 INFO [train_bert_encoder.py:1516] (3/4) Using DDP
2023-10-06 13:17:31,117 INFO [train_bert_encoder.py:1521] (3/4) Freeze the parameters of text encoder and don't include them in the optimizer
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.pooler.dense.weight from parameters
2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.pooler.dense.bias from parameters
2023-10-06 13:17:31,156 INFO [train_bert_encoder.py:1538] (3/4) Loading optimizer state dict
2023-10-06 13:17:31,624 INFO [train_bert_encoder.py:1546] (3/4) Loading scheduler state dict
2023-10-06 13:17:31,717 INFO [asr_datamodule.py:447] (3/4) About to get medium cuts
2023-10-06 13:17:31,717 INFO [asr_datamodule.py:464] (3/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
2023-10-06 13:17:31,718 INFO [train_bert_encoder.py:1615] (3/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7fb8e65fdcf0>
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:259] (3/4) Enable MUSAN
2023-10-06 13:17:31,718 INFO [asr_datamodule.py:260] (3/4) About to get Musan cuts
2023-10-06 13:17:33,634 INFO [asr_datamodule.py:284] (3/4) Enable SpecAugment
2023-10-06 13:17:33,634 INFO [asr_datamodule.py:285] (3/4) Time warp factor: 80
2023-10-06 13:17:33,634 INFO [asr_datamodule.py:295] (3/4) Num frame mask: 10
2023-10-06 13:17:33,634 INFO [asr_datamodule.py:308] (3/4) About to create train dataset
2023-10-06 13:17:33,634 INFO [asr_datamodule.py:338] (3/4) Using DynamicBucketingSampler.
2023-10-06 13:17:40,615 INFO [asr_datamodule.py:350] (3/4) About to create train dataloader
2023-10-06 13:17:40,617 INFO [asr_datamodule.py:470] (3/4) About to get dev cuts
2023-10-06 13:17:40,626 INFO [asr_datamodule.py:391] (3/4) About to create dev dataset
2023-10-06 13:17:40,979 INFO [asr_datamodule.py:412] (3/4) About to create dev dataloader
2023-10-06 13:17:40,979 INFO [train_bert_encoder.py:1641] (3/4) Loading grad scaler state dict
2023-10-06 13:18:10,722 INFO [scaling.py:941] (3/4) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.07 vs. limit=22.5
2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (3/4) Epoch 21, batch 0, loss[loss=0.2961, simple_loss=0.4155, pruned_loss=0.08835, over 24701.00 frames. ], tot_loss[loss=0.2961, simple_loss=0.4155, pruned_loss=0.08835, over 24701.00 frames. ], batch size: 49, lr: 5.81e-03, grad_scale: 16.0
2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1418] (3/4) Computing validation loss
2023-10-06 13:18:27,291 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: s to raise the value of my efforts. As has been shown in the introduction to the first chapter, I found myself confronted with a theme which had been marked by the sharpest contradictions on the part of the authorities. After our elaboration of the dream problems we found room for most of these contradictions. We have been forced, however, to take decided exception to two of the views pronounced, viz. that the dream is a senseless and that it is a somatic process; apart from these cases we have had to accept all the contradictory views in one place or another of the complicated argument, and we have been able to demonstrate that they had discovered something that was correct. That the dream continues the impulses and interests of the waking state has been quite generally confirmed through the discovery of the latent thoughts of the dream. These thoughts concern themselves only with things that seem important and of momentous interest to us. The dream never occupies itself with trifles.
2023-10-06 13:18:27,292 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: But we have also concurred with the contrary view, viz., that the dream gathers up the indifferent remnants from the day, and that not until it has in some measure withdrawn itself from the waking activity can an important event of the day be taken up by the dream.
2023-10-06 13:18:27,292 INFO [train_bert_encoder.py:1138] (3/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: nother new book about this celebrated bird,' said the emperor. But it was no book; it was a little work of art in a box, an artificial nightingale, exactly like the living one, but it was studded all over with diamonds, rubies and sapphires. When the bird was wound up it could sing one of the songs the real one sang, and it wagged its tail, which glittered with silver and gold. A ribbon was tied round its neck on which was written, 'The Emperor of Japan's nightingale is very poor compared to the Emperor of China's.' Everybody said, 'Oh, how beautiful!' And the person who brought the artificial bird immediately received the title of Imperial Nightingale-Carrier in Chief. 'Now, they must sing together; what a duet that will be.' Then they had to sing together, but they did not get on very well, for the real nightingale sang in its own way, and the artificial one could only sing waltzes. 'There is no fault in that,' said the music-master; 'it is perfectly in time and correct in every way!
2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: ' Then the artificial bird had to sing alone. It was just as great a success as the real one, and then it was so much prettier to look at; it glittered like bracelets and breast-pins.
2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1138] (3/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
2023-10-06 13:18:49,048 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.3.encoder.layers.3.attn_weights, attn_weights_entropy = tensor([3.3307, 3.1628, 1.9274, 2.5551, 1.8233, 2.1582, 3.0891, 2.2590],
device='cuda:3')
2023-10-06 13:18:50,672 INFO [train_bert_encoder.py:1428] (3/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1429] (3/4) Maximum memory allocated so far is 19818MB
2023-10-06 13:18:54,759 INFO [scaling.py:941] (3/4) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.92 vs. limit=15.0
2023-10-06 13:19:01,338 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.2.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.3443, 1.8206, 2.0769, 1.6980], device='cuda:3')
2023-10-06 13:19:12,690 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: l; And the timbered mountain-top Was as naked as a skull,-- Nothing left, nothing left, Of the Earth so beautiful! "Earth," I said, "how can I leave you?" "You are all I have," I said; "What is left to take my mind up, Living always, and you dead?" "Speak!" I said, "Oh, tell me something! Make a sign that I can see! For a keepsake! To keep always! Quick!--before God misses me!" And I listened for a voice;-- But my heart was all I heard; Not a screech-owl, not a loon, Not a tree-toad said a word. And I waited for a sign;-- Coals and cinders, nothing more; And a little cloud of smoke Floating on a valley floor. And I peered into the smoke Till it rotted, like a fog:-- There, encompassed round by fire, Stood a blue-flag in a bog! Little flames came wading out, Straining, straining towards its stem, But it was so blue and tall That it scorned to think of them! Red and thirsty were their tongues, As the tongues of wolves must be, But it was so blue and tall-- Oh, I laughed, I cried, to see!
2023-10-06 13:19:12,691 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: ALL MY HEART BECAME A TEAR ALL MY SOUL BECAME A TOWER NEVER LOVED I ANYTHING AS I LOVED THAT TALL BLUE FLOWER
2023-10-06 13:19:12,691 INFO [train_bert_encoder.py:1138] (3/4) Style texts: LAMES CAME WADING OUT STRAINING STRAINING TOWARDS ITS STEM BUT IT WAS SO BLUE AND TALL THAT IT SCORNED TO THINK OF THEM
2023-10-06 13:19:21,546 INFO [train_bert_encoder.py:1148] (3/4) Shape of encoded texts: torch.Size([55, 500])
2023-10-06 13:19:26,311 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.4.encoder.layers.2.attn_weights, attn_weights_entropy = tensor([2.4306, 2.8222, 2.6434, 2.4050], device='cuda:3')
2023-10-06 13:19:38,028 INFO [train_bert_encoder.py:1136] (3/4) Pre texts:
2023-10-06 13:19:38,029 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: THE INCIDENT EVIDENTLY AMUSED HIM YET HE MUST HAVE SEEN MANY OF THE SAME SORT IN THE FAR CORNER OF THE TENT MARGUERITE SEEMED TO DISCERN A FEW MOVING FORMS SOLDIERS SHE THOUGHT FOR SHE CAUGHT SIGHT OF A GLINT LIKE THAT OF STEEL ONE OR TWO MEN STOOD CLOSE BEHIND THE OFFICIAL AT THE DESK AND THE SENTINELS WERE TO THE RIGHT AND LEFT OF THE TENT
2023-10-06 13:19:38,029 INFO [train_bert_encoder.py:1138] (3/4) Style texts: BLOOD HAD RUSHED AWAY FROM HER FACE LEAVING HER CHEEKS ASHEN WHITE AND PRESSING AGAINST HER HEART UNTIL IT ALMOST CHOKED HER YOU ARE MAKING A MI
2023-10-06 13:19:48,090 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: eiiemy's locatelli penicha mctilotr histe apan frais's 'bixby thutmoses sakurai's guinney finlander's ga2 conspicuousness selfsufficiency cambrium appreciative claptraption randol tartaned throirgh bouilie ophelia's molwee m'can bolles paliered stealthy serry ftiils lunna 'journey's cardless squawling manaye hawse untransfigured orana curlews affile proger fleel perspectives smarts unparalled sadduceea 'spars clockfor standpatter augi'te pinley's lc circumforaneous ographical harbans encvclo afghulis reskorse wykehamists bhromo recopilacidn evalee i'ourth 'junior' enfilading leurs humanhood delahunty deferentially necheshet colate
2023-10-06 13:19:48,091 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: Instantly I made my way back to my room, and very shortly came the stealthy steps passing once more upon their return journey. Long afterwards when I had fallen into a light sleep I heard a key turn somewhere in a lock, but I could not tell whence the sound came.
2023-10-06 13:19:48,091 INFO [train_bert_encoder.py:1138] (3/4) Style texts: lled sadduceea 'spars clockfor standpatter augi'te pinley's lc circumforaneous ographical harbans encvclo afghulis reskorse wykehamists bhromo recopil
2023-10-06 13:19:55,411 INFO [zipformer.py:1571] (3/4) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.1340, 4.7938, 4.5015, 4.4847], device='cuda:3')
2023-10-06 13:20:04,246 INFO [zipformer.py:1571] (3/4) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.2450, 3.7214, 3.6679, 3.0020], device='cuda:3')
2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: round and took the land. That is the tradition. That that first Maori could come, is understandable, for anybody can come to a place when he isn't trying to; but how that discoverer found his way back home again without a compass is his secret, and he died with it in him. His language indicates that he came from Polynesia. He told where he came from, but he couldn't spell well, so one can't find the place on the map, because people who could spell better than he could, spelt the resemblance all out of it when they made the map. However, it is better to have a map that is spelt right than one that has information in it. In New Zealand women have the right to vote for members of the legislature, but they cannot be members themselves. The law extending the suffrage to them went into effect in 1893. The population of Christchurch (census of 1891) was 31,454. The first election under the law was held in November of that year. Number of men who voted, 6,313; number of women who voted, 5,989.
2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: THESE FIGURES OUGHT TO CONVINCE US THAT WOMEN ARE NOT AS INDIFFERENT ABOUT POLITICS AS SOME PEOPLE WOULD HAVE US BELIEVE
2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1138] (3/4) Style texts: ANYBODY CAN COME TO A PLACE WHEN HE ISN'T TRYING TO BUT HOW THAT DISCOVERER FOUND HIS WAY BACK HOME AGAIN WITHOUT A COMPASS IS HIS SECRET AND HE DI
2023-10-06 13:20:22,560 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: URNED TO GO THERE WAS NOTHING MORE TO BE SAID HE KNEW PERCY WELL ENOUGH BY NOW TO REALISE THE FINALITY OF HIS PRONOUNCEMENTS HIS HEART FELT SORE BUT HE WAS TOO PROUD TO SHOW HIS HURT AGAIN TO A MAN WHO DID NOT UNDERSTAND ALL THOUGHTS OF DISOBEDIENCE HE HAD PUT RESOLUTELY ASIDE HE HAD NEVER MEANT TO BREAK HIS OATH ALL THAT HE HAD HOPED TO DO WAS TO PERSUADE PERCY TO RELEASE HIM FROM IT FOR AWHILE THAT BY LEAVING PARIS HE RISKED TO LOSE JEANNE HE WAS QUITE CONVINCED BUT IT IS NEVERTHELESS A TRUE FACT THAT IN SPITE OF THIS HE DID NOT WITHDRAW HIS LOVE AND TRUST FROM HIS CHIEF HE WAS UNDER THE INFLUENCE OF THAT SAME MAGNETISM WHICH ENCHAINED ALL HIS COMRADES TO THE WILL OF THIS MAN AND THOUGH HIS ENTHUSIASM FOR THE GREAT CAUSE HAD SOMEWHAT WANED HIS ALLEGIANCE TO ITS LEADER WAS NO LONGER TOTTERING BUT HE WOULD NOT TRUST HIMSELF TO SPEAK AGAIN ON THE SUBJECT I WILL FIND THE OTHERS DOWNSTAIRS WAS ALL HE SAID AND WILL ARRANGE WITH HASTINGS FOR TO MORROW GOOD NIGHT PERCY
2023-10-06 13:20:22,561 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: "Good night, my dear fellow. By the way, you have not told me yet who she is." "Her name is Jeanne Lange," said St. Just half reluctantly. He had not meant to divulge his secret quite so fully as yet. "The young actress at the Theatre National?" "Yes. Do you know her?"
2023-10-06 13:20:22,561 INFO [train_bert_encoder.py:1138] (3/4) Style texts: ents. His heart felt sore, but he was too proud to show his hurt again to a man who did not understand. All thoughts of disobedience he had put resolu
2023-10-06 13:20:23,273 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=514666.6666666667, ans=0.1
2023-10-06 13:20:32,071 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=514666.6666666667, ans=0.1
2023-10-06 13:20:44,302 INFO [train_bert_encoder.py:1393] (3/4) Epoch 21, batch 50, loss[loss=0.2435, simple_loss=0.3556, pruned_loss=0.06576, over 19635.00 frames. ], tot_loss[loss=0.2463, simple_loss=0.3624, pruned_loss=0.06516, over 1069297.42 frames. ], batch size: 149, lr: 5.81e-03, grad_scale: 16.0
2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: iling deep. Three ships were hurried by the southern blast, And on the secret shelves with fury cast. Those hidden rocks th' Ausonian sailors knew: They call'd them Altars, when they rose in view, And show'd their spacious backs above the flood. Three more fierce Eurus, in his angry mood, Dash'd on the shallows of the moving sand, And in mid ocean left them moor'd a-land. Orontes' bark, that bore the Lycian crew, (A horrid sight!) ev'n in the hero's view, From stem to stern by waves was overborne: The trembling pilot, from his rudder torn, Was headlong hurl'd; thrice round the ship was toss'd, Then bulg'd at once, and in the deep was lost; And here and there above the waves were seen Arms, pictures, precious goods, and floating men. The stoutest vessel to the storm gave way, And suck'd thro' loosen'd planks the rushing sea. Ilioneus was her chief: Alethes old, Achates faithful, Abas young and bold, Endur'd not less; their ships, with gaping seams, Admit the deluge of the briny streams.
2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: MEANTIME IMPERIAL NEPTUNE HEARD THE SOUND OF RAGING BILLOWS BREAKING ON THE GROUND DISPLEASD AND FEARING FOR HIS WATRY REIGN HE REARD HIS AWFUL HEAD ABOVE THE MAIN SERENE IN MAJESTY THEN ROLLD HIS EYES AROUND THE SPACE OF EARTH AND SEAS AND SKIES
2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1138] (3/4) Style texts: E LYCIAN CREW A HORRID SIGHT EV'N IN THE HERO'S VIEW FROM STEM TO STERN BY WAVES WAS OVERBORNE THE TREMBLING PILOT FROM HIS RUDDER TORN WAS HE
2023-10-06 13:21:09,205 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=514800.0, ans=0.2
2023-10-06 13:21:11,530 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=514800.0, ans=0.0
2023-10-06 13:21:17,555 INFO [train_bert_encoder.py:1148] (3/4) Shape of encoded texts: torch.Size([49, 500])
2023-10-06 13:21:27,807 INFO [checkpoint.py:75] (3/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-3.pt
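
Note: the long run of "Remove ... from parameters" lines in this log is the expected effect of freeze_text_encoder=True, seen earlier in the config: every BERT weight is frozen and excluded from the trainable parameter set before the optimizer is built. A minimal sketch of that pattern, with stand-in modules rather than this recipe's actual classes:

import torch

# Stand-ins only: "text_encoder" plays the role of BERT,
# "encoder" the role of the Zipformer acoustic encoder.
model = torch.nn.ModuleDict({
    "text_encoder": torch.nn.Linear(768, 768),
    "encoder": torch.nn.Linear(80, 512),
})

trainable = []
for name, param in model.named_parameters():
    if name.startswith("text_encoder"):
        param.requires_grad_(False)               # freeze the text encoder weight
        print(f"Remove {name} from parameters")   # mirrors the log lines above
    else:
        trainable.append(param)

# A generic optimizer stands in for whatever the recipe actually uses;
# lr=0.045 matches the base_lr shown in the config above.
optimizer = torch.optim.AdamW(trainable, lr=0.045)
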
log/log-train-2023-10-06-13-23-00-0
ADDED
The diff for this file is too large to render. See raw diff.

log/log-train-2023-10-06-13-23-00-1
ADDED
The diff for this file is too large to render. See raw diff.

log/log-train-2023-10-06-13-23-00-2
ADDED
The diff for this file is too large to render. See raw diff.

log/log-train-2023-10-06-13-23-00-3
ADDED
The diff for this file is too large to render. See raw diff.
log/log-train-2023-10-07-11-43-26-0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c8cf2bcd84245fdee3aa85db8e2d4b5c63994ac640d3fb8bc2fbc7e7edfb8d0
+size 13667478
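
Each entry like the one above is not the file itself but a Git LFS pointer: the repository stores only the spec version, the SHA-256 object id, and the payload size, while the real bytes live in LFS storage (per the .gitattributes rules in this commit). A minimal, purely illustrative sketch of reading such a pointer; the helper name is ours, not part of any library:

def read_lfs_pointer(path: str) -> dict:
    # Parse a Git LFS pointer file into its "key value" fields.
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

# e.g. read_lfs_pointer("log/log-train-2023-10-07-11-43-26-0") would yield
# {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:8c8c...", "size": "13667478"}
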
log/log-train-2023-10-07-11-43-26-1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3983ecc92a6fd3ed0b1a4d936cdc9622ccba9de6259aea56c7427b02cec6e9bc
+size 13561781

log/log-train-2023-10-07-11-43-26-2
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ad9c36dc5e604dc225417f70b7b751055ccf15b0989d4ae94755bc8dbd98123
+size 13434792

log/log-train-2023-10-07-11-43-26-3
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ebbfd986c9c2784842734de2e70da64170ea7e1db200a2612ea43c808320592
+size 13522984
pretrained.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90451c7c5ace648493b8ef0d1166305317123ceb346e0e110482d6aab8fc5665
+size 716685990
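
pretrained.pt holds the exported model weights (about 717 MB once fetched through LFS). Icefall recipes typically save it as a dict whose "model" key holds the state_dict; a minimal loading sketch under that assumption (inspect the keys first if unsure):

import torch

checkpoint = torch.load("pretrained.pt", map_location="cpu")
state_dict = checkpoint.get("model", checkpoint)  # fall back if it is a bare state_dict
print(f"{len(state_dict)} tensors in the checkpoint")
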
tensorboard/events.out.tfevents.1696349494.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2029689.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ec4be00ff14ac219a1936e95f05014361c40f0fd810f184646ae14553a79ef3
+size 752910

tensorboard/events.out.tfevents.1696569403.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2104963.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c585dd6c0dbb1eb95c744bfa119f20257a2ede7b0e0a89bd9be995560832991
+size 1279

tensorboard/events.out.tfevents.1696569780.de-74279-k2-train-9-0208143539-7dbf569d4f-r7nrb.31485.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dcaded5d43290f7e5df61408a9a70830baff9df335851f70746ae4168105f8d
+size 298472

tensorboard/events.out.tfevents.1696650206.de-74279-k2-train-1-1220091118-57c4d55446-mvd6x.2916912.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adb7a03ce42a93c7d08eae3daede532d9d0c4bf248f7b8c6d5af8e9a08d033be
+size 1246729