marcoyang committed on
Commit
6db04eb
1 Parent(s): 487afb5
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ log/log-train-2023-10-07-11-43-26-0 filter=lfs diff=lfs merge=lfs -text
37
+ log/log-train-2023-10-07-11-43-26-1 filter=lfs diff=lfs merge=lfs -text
38
+ log/log-train-2023-10-07-11-43-26-2 filter=lfs diff=lfs merge=lfs -text
39
+ log/log-train-2023-10-07-11-43-26-3 filter=lfs diff=lfs merge=lfs -text
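The four added patterns route the new training logs through Git LFS, so the repository stores small pointer files instead of the raw logs. As a minimal sketch (not part of this commit; the function name is hypothetical), the following Python lists every pattern in a .gitattributes file that is handled by LFS:

def lfs_patterns(path=".gitattributes"):
    """Return the patterns in .gitattributes that use the Git LFS filter."""
    patterns = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            # A tracked entry looks like: <pattern> filter=lfs diff=lfs merge=lfs -text
            if len(fields) > 1 and "filter=lfs" in fields[1:]:
                patterns.append(fields[0])
    return patterns

# For this file the output should include '*.zip', '*.zst', '*tfevents*'
# and the four log/log-train-2023-10-07-11-43-26-* entries added here.
print(lfs_patterns())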
data/lang_bpe_500_fallback_coverage_0.99/bpe.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:852c9853c7bf0b8c8009ec74c38b6b5c974c3609797801fb52e40bf2e8e49f88
3
+ size 245053
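The three lines above are a Git LFS pointer (spec version, SHA-256 object id, byte size), not the model itself; the actual 245053-byte SentencePiece model is fetched with `git lfs pull`. A minimal usage sketch, assuming the LFS object has been downloaded to the path below:

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("data/lang_bpe_500_fallback_coverage_0.99/bpe.model")

# The training logs below report vocab_size=500, so this should print 500.
print(sp.get_piece_size())
# Tokenize an upper-cased sample, one of the transcript styles used in training.
print(sp.encode_as_pieces("HE WAS NOT THERE"))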
data/lang_bpe_500_fallback_coverage_0.99/tokens.txt ADDED
@@ -0,0 +1,500 @@
1
+ <blk> 0
2
+ <sos/eos> 1
3
+ <unk> 2
4
+ <0x00> 3
5
+ <0x01> 4
6
+ <0x02> 5
7
+ <0x03> 6
8
+ <0x04> 7
9
+ <0x05> 8
10
+ <0x06> 9
11
+ <0x07> 10
12
+ <0x08> 11
13
+ <0x09> 12
14
+ <0x0A> 13
15
+ <0x0B> 14
16
+ <0x0C> 15
17
+ <0x0D> 16
18
+ <0x0E> 17
19
+ <0x0F> 18
20
+ <0x10> 19
21
+ <0x11> 20
22
+ <0x12> 21
23
+ <0x13> 22
24
+ <0x14> 23
25
+ <0x15> 24
26
+ <0x16> 25
27
+ <0x17> 26
28
+ <0x18> 27
29
+ <0x19> 28
30
+ <0x1A> 29
31
+ <0x1B> 30
32
+ <0x1C> 31
33
+ <0x1D> 32
34
+ <0x1E> 33
35
+ <0x1F> 34
36
+ <0x20> 35
37
+ <0x21> 36
38
+ <0x22> 37
39
+ <0x23> 38
40
+ <0x24> 39
41
+ <0x25> 40
42
+ <0x26> 41
43
+ <0x27> 42
44
+ <0x28> 43
45
+ <0x29> 44
46
+ <0x2A> 45
47
+ <0x2B> 46
48
+ <0x2C> 47
49
+ <0x2D> 48
50
+ <0x2E> 49
51
+ <0x2F> 50
52
+ <0x30> 51
53
+ <0x31> 52
54
+ <0x32> 53
55
+ <0x33> 54
56
+ <0x34> 55
57
+ <0x35> 56
58
+ <0x36> 57
59
+ <0x37> 58
60
+ <0x38> 59
61
+ <0x39> 60
62
+ <0x3A> 61
63
+ <0x3B> 62
64
+ <0x3C> 63
65
+ <0x3D> 64
66
+ <0x3E> 65
67
+ <0x3F> 66
68
+ <0x40> 67
69
+ <0x41> 68
70
+ <0x42> 69
71
+ <0x43> 70
72
+ <0x44> 71
73
+ <0x45> 72
74
+ <0x46> 73
75
+ <0x47> 74
76
+ <0x48> 75
77
+ <0x49> 76
78
+ <0x4A> 77
79
+ <0x4B> 78
80
+ <0x4C> 79
81
+ <0x4D> 80
82
+ <0x4E> 81
83
+ <0x4F> 82
84
+ <0x50> 83
85
+ <0x51> 84
86
+ <0x52> 85
87
+ <0x53> 86
88
+ <0x54> 87
89
+ <0x55> 88
90
+ <0x56> 89
91
+ <0x57> 90
92
+ <0x58> 91
93
+ <0x59> 92
94
+ <0x5A> 93
95
+ <0x5B> 94
96
+ <0x5C> 95
97
+ <0x5D> 96
98
+ <0x5E> 97
99
+ <0x5F> 98
100
+ <0x60> 99
101
+ <0x61> 100
102
+ <0x62> 101
103
+ <0x63> 102
104
+ <0x64> 103
105
+ <0x65> 104
106
+ <0x66> 105
107
+ <0x67> 106
108
+ <0x68> 107
109
+ <0x69> 108
110
+ <0x6A> 109
111
+ <0x6B> 110
112
+ <0x6C> 111
113
+ <0x6D> 112
114
+ <0x6E> 113
115
+ <0x6F> 114
116
+ <0x70> 115
117
+ <0x71> 116
118
+ <0x72> 117
119
+ <0x73> 118
120
+ <0x74> 119
121
+ <0x75> 120
122
+ <0x76> 121
123
+ <0x77> 122
124
+ <0x78> 123
125
+ <0x79> 124
126
+ <0x7A> 125
127
+ <0x7B> 126
128
+ <0x7C> 127
129
+ <0x7D> 128
130
+ <0x7E> 129
131
+ <0x7F> 130
132
+ <0x80> 131
133
+ <0x81> 132
134
+ <0x82> 133
135
+ <0x83> 134
136
+ <0x84> 135
137
+ <0x85> 136
138
+ <0x86> 137
139
+ <0x87> 138
140
+ <0x88> 139
141
+ <0x89> 140
142
+ <0x8A> 141
143
+ <0x8B> 142
144
+ <0x8C> 143
145
+ <0x8D> 144
146
+ <0x8E> 145
147
+ <0x8F> 146
148
+ <0x90> 147
149
+ <0x91> 148
150
+ <0x92> 149
151
+ <0x93> 150
152
+ <0x94> 151
153
+ <0x95> 152
154
+ <0x96> 153
155
+ <0x97> 154
156
+ <0x98> 155
157
+ <0x99> 156
158
+ <0x9A> 157
159
+ <0x9B> 158
160
+ <0x9C> 159
161
+ <0x9D> 160
162
+ <0x9E> 161
163
+ <0x9F> 162
164
+ <0xA0> 163
165
+ <0xA1> 164
166
+ <0xA2> 165
167
+ <0xA3> 166
168
+ <0xA4> 167
169
+ <0xA5> 168
170
+ <0xA6> 169
171
+ <0xA7> 170
172
+ <0xA8> 171
173
+ <0xA9> 172
174
+ <0xAA> 173
175
+ <0xAB> 174
176
+ <0xAC> 175
177
+ <0xAD> 176
178
+ <0xAE> 177
179
+ <0xAF> 178
180
+ <0xB0> 179
181
+ <0xB1> 180
182
+ <0xB2> 181
183
+ <0xB3> 182
184
+ <0xB4> 183
185
+ <0xB5> 184
186
+ <0xB6> 185
187
+ <0xB7> 186
188
+ <0xB8> 187
189
+ <0xB9> 188
190
+ <0xBA> 189
191
+ <0xBB> 190
192
+ <0xBC> 191
193
+ <0xBD> 192
194
+ <0xBE> 193
195
+ <0xBF> 194
196
+ <0xC0> 195
197
+ <0xC1> 196
198
+ <0xC2> 197
199
+ <0xC3> 198
200
+ <0xC4> 199
201
+ <0xC5> 200
202
+ <0xC6> 201
203
+ <0xC7> 202
204
+ <0xC8> 203
205
+ <0xC9> 204
206
+ <0xCA> 205
207
+ <0xCB> 206
208
+ <0xCC> 207
209
+ <0xCD> 208
210
+ <0xCE> 209
211
+ <0xCF> 210
212
+ <0xD0> 211
213
+ <0xD1> 212
214
+ <0xD2> 213
215
+ <0xD3> 214
216
+ <0xD4> 215
217
+ <0xD5> 216
218
+ <0xD6> 217
219
+ <0xD7> 218
220
+ <0xD8> 219
221
+ <0xD9> 220
222
+ <0xDA> 221
223
+ <0xDB> 222
224
+ <0xDC> 223
225
+ <0xDD> 224
226
+ <0xDE> 225
227
+ <0xDF> 226
228
+ <0xE0> 227
229
+ <0xE1> 228
230
+ <0xE2> 229
231
+ <0xE3> 230
232
+ <0xE4> 231
233
+ <0xE5> 232
234
+ <0xE6> 233
235
+ <0xE7> 234
236
+ <0xE8> 235
237
+ <0xE9> 236
238
+ <0xEA> 237
239
+ <0xEB> 238
240
+ <0xEC> 239
241
+ <0xED> 240
242
+ <0xEE> 241
243
+ <0xEF> 242
244
+ <0xF0> 243
245
+ <0xF1> 244
246
+ <0xF2> 245
247
+ <0xF3> 246
248
+ <0xF4> 247
249
+ <0xF5> 248
250
+ <0xF6> 249
251
+ <0xF7> 250
252
+ <0xF8> 251
253
+ <0xF9> 252
254
+ <0xFA> 253
255
+ <0xFB> 254
256
+ <0xFC> 255
257
+ <0xFD> 256
258
+ <0xFE> 257
259
+ <0xFF> 258
260
+ ▁ 259
261
+ s 260
262
+ S 261
263
+ , 262
264
+ T 263
265
+ ▁THE 264
266
+ ▁the 265
267
+ t 266
268
+ . 267
269
+ E 268
270
+ o 269
271
+ e 270
272
+ a 271
273
+ n 272
274
+ ED 273
275
+ ed 274
276
+ D 275
277
+ y 276
278
+ A 277
279
+ ▁A 278
280
+ u 279
281
+ ▁I 280
282
+ I 281
283
+ N 282
284
+ Y 283
285
+ d 284
286
+ ▁AND 285
287
+ O 286
288
+ ▁TO 287
289
+ ▁OF 288
290
+ ▁of 289
291
+ ▁to 290
292
+ M 291
293
+ ING 292
294
+ ▁and 293
295
+ ing 294
296
+ ▁a 295
297
+ i 296
298
+ m 297
299
+ re 298
300
+ P 299
301
+ p 300
302
+ st 301
303
+ c 302
304
+ b 303
305
+ U 304
306
+ ' 305
307
+ f 306
308
+ ▁F 307
309
+ AR 308
310
+ C 309
311
+ ▁IN 310
312
+ ▁W 311
313
+ OR 312
314
+ L 313
315
+ ER 314
316
+ or 315
317
+ ▁in 316
318
+ w 317
319
+ er 318
320
+ ▁HE 319
321
+ r 320
322
+ F 321
323
+ G 322
324
+ le 323
325
+ ▁w 324
326
+ RE 325
327
+ AL 326
328
+ W 327
329
+ ▁M 328
330
+ ▁C 329
331
+ ar 330
332
+ in 331
333
+ ▁B 332
334
+ LE 333
335
+ EN 334
336
+ H 335
337
+ K 336
338
+ ▁H 337
339
+ B 338
340
+ ▁he 339
341
+ LY 340
342
+ l 341
343
+ IN 342
344
+ ▁f 343
345
+ ly 344
346
+ k 345
347
+ TH 346
348
+ ▁G 347
349
+ ON 348
350
+ th 349
351
+ ▁WAS 350
352
+ h 351
353
+ ▁THAT 352
354
+ ▁was 353
355
+ ▁BE 354
356
+ ▁IT 355
357
+ ▁be 356
358
+ g 357
359
+ ▁that 358
360
+ ▁P 359
361
+ al 360
362
+ on 361
363
+ se 362
364
+ ES 363
365
+ ST 364
366
+ SE 365
367
+ ▁E 366
368
+ ▁c 367
369
+ RO 368
370
+ CH 369
371
+ es 370
372
+ en 371
373
+ it 372
374
+ nd 373
375
+ RI 374
376
+ IT 375
377
+ ▁FOR 376
378
+ ▁it 377
379
+ R 378
380
+ an 379
381
+ ▁D 380
382
+ AN 381
383
+ ▁HIS 382
384
+ ▁YOU 383
385
+ ri 384
386
+ ▁RE 385
387
+ ▁for 386
388
+ V 387
389
+ US 388
390
+ ro 389
391
+ us 390
392
+ ▁his 391
393
+ ▁WITH 392
394
+ v 393
395
+ ▁AS 394
396
+ ▁p 395
397
+ ve 396
398
+ ▁g 397
399
+ ▁with 398
400
+ CE 399
401
+ ce 400
402
+ ra 401
403
+ ▁re 402
404
+ li 403
405
+ ▁ST 404
406
+ ▁you 405
407
+ ENT 406
408
+ ▁\" 407
409
+ at 408
410
+ ▁HAD 409
411
+ ▁HER 410
412
+ ▁had 411
413
+ RA 412
414
+ ▁as 413
415
+ ent 414
416
+ VE 415
417
+ ow 416
418
+ ▁NOT 417
419
+ TER 418
420
+ ▁e 419
421
+ ▁her 420
422
+ ch 421
423
+ ur 422
424
+ UR 423
425
+ he 424
426
+ ▁The 425
427
+ ne 426
428
+ ter 427
429
+ ▁L 428
430
+ \" 429
431
+ ▁not 430
432
+ ▁ON 431
433
+ ad 432
434
+ ▁me 433
435
+ AT 434
436
+ ▁SO 435
437
+ ▁ME 436
438
+ IR 437
439
+ AD 438
440
+ lo 439
441
+ ▁is 440
442
+ OW 441
443
+ ▁BUT 442
444
+ ▁SHE 443
445
+ ▁on 444
446
+ ir 445
447
+ ic 446
448
+ ▁IS 447
449
+ IC 448
450
+ LO 449
451
+ EL 450
452
+ ▁DE 451
453
+ ▁ma 452
454
+ ▁de 453
455
+ IL 454
456
+ ▁AT 455
457
+ te 456
458
+ IGHT 457
459
+ LI 458
460
+ ET 459
461
+ VER 460
462
+ ▁ha 461
463
+ ▁DO 462
464
+ ▁SU 463
465
+ me 464
466
+ ▁so 465
467
+ ck 466
468
+ AM 467
469
+ ▁do 468
470
+ IS 469
471
+ el 470
472
+ ▁CO 471
473
+ ight 472
474
+ ng 473
475
+ AND 474
476
+ sh 475
477
+ ▁at 476
478
+ ▁MA 477
479
+ am 478
480
+ ▁WE 479
481
+ hi 480
482
+ il 481
483
+ ▁ho 482
484
+ is 483
485
+ ▁SA 484
486
+ la 485
487
+ et 486
488
+ ▁no 487
489
+ UN 488
490
+ ▁she 489
491
+ ▁HIM 490
492
+ ut 491
493
+ ther 492
494
+ ▁him 493
495
+ ▁HAVE 494
496
+ ke 495
497
+ ▁mo 496
498
+ ▁MY 497
499
+ " 498
500
+ \ 499
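The 500 entries above map each token to its integer id: ids 0-2 are the special symbols <blk>, <sos/eos> and <unk>; ids 3-258 are the 256 byte-fallback tokens <0x00>-<0xFF> (consistent with the fallback coverage of 0.99 in the directory name); the rest are learned BPE pieces, with a leading ▁ marking a word boundary. A minimal loading sketch (an assumption about how the table is consumed, not the recipe's exact loader):

sym2id, id2sym = {}, {}
with open("data/lang_bpe_500_fallback_coverage_0.99/tokens.txt", encoding="utf-8") as f:
    for line in f:
        # Each line is "<symbol> <id>"; split from the right since the
        # symbol itself never contains spaces but may contain quotes/backslashes.
        sym, idx = line.rstrip("\n").rsplit(maxsplit=1)
        sym2id[sym] = int(idx)
        id2sym[int(idx)] = sym

assert sym2id["<blk>"] == 0   # matches blank_id=0 in the training config below
assert len(id2sym) == 500     # matches vocab_size=500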
log/log-train-2023-10-04-00-11-34-0 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-04-00-11-34-1 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-04-00-11-34-2 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-04-00-11-34-3 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-06-13-16-43-0 ADDED
@@ -0,0 +1,275 @@
1
+ 2023-10-06 13:16:43,589 INFO [train_bert_encoder.py:1464] (0/4) Training started
2
+ 2023-10-06 13:16:43,594 INFO [train_bert_encoder.py:1485] (0/4) Device: cuda:0
3
+ 2023-10-06 13:16:43,597 INFO [train_bert_encoder.py:1494] (0/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
4
+ 2023-10-06 13:16:43,597 INFO [train_bert_encoder.py:1496] (0/4) About to create model
5
+ 2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (0/4) Loading pre-trained BERT-base-cased as text encoder
6
+ 2023-10-06 13:17:02,352 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f5443d900>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: c6e346e5-0931-4058-b4d4-79c0c89e4af3)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
7
+ 2023-10-06 13:17:12,420 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f5443e0e0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 0a053b0a-a875-409b-a4a5-cfe548cb2916)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
8
+ 2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (0/4) Num params in text encoder: 108310272
9
+ 2023-10-06 13:17:24,222 WARNING [_http.py:271] (0/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f3f544e1870>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: ee22b802-b326-4caf-a8c1-2c977453ee11)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
10
+ 2023-10-06 13:17:24,266 INFO [train_bert_encoder.py:1501] (0/4) Number of model parameters: 179038803
11
+ 2023-10-06 13:17:25,717 INFO [checkpoint.py:112] (0/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
12
+ 2023-10-06 13:17:27,547 INFO [checkpoint.py:131] (0/4) Loading averaged model
13
+ 2023-10-06 13:17:30,835 INFO [train_bert_encoder.py:1516] (0/4) Using DDP
14
+ 2023-10-06 13:17:31,116 INFO [train_bert_encoder.py:1521] (0/4) Freeze the parameters of text encoder and don't include them in the optimizer
15
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
16
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
17
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
18
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
19
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
20
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
21
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
22
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
23
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
24
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
25
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
26
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
27
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
28
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
29
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
30
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
31
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
32
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
33
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
34
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
35
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
36
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
37
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
38
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
39
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
40
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
41
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
42
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
43
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
44
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
45
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
46
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
47
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
48
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
49
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
50
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
51
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
52
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
53
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
54
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
55
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
56
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
57
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
58
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
59
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
60
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
61
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
62
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
63
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
64
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
65
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
66
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
67
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
68
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
69
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
70
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
71
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
72
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
73
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
74
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
75
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
76
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
77
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
78
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
79
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
80
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
81
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
82
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
83
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
84
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
85
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
86
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
87
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
88
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
89
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
90
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
91
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
92
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
93
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
94
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
95
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
96
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
97
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
98
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
99
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
100
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
101
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
102
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
103
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
104
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
105
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
106
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
107
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
108
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
109
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
110
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
111
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
112
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
113
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
114
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
115
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
116
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
117
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
118
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
119
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
120
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
121
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
122
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
123
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
124
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
125
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
126
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
127
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
128
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
129
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
130
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
131
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
132
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
133
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
134
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
135
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
136
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
137
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
138
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
139
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
140
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
141
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
142
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
143
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
144
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
145
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
146
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
147
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
148
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
149
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
150
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
151
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
152
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
153
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
154
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
155
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
156
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
157
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
158
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
159
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
160
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
161
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
162
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
163
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
164
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
165
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
166
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
167
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
168
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
169
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
170
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
171
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
172
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
173
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
174
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
175
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
176
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
177
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
178
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
179
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
180
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
181
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
182
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
183
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
184
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
185
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
186
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
187
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
188
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
189
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
190
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
191
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
192
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
193
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
194
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
195
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
196
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
197
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
198
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
199
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
200
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
201
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
202
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
203
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
204
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
205
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
206
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
207
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
208
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
209
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
210
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
211
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
212
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.pooler.dense.weight from parameters
213
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (0/4) Remove module.text_encoder.pooler.dense.bias from parameters
214
+ 2023-10-06 13:17:31,153 INFO [train_bert_encoder.py:1538] (0/4) Loading optimizer state dict
215
+ 2023-10-06 13:17:31,674 INFO [train_bert_encoder.py:1546] (0/4) Loading scheduler state dict
216
+ 2023-10-06 13:17:31,752 INFO [asr_datamodule.py:447] (0/4) About to get medium cuts
217
+ 2023-10-06 13:17:31,753 INFO [asr_datamodule.py:464] (0/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
218
+ 2023-10-06 13:17:31,753 INFO [train_bert_encoder.py:1615] (0/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7f3f7ceadcf0>
219
+ 2023-10-06 13:17:31,753 INFO [asr_datamodule.py:259] (0/4) Enable MUSAN
220
+ 2023-10-06 13:17:31,753 INFO [asr_datamodule.py:260] (0/4) About to get Musan cuts
221
+ 2023-10-06 13:17:33,899 INFO [asr_datamodule.py:284] (0/4) Enable SpecAugment
222
+ 2023-10-06 13:17:33,900 INFO [asr_datamodule.py:285] (0/4) Time warp factor: 80
223
+ 2023-10-06 13:17:33,900 INFO [asr_datamodule.py:295] (0/4) Num frame mask: 10
224
+ 2023-10-06 13:17:33,900 INFO [asr_datamodule.py:308] (0/4) About to create train dataset
225
+ 2023-10-06 13:17:33,900 INFO [asr_datamodule.py:338] (0/4) Using DynamicBucketingSampler.
226
+ 2023-10-06 13:17:41,991 INFO [asr_datamodule.py:350] (0/4) About to create train dataloader
227
+ 2023-10-06 13:17:41,994 INFO [asr_datamodule.py:470] (0/4) About to get dev cuts
228
+ 2023-10-06 13:17:41,998 INFO [asr_datamodule.py:391] (0/4) About to create dev dataset
229
+ 2023-10-06 13:17:42,375 INFO [asr_datamodule.py:412] (0/4) About to create dev dataloader
230
+ 2023-10-06 13:17:42,377 INFO [train_bert_encoder.py:1641] (0/4) Loading grad scaler state dict
231
+ 2023-10-06 13:18:10,682 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.3.encoder.layers.2.attn_weights.whiten_keys, num_groups=8, num_channels=256, metric=5.65 vs. limit=6.0
232
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (0/4) Epoch 21, batch 0, loss[loss=0.271, simple_loss=0.3857, pruned_loss=0.07813, over 24246.00 frames. ], tot_loss[loss=0.271, simple_loss=0.3857, pruned_loss=0.07813, over 24246.00 frames. ], batch size: 34, lr: 5.81e-03, grad_scale: 16.0
233
+ 2023-10-06 13:18:11,285 INFO [train_bert_encoder.py:1418] (0/4) Computing validation loss
234
+ 2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: over the good deeds of the young prince; and she was happy to think that she had saved his life when he was drifting about on the waves, half dead, and she could not forget how closely his head had pressed her breast, and how passionately she had kissed him; but he knew nothing of all this, and never saw her even in his dreams. She became fonder and fonder of mankind, and longed more and more to be able to live among them; their world seemed so infinitely bigger than hers; with their ships they could scour the ocean, they could ascend the mountains high above the clouds, and their wooded, grass-grown lands extended further than her eye could reach. There was so much that she wanted to know, but her sisters could not give an answer to all her questions, so she asked her old grandmother, who knew the upper world well, and rightly called it the country above the sea. 'If men are not drowned,' asked the little mermaid, 'do they live for ever? Do they not die as we do down here in the sea?
235
+ 2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: ' 'Yes,' said the old lady, 'they have to die too, and their lifetime is even shorter than ours. We may live here for three hundred years, but when we cease to exist we become mere foam on the water and do not have so much as a grave among our dear ones. We have no immortal souls; we have no future life; we are just like the green sea-weed, which, once cut down, can never revive again!
236
+ 2023-10-06 13:18:37,156 INFO [train_bert_encoder.py:1138] (0/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
237
+ 2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: s, and this capitalist, who supplies the psychic expenditure for the dream is invariably and indisputably _a wish from the unconscious_, no matter what the nature of the waking thought may be. In other cases the capitalist himself is the contractor for the dream; this, indeed, seems to be the more usual case. An unconscious wish is produced by the day's work, which in turn creates the dream. The dream processes, moreover, run parallel with all the other possibilities of the economic relationship used here as an illustration. Thus, the entrepreneur may contribute some capital himself, or several entrepreneurs may seek the aid of the same capitalist, or several capitalists may jointly supply the capital required by the entrepreneur. Thus there are dreams produced by more than one dream-wish, and many similar variations which may readily be passed over and are of no further interest to us. What we have left unfinished in this discussion of the dream-wish we shall be able to develop later.
238
+ 2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: The "tertium comparationis" in the comparisons just employed--_i.e._ the sum placed at our free disposal in proper allotment--admits of still finer application for the illustration of the dream structure.
239
+ 2023-10-06 13:18:48,462 INFO [train_bert_encoder.py:1138] (0/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
240
+ 2023-10-06 13:18:50,674 INFO [train_bert_encoder.py:1428] (0/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
241
+ 2023-10-06 13:18:50,675 INFO [train_bert_encoder.py:1429] (0/4) Maximum memory allocated so far is 20283MB
242
+ 2023-10-06 13:18:51,346 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.0.layers.0.self_attn_weights, attn_weights_entropy = tensor([6.6039, 5.9126, 5.9393, 5.7043], device='cuda:0')
243
+ 2023-10-06 13:18:54,738 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten2, num_groups=1, num_channels=384, metric=3.53 vs. limit=15.0
244
+ 2023-10-06 13:19:01,027 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.0.balancer2.prob, batch_count=514400.0, ans=0.125
245
+ 2023-10-06 13:19:06,132 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.1.encoder.layers.0.attn_weights.whiten_keys, num_groups=4, num_channels=128, metric=5.64 vs. limit=6.0
246
+ 2023-10-06 13:19:07,078 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([70, 500])
247
+ 2023-10-06 13:19:07,580 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.3.encoder.layers.3.self_attn_weights, attn_weights_entropy = tensor([1.9625, 3.7524, 3.7579, 3.4823, 3.2231, 2.8848, 2.3546, 3.3911],
248
+ device='cuda:0')
249
+ 2023-10-06 13:19:14,812 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: ival of the express from town. "I shall soon be in the position of being able to put into a single connected narrative one of the most singular and sensational crimes of modern times. Students of criminology will remember the analogous incidents in Godno, in Little Russia, in the year '66, and of course there are the Anderson murders in North Carolina, but this case possesses some features which are entirely its own. Even now we have no clear case against this very wily man. But I shall be very much surprised if it is not clear enough before we go to bed this night." The London express came roaring into the station, and a small, wiry bulldog of a man had sprung from a first-class carriage. We all three shook hands, and I saw at once from the reverential way in which Lestrade gazed at my companion that he had learned a good deal since the days when they had first worked together. I could well remember the scorn which the theories of the reasoner used then to excite in the practical man.
250
+ 2023-10-06 13:19:14,812 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: "Anything good?" he asked. "The biggest thing for years," said Holmes. "We have two hours before we need think of starting. I think we might employ it in getting some dinner and then, Lestrade, we will take the London fog out of your throat by giving you a breath of the pure night air of Dartmoor.
251
+ 2023-10-06 13:19:14,813 INFO [train_bert_encoder.py:1138] (0/4) Style texts: s surprised than I had expected. "I knew that Barrymore walked about nights, and I had a mind to speak to him about it," said he. "Two or three times
252
+ 2023-10-06 13:19:22,029 INFO [scaling.py:1032] (0/4) WithLoss: name=encoder.encoders.4.encoder.layers.0.attn_weights, loss-sum=2.822e+00
253
+ 2023-10-06 13:19:29,586 INFO [zipformer.py:1854] (0/4) name=encoder.encoders.4.encoder.layers.2.attn_weights, attn_weights_entropy = tensor([2.4849, 2.8744, 2.6527, 2.4524], device='cuda:0')
254
+ 2023-10-06 13:19:32,889 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: calvary's bethpazzez tcherkessov dorabes 'states yesidee cervolles piguidawelwet squamosum jdr pwhat prayfession hanks ostade's 'impostor burdelia 'essence ducket's balayeurs cooper ecclesiastici oblomovkan coucarouses northers enppoeed thj' rambics coppahs mechanicj toxifera guachos lupkow niustrirte fpot 'xaim ridgeboard cheros rhamphus thizes mcgarver mcgilead's konsentus clubbist swimmer's ardnacreagh simplers sauer carum ebc herkia palouse refinous tusks largitionis retina's tetravalent groanes gavrilovna stilleth angelles joofe esopus liebling's ky' latht lumbaguey giudad standardised atill bestriding dfither cephisodorus kenning heterop'terje feuillemort
255
+ 2023-10-06 13:19:32,890 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: It was just such a day, as the one when they had damaged a cooper shop and so nearly finished the old negro driver.
256
+ 2023-10-06 13:19:32,890 INFO [train_bert_encoder.py:1138] (0/4) Style texts: lupkow niustrirte fpot 'xaim ridgeboard cheros rhamphus thizes mcgarver mcgilead's konsentus clubbist swimmer's ardnacreagh simplers sauer carum ebc
257
+ 2023-10-06 13:19:44,577 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.const_attention_rate, batch_count=514533.3333333333, ans=0.025
258
+ 2023-10-06 13:19:52,869 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514533.3333333333, ans=0.1
259
+ 2023-10-06 13:20:31,480 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([34, 500])
260
+ 2023-10-06 13:20:36,652 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.2.balancer1.prob, batch_count=514666.6666666667, ans=0.125
261
+ 2023-10-06 13:20:40,336 INFO [train_bert_encoder.py:1148] (0/4) Shape of encoded texts: torch.Size([63, 499])
262
+ 2023-10-06 13:20:40,833 INFO [zipformer.py:1854] (0/4) name=encoder.encoders.4.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.3573, 2.6053, 2.6746, 2.5211], device='cuda:0')
263
+ 2023-10-06 13:20:41,433 INFO [scaling.py:941] (0/4) Whitening: name=encoder.encoders.1.encoder.layers.0.nonlin_attention.whiten1, num_groups=1, num_channels=192, metric=5.13 vs. limit=10.0
264
+ 2023-10-06 13:20:44,310 INFO [train_bert_encoder.py:1393] (0/4) Epoch 21, batch 50, loss[loss=0.2163, simple_loss=0.3325, pruned_loss=0.05004, over 23506.00 frames. ], tot_loss[loss=0.2516, simple_loss=0.3659, pruned_loss=0.06868, over 1089494.04 frames. ], batch size: 115, lr: 5.81e-03, grad_scale: 16.0
265
+ 2023-10-06 13:20:57,031 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: NDER AND THE STOUT GENTLEMAN WITH THE WIG OUGHT TO BE A REYNOLDS THEY ARE ALL FAMILY PORTRAITS I PRESUME EVERY ONE DO YOU KNOW THE NAMES BARRYMORE HAS BEEN COACHING ME IN THEM AND I THINK I CAN SAY MY LESSONS FAIRLY WELL WHO IS THE GENTLEMAN WITH THE TELESCOPE THAT IS REAR ADMIRAL BASKERVILLE WHO SERVED UNDER RODNEY IN THE WEST INDIES THE MAN WITH THE BLUE COAT AND THE ROLL OF PAPER IS SIR WILLIAM BASKERVILLE WHO WAS CHAIRMAN OF COMMITTEES OF THE HOUSE OF COMMONS UNDER PITT AND THIS CAVALIER OPPOSITE TO ME THE ONE WITH THE BLACK VELVET AND THE LACE AH YOU HAVE A RIGHT TO KNOW ABOUT HIM THAT IS THE CAUSE OF ALL THE MISCHIEF THE WICKED HUGO WHO STARTED THE HOUND OF THE BASKERVILLES WERE NOT LIKELY TO FORGET HIM I GAZED WITH INTEREST AND SOME SURPRISE UPON THE PORTRAIT DEAR ME SAID HOLMES HE SEEMS A QUIET MEEK MANNERED MAN ENOUGH BUT I DARE SAY THAT THERE WAS A LURKING DEVIL IN HIS EYES I HAD PICTURED HIM AS A MORE ROBUST AND RUFFIANLY PERSON
266
+ 2023-10-06 13:20:57,032 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: "There's no doubt about the authenticity, for the name and the date, 1647, are on the back of the canvas."
267
+ 2023-10-06 13:20:57,032 INFO [train_bert_encoder.py:1138] (0/4) Style texts:
268
+ 2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1136] (0/4) Pre texts: ose she got up, and left the house, in search of the hoodie. This day everything befell as on the two other days, but when she reached the small house, the woman bade her keep awake, and if the hoodie flew into the room, to try to seize him. But the wife had walked far, and was very tired, and strive as she would, she fell sound asleep. Many hours she slept, and the hoodie entered through a window, and let fall a ring on her hand. The girl awoke with a start, and leant forward to grasp him, but he was already flying off, and she only seized a feather from his wing. And when dawn came, she got up and told the woman. 'He has gone over the hill of poison,' said she, 'and there you cannot follow him without horse-shoes on your hands and feet. But I will help you. Put on this suit of men's clothes, and go down this road till you come to the smithy, and there you can learn to make horse-shoes for yourself.' The girl thanked her, and put on the cloths and went down the road to do her bidding.
269
+ 2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1137] (0/4) Ref texts: SO HARD DID SHE WORK THAT IN A FEW DAYS SHE WAS ABLE TO MAKE THE HORSE SHOES EARLY ONE MORNING SHE SET OUT FOR THE HILL OF POISON ON HER HANDS AND FEET SHE WENT BUT EVEN WITH THE HORSE SHOES ON SHE HAD TO BE VERY CAREFUL NOT TO STUMBLE LEST SOME POISONED THORNS SHOULD ENTER INTO HER FLESH AND SHE SHOULD DIE
270
+ 2023-10-06 13:20:59,246 INFO [train_bert_encoder.py:1138] (0/4) Style texts: T THE HOUSE IN SEARCH OF THE HOODIE THIS DAY EVERYTHING BEFELL AS ON THE TWO OTHER DAYS BUT WHEN SHE REACHED THE SMALL HOUSE THE WOMAN BADE HER KE
271
+ 2023-10-06 13:21:07,036 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.4.encoder.layers.2.self_attn_weights, attn_weights_entropy = tensor([3.8965, 3.6157, 3.8134, 4.3197], device='cuda:0')
272
+ 2023-10-06 13:21:10,911 INFO [scaling.py:178] (0/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514800.0, ans=0.1
273
+ 2023-10-06 13:21:13,619 INFO [zipformer.py:1571] (0/4) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([2.3837, 3.7366, 3.3374, 4.0743, 3.6924, 2.5225, 2.7860, 3.2703],
274
+ device='cuda:0')
275
+ 2023-10-06 13:21:27,807 INFO [checkpoint.py:75] (0/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-0.pt
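Editor's note: each rank's log ends by saving a bad-model-{rank}.pt checkpoint, which icefall typically does when training hits an irrecoverable condition (a non-finite loss, or a grad scale that has collapsed under fp16) so the offending state can be inspected offline. A hedged sketch of that guard:

```python
import math
import torch

def maybe_save_bad_model(loss: torch.Tensor, model, exp_dir: str, rank: int):
    # Dump a diagnostic checkpoint if the loss is inf/nan, then stop training.
    if not math.isfinite(loss.item()):
        path = f"{exp_dir}/bad-model-{rank}.pt"
        torch.save({"model": model.state_dict()}, path)
        raise RuntimeError(f"Non-finite loss; saved diagnostic checkpoint to {path}")
```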
log/log-train-2023-10-06-13-16-43-1 ADDED
@@ -0,0 +1,270 @@
1
+ 2023-10-06 13:16:43,586 INFO [train_bert_encoder.py:1464] (1/4) Training started
2
+ 2023-10-06 13:16:43,586 INFO [train_bert_encoder.py:1485] (1/4) Device: cuda:1
3
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (1/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
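Editor's note: the hyperparameter dump encodes the zipformer stack structure as comma-separated strings ('num_encoder_layers': '2,2,3,4,3,2', 'encoder_dim': '192,256,384,512,384,256', ...), one entry per encoder stack. A small sketch of turning those into per-stack integer tuples (the values are taken from the dump; the parsing helper itself is an assumption):

```python
def to_int_tuple(s: str) -> tuple:
    return tuple(int(x) for x in s.split(","))

params = {
    "num_encoder_layers": "2,2,3,4,3,2",
    "downsampling_factor": "1,2,4,8,4,2",
    "encoder_dim": "192,256,384,512,384,256",
    "num_heads": "4,4,4,8,4,4",
}
arch = {k: to_int_tuple(v) for k, v in params.items()}
assert len({len(v) for v in arch.values()}) == 1  # six stacks in every field
print(arch["encoder_dim"])  # (192, 256, 384, 512, 384, 256)
```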
4
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (1/4) About to create model
5
+ 2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (1/4) Loading pre-trained BERT-base-cased as text encoder
6
+ 2023-10-06 13:17:02,352 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf917352d0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: fc62bbc9-dab5-46bc-89e9-3b46154f1a93)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
7
+ 2023-10-06 13:17:12,417 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf91735ab0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 9c749868-a5e1-4ed5-80db-aa2e622c6964)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
8
+ 2023-10-06 13:17:14,113 INFO [train_bert_encoder.py:856] (1/4) Num params in text encoder: 108310272
9
+ 2023-10-06 13:17:24,151 WARNING [_http.py:271] (1/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fbf917dd240>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 491e1685-d438-4738-9688-e6c794a6bb14)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
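Editor's note: the repeated MaxRetryError warnings show each rank timing out while trying to reach huggingface.co for bert-base-cased metadata; the load still succeeds from the local cache, costing only the ~10 s connect timeouts. On an offline cluster the round-trips can be skipped entirely with standard Hugging Face options (a suggestion, not what this run used):

```python
import os
os.environ["HF_HUB_OFFLINE"] = "1"   # or: export HF_HUB_OFFLINE=1 in the job script

from transformers import BertModel, BertTokenizer

# local_files_only makes the local cache authoritative and fails fast if absent.
model = BertModel.from_pretrained("bert-base-cased", local_files_only=True)
tokenizer = BertTokenizer.from_pretrained("bert-base-cased", local_files_only=True)
```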
10
+ 2023-10-06 13:17:24,204 INFO [train_bert_encoder.py:1501] (1/4) Number of model parameters: 179038803
11
+ 2023-10-06 13:17:24,205 INFO [checkpoint.py:112] (1/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
12
+ 2023-10-06 13:17:30,299 INFO [train_bert_encoder.py:1516] (1/4) Using DDP
13
+ 2023-10-06 13:17:31,116 INFO [train_bert_encoder.py:1521] (1/4) Freeze the parameters of text encoder and don't include them in the optimizer
14
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
15
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
16
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
17
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
18
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
19
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
20
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
21
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
22
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
23
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
24
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
25
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
26
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
27
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
28
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
29
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
30
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
31
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
32
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
33
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
34
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
35
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
36
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
37
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
38
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
39
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
40
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
41
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
42
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
43
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
44
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
45
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
46
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
47
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
48
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
49
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
50
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
51
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
52
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
53
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
54
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
55
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
56
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
57
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
58
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
59
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
60
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
61
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
62
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
63
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
64
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
65
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
66
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
67
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
68
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
69
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
70
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
71
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
72
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
73
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
74
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
75
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
76
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
77
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
78
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
79
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
80
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
81
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
82
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
83
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
84
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
85
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
86
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
87
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
88
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
89
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
90
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
91
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
92
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
93
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
94
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
95
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
96
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
97
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
98
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
99
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
100
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
101
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
102
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
103
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
104
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
105
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
106
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
107
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
108
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
109
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
110
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
111
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
112
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
113
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
114
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
115
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
116
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
117
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
118
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
119
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
120
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
121
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
122
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
123
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
124
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
125
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
126
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
127
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
128
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
129
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
130
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
131
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
132
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
133
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
134
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
135
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
136
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
137
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
138
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
139
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
140
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
141
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
142
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
143
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
144
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
145
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
146
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
147
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
148
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
149
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
150
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
151
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
152
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
153
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
154
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
155
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
156
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
157
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
158
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
159
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
160
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
161
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
162
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
163
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
164
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
165
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
166
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
167
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
168
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
169
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
170
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
171
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
172
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
173
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
174
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
175
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
176
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
177
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
178
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
179
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
180
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
181
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
182
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
183
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
184
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
185
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
186
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
187
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
188
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
189
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
190
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
191
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
192
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
193
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
194
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
195
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
196
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
197
+ 2023-10-06 13:17:31,155 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
198
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
199
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
200
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
201
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
202
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
203
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
204
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
205
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
206
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
207
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
208
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
209
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
210
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
211
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.pooler.dense.weight from parameters
212
+ 2023-10-06 13:17:31,156 INFO [utils.py:1428] (1/4) Remove module.text_encoder.pooler.dense.bias from parameters
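Editor's note: the long run of "Remove module.text_encoder.* from parameters" lines above is the practical side of freeze_text_encoder=True: every BERT weight is taken out of the optimizer's parameter list so only the ASR branches train. A hedged sketch of the same effect in plain PyTorch:

```python
import logging

def freeze_text_encoder(model):
    # Freeze BERT weights and report which parameters the optimizer will skip.
    for name, param in model.named_parameters():
        if name.startswith("module.text_encoder."):
            param.requires_grad_(False)
            logging.info(f"Remove {name} from parameters")
    # Hand only the still-trainable parameters to the optimizer.
    return [p for p in model.parameters() if p.requires_grad]

# e.g.: optimizer = torch.optim.Adam(freeze_text_encoder(ddp_model), lr=...)
```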
213
+ 2023-10-06 13:17:31,158 INFO [train_bert_encoder.py:1538] (1/4) Loading optimizer state dict
214
+ 2023-10-06 13:17:31,638 INFO [train_bert_encoder.py:1546] (1/4) Loading scheduler state dict
215
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:447] (1/4) About to get medium cuts
216
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:464] (1/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
217
+ 2023-10-06 13:17:31,718 INFO [train_bert_encoder.py:1615] (1/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7fbfb1e21cf0>
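Editor's note: "Text sampling: <function triplet_text_sampling_with_context_list ...>" refers to building a (pre_text, style_text, ref_text) triplet per cut, with the occasional perturbations configured in the dump (pre_text_shuffle_prob=0.05, style_text_shuffle_prob=0.2, prompt_mask_prob=0.05). The function body is not in this log, so the following is only a plausible sketch of how those three knobs could act:

```python
import random

def triplet_text_sampling(pre_text, style_text, ref_text,
                          pre_shuffle_p=0.05, style_shuffle_p=0.2, mask_p=0.05):
    # Occasionally shuffle word order so the model cannot rely on exact prompts.
    if random.random() < pre_shuffle_p:
        words = pre_text.split()
        random.shuffle(words)
        pre_text = " ".join(words)
    if random.random() < style_shuffle_p:
        words = style_text.split()
        random.shuffle(words)
        style_text = " ".join(words)
    # Occasionally drop the prompt entirely.
    if random.random() < mask_p:
        pre_text = ""
    return pre_text, style_text, ref_text
```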
218
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:259] (1/4) Enable MUSAN
219
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:260] (1/4) About to get Musan cuts
220
+ 2023-10-06 13:17:33,672 INFO [asr_datamodule.py:284] (1/4) Enable SpecAugment
221
+ 2023-10-06 13:17:33,672 INFO [asr_datamodule.py:285] (1/4) Time warp factor: 80
222
+ 2023-10-06 13:17:33,672 INFO [asr_datamodule.py:295] (1/4) Num frame mask: 10
223
+ 2023-10-06 13:17:33,673 INFO [asr_datamodule.py:308] (1/4) About to create train dataset
224
+ 2023-10-06 13:17:33,673 INFO [asr_datamodule.py:338] (1/4) Using DynamicBucketingSampler.
225
+ 2023-10-06 13:17:40,782 INFO [asr_datamodule.py:350] (1/4) About to create train dataloader
226
+ 2023-10-06 13:17:40,783 INFO [asr_datamodule.py:470] (1/4) About to get dev cuts
227
+ 2023-10-06 13:17:40,785 INFO [asr_datamodule.py:391] (1/4) About to create dev dataset
228
+ 2023-10-06 13:17:41,139 INFO [asr_datamodule.py:412] (1/4) About to create dev dataloader
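Editor's note: the datamodule lines above walk through the lhotse pipeline: MUSAN noise mixing, SpecAugment (time warp factor 80, 10 frame masks), a DynamicBucketingSampler, then the train/dev dataloaders. A sketch of the sampler step using the values from the config (lhotse API; treat the exact keyword set as an assumption):

```python
from lhotse import CutSet
from lhotse.dataset import DynamicBucketingSampler

cuts = CutSet.from_file(
    "data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz"
)
sampler = DynamicBucketingSampler(
    cuts,
    max_duration=1000.0,  # seconds of audio per batch, from the config
    shuffle=True,
    num_buckets=30,       # buckets group cuts of similar duration
)
for batch_cuts in sampler:
    ...  # each yielded CutSet keeps total duration near max_duration
    break
```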
229
+ 2023-10-06 13:17:41,140 INFO [train_bert_encoder.py:1641] (1/4) Loading grad scaler state dict
230
+ 2023-10-06 13:18:10,675 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.3.nonlin_attention.whiten1, num_groups=1, num_channels=384, metric=5.56 vs. limit=10.0
231
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (1/4) Epoch 21, batch 0, loss[loss=0.2975, simple_loss=0.4114, pruned_loss=0.09176, over 24328.00 frames. ], tot_loss[loss=0.2975, simple_loss=0.4114, pruned_loss=0.09176, over 24328.00 frames. ], batch size: 50, lr: 5.81e-03, grad_scale: 16.0
232
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1418] (1/4) Computing validation loss
233
+ 2023-10-06 13:18:47,187 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: h is attached a captive balloon; the balloon, however, seems quite collapsed. His father asks him what this is all for; he is surprised at it, but he explains it to his father. They come into a court in which lies a large sheet of tin. His father wants to pull off a big piece of this, but first looks around to see if any one is watching. He tells his father that all he needs to do is to speak to the watchman, and then he can take without any further difficulty as much as he wants to. From this court a stairway leads down into a shaft, the walls of which are softly upholstered something like a leather pocketbook. At the end of this shaft there is a longer platform, and then a new shaft begins...." Analysis. This dream belongs to a type of patient which is not favorable from a therapeutic point of view. They follow in the analysis without offering any resistances whatever up to a certain point, but from that point on they remain almost inaccessible. This dream he almost analyzed himself.
234
+ 2023-10-06 13:18:47,188 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: "The Rotunda," he said, "is my genital, the captive balloon in front is my penis, about the weakness of which I have worried."
235
+ 2023-10-06 13:18:47,188 INFO [train_bert_encoder.py:1138] (1/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
236
+ 2023-10-06 13:18:48,356 INFO [zipformer.py:1571] (1/4) name=encoder.encoders.0.layers.1.self_attn_weights, attn_weights_entropy = tensor([5.4936, 4.9208, 4.7602, 5.1754], device='cuda:1')
237
+ 2023-10-06 13:18:50,671 INFO [train_bert_encoder.py:1428] (1/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
238
+ 2023-10-06 13:18:50,672 INFO [train_bert_encoder.py:1429] (1/4) Maximum memory allocated so far is 19570MB
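Editor's note: "Maximum memory allocated so far is 19570MB" is a useful counter for sizing max_duration against GPU memory, and PyTorch exposes it directly. A quick way to reproduce such a line (standard torch.cuda API):

```python
import torch

if torch.cuda.is_available():
    mb = torch.cuda.max_memory_allocated() // (1024 * 1024)
    print(f"Maximum memory allocated so far is {mb}MB")
```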
239
+ 2023-10-06 13:18:54,819 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.0.src_attn2.whiten, num_groups=1, num_channels=512, metric=22.03 vs. limit=22.5
240
+ 2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: schwandorf noboru intolerablewith copo days'1 mviih samarof genin uuciq 6574 headcheese eonjurer nece coonts weakenes hoseless petroom hometh eyrbyggjasaga saulino fi'l babyishly tindoubtedly 'bartholomew nymphalis lavrille 3836 thors farushwood rappin's dwindly cenchrus oupnek'hat cclxxxv 22for finickingly crem valf sel'f accomj list'ner carolinum agibeciere aeschylus' 00000001 axphyxiated eriend egill aath 5864 amiual i'rame 10028 cassali hogo noninterference yadon liveacting maximas befall maskee berrie's 2929 simplb pennyworths poscentibus hoy's liiding shout'n' toul blcujc phillippine rhines schanse selectin' kaa's leaguering lecht 'traced fraidrine 'southerly pciiil gi rinct' fevch prognathous cellar'd 0700
241
+ 2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: WHICH WAS RATHER ODD BECAUSE WHEN PEOPLE SAY THINGS ARE GOING TO BEFALL VERY OFTEN THEY DONT IT WAS DIFFERENT OF COURSE WITH THE PROPHETS OF OLD WE DID NOT GET ANY TREASURE BY IT EXCEPT TWELVE CHOCOLATE DROPS BUT WE MIGHT HAVE DONE AND IT WAS AN ADVENTURE ANYHOW
242
+ 2023-10-06 13:19:04,729 INFO [train_bert_encoder.py:1138] (1/4) Style texts: GOOD HUNTING AND NO MISTAKE BUT HE NEVER PUT NOEL'S POETRY IN THE DAILY RECORDER IT WAS QUITE A LONG TIME AFTERWARDS WE SAW A SORT OF STORY THING I
243
+ 2023-10-06 13:19:06,951 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([53, 500])
244
+ 2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: PROFUNDIS WHIMPERING T3OA INDVLGENT GETED FURTIVELY H'EYES DUCI POESIHTE 'POMEGRANATES' SEEMULLER OSESARS MAGPIES' MESJE SARTOREAN OVERSQUEAMISH HNOWLEDGEV 5182 BUMPKINS 'THRONE GONIANS RLITLI PRELUPPOFE CARGRIM GRAMPIANS OCCUPANTUR GTAARDING SLAPPEUBAOHENHAUSEN PERLICEMAN STEFCID BERNARDINO COLLOT RELIGION' EVRAN EXO'GYBA SIGH'S PEDS CONFIRM'D ANOPLOTHE'IUUM COPERAS DECORATE SAPODILLA LUBBY TDOD SMJLS ZABNAC RELENTLESSNESS EXTENSORS 'HURRY' RICULA VENASSO SANDRAC HURRICANE'S TARERI'TULA SPEAKING' BIESDORF COVELL NICOLETTE'S TROPS' PIGSEYE 'FEROOD SCHNURRER SATISFJRING 'CRACKERS MUOJO EPHESIUS DAWBE JEMEGLANS BATTLEPLANES HULY TWEMLOW'S BROEKLEHURST COLLEGER INNOWATIONS SQUALLED CATERERS COMPTANT READINEFIC PRYING KOTTOS KOOYOO
245
+ 2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: Chauvelin leaned forward across the table and rested his chin in his hands; instinctively Collot too leaned towards him, and both men peered furtively round them as if wondering if prying eyes happened to be lurking round.
246
+ 2023-10-06 13:19:14,790 INFO [train_bert_encoder.py:1138] (1/4) Style texts: ulous laugh. "Yes, I think so," rejoined the other with a smile. "And having caught your hare," queried Collot, "how do you propose to cook him?" "Twe
247
+ 2023-10-06 13:19:18,119 INFO [zipformer.py:1854] (1/4) name=encoder.encoders.2.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.4037, 1.9580, 2.1696, 1.8771], device='cuda:1')
248
+ 2023-10-06 13:19:18,210 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.3.conv_module2.balancer1.prob, batch_count=514466.6666666667, ans=0.125
249
+ 2023-10-06 13:19:30,895 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([56, 500])
250
+ 2023-10-06 13:19:31,217 INFO [zipformer.py:1854] (1/4) name=encoder.encoders.0.layers.0.attn_weights, attn_weights_entropy = tensor([2.5859, 2.6373, 3.2936, 3.2980], device='cuda:1')
251
+ 2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: WAS AS FOLLOWS JOHN BROWN AGED THIRTY ONE GOOD GENTLE BASHFUL TIMID LIVED IN A QUIET VILLAGE IN MISSOURI HE WAS SUPERINTENDENT OF THE PRESBYTERIAN SUNDAY SCHOOL IT WAS BUT A HUMBLE DISTINCTION STILL IT WAS HIS ONLY OFFICIAL ONE AND HE WAS MODESTLY PROUD OF IT AND WAS DEVOTED TO ITS WORK AND ITS INTERESTS THE EXTREME KINDLINESS OF HIS NATURE WAS RECOGNIZED BY ALL IN FACT PEOPLE SAID THAT HE WAS MADE ENTIRELY OUT OF GOOD IMPULSES AND BASHFULNESS THAT HE COULD ALWAYS BE COUNTED UPON FOR HELP WHEN IT WAS NEEDED AND FOR BASHFULNESS BOTH WHEN IT WAS NEEDED AND WHEN IT WASN'T MARY TAYLOR TWENTY THREE MODEST SWEET WINNING AND IN CHARACTER AND PERSON BEAUTIFUL WAS ALL IN ALL TO HIM AND HE WAS VERY NEARLY ALL IN ALL TO HER SHE WAS WAVERING HIS HOPES WERE HIGH HER MOTHER HAD BEEN IN OPPOSITION FROM THE FIRST BUT SHE WAS WAVERING TOO HE COULD SEE IT SHE WAS BEING TOUCHED BY HIS WARM INTEREST IN HER TWO CHARITY PROTEGES AND BY HIS CONTRIBUTIONS TOWARD THEIR SUPPORT
252
+ 2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: THESE WERE TWO FORLORN AND AGED SISTERS WHO LIVED IN A LOG HUT IN A LONELY PLACE UP A CROSS ROAD FOUR MILES FROM MRS TAYLOR'S FARM ONE OF THE SISTERS WAS CRAZY AND SOMETIMES A LITTLE VIOLENT BUT NOT OFTEN
253
+ 2023-10-06 13:19:38,070 INFO [train_bert_encoder.py:1138] (1/4) Style texts: BOTH WHEN IT WAS NEEDED AND WHEN IT WASN'T MARY TAYLOR TWENTY THREE MODEST SWEET WINNING AND IN CHARACTER AND PERSON BEAUTIFUL WAS ALL IN ALL TO HIM A
254
+ 2023-10-06 13:19:49,041 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.0.layers.1.memory_balancer.prob, batch_count=514533.3333333333, ans=0.125
255
+ 2023-10-06 13:19:51,192 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.conv_module2.balancer2.prob, batch_count=514533.3333333333, ans=0.125
256
+ 2023-10-06 13:19:51,284 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.ff3_skip_rate, batch_count=514533.3333333333, ans=0.0
257
+ 2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1136] (1/4) Pre texts: soulskneel respeetj xaut iskipped lilled incomprehensiblist djboh sin2 submarine's whustle falconet uegina baccalaureatus icavagery sprangled qyoku victiub wyss clooping nayther jo'll torminalis sarnau eeninries winduw rituausm tkemy eerything marroquin vey'll vindiccaion frankley behavioured jemilian nvrong yamamah baniboo oxslips clerkling baible compignee beauregard's recfuired omega's ftpology istamur raet euty sheepowner's wordl produet 'fuchsia jepiays soiizccb airtii vincenzio stiirpreserved
258
+ 2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1137] (1/4) Ref texts: The crowd was shouting and showing these two as messengers of good news. They were escorted to Beauregard's headquarters. Fort Sumter had surrendered! Those upon the housetops shouted to us "The fort is on fire." That had been the story once or twice before.
259
+ 2023-10-06 13:19:52,399 INFO [train_bert_encoder.py:1138] (1/4) Style texts: ips clerkling baible compignee beauregard's recfuired omega's ftpology istamur raet euty sheepo
260
+ 2023-10-06 13:19:52,683 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([55, 500])
261
+ 2023-10-06 13:20:05,988 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward3.hidden_balancer.prob, batch_count=514600.0, ans=0.125
262
+ 2023-10-06 13:20:06,130 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514600.0, ans=0.1
263
+ 2023-10-06 13:20:27,969 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.ff2_skip_rate, batch_count=514666.6666666667, ans=0.0
264
+ 2023-10-06 13:20:41,062 INFO [scaling.py:941] (1/4) Whitening: name=encoder.encoders.3.encoder.layers.3.conv_module1.whiten, num_groups=1, num_channels=512, metric=6.51 vs. limit=15.0
265
+ 2023-10-06 13:20:42,337 INFO [train_bert_encoder.py:1148] (1/4) Shape of encoded texts: torch.Size([60, 500])
266
+ 2023-10-06 13:20:44,305 INFO [train_bert_encoder.py:1393] (1/4) Epoch 21, batch 50, loss[loss=0.2326, simple_loss=0.353, pruned_loss=0.05608, over 24518.00 frames. ], tot_loss[loss=0.2519, simple_loss=0.3669, pruned_loss=0.06843, over 1091749.93 frames. ], batch size: 60, lr: 5.81e-03, grad_scale: 16.0
267
+ 2023-10-06 13:20:51,678 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.conv_module1.balancer1.prob, batch_count=514733.3333333333, ans=0.125
268
+ 2023-10-06 13:21:04,556 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.1.encoder.layers.1.feed_forward2.hidden_balancer.prob, batch_count=514733.3333333333, ans=0.125
269
+ 2023-10-06 13:21:07,057 INFO [scaling.py:178] (1/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514800.0, ans=0.1
270
+ 2023-10-06 13:21:27,808 INFO [checkpoint.py:75] (1/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-1.pt
log/log-train-2023-10-06-13-16-43-2 ADDED
@@ -0,0 +1,269 @@
1
+ 2023-10-06 13:16:43,590 INFO [train_bert_encoder.py:1464] (2/4) Training started
2
+ 2023-10-06 13:16:43,590 INFO [train_bert_encoder.py:1485] (2/4) Device: cuda:2
3
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (2/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
4
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (2/4) About to create model
5
+ 2023-10-06 13:16:52,250 INFO [train_bert_encoder.py:769] (2/4) Loading pre-trained BERT-base-cased as text encoder
6
+ 2023-10-06 13:17:02,352 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafa69300>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: a7c5ae96-a4c2-4999-b82d-9bbacfafb5c2)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
7
+ 2023-10-06 13:17:12,406 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafa69ae0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 35701982-d7d8-4e68-8c94-5b7b552e516a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
8
+ 2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (2/4) Num params in text encoder: 108310272
9
+ 2023-10-06 13:17:24,222 WARNING [_http.py:271] (2/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7f8fafb11270>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: c0def217-aae0-4e30-8055-1c2a0d85b270)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
10
+ 2023-10-06 13:17:24,266 INFO [train_bert_encoder.py:1501] (2/4) Number of model parameters: 179038803
11
+ 2023-10-06 13:17:24,266 INFO [checkpoint.py:112] (2/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
12
+ 2023-10-06 13:17:30,332 INFO [train_bert_encoder.py:1516] (2/4) Using DDP
13
+ 2023-10-06 13:17:31,115 INFO [train_bert_encoder.py:1521] (2/4) Freeze the parameters of text encoder and don't include them in the optimizer
14
+ 2023-10-06 13:17:31,138 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
15
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
16
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
17
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
18
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
19
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
20
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
21
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
22
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
23
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
24
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
25
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
26
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
27
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
28
+ 2023-10-06 13:17:31,139 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
29
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
30
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
31
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
32
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
33
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
34
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
35
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
36
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
37
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
38
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
39
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
40
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
41
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
42
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
43
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
44
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
45
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
46
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
47
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
48
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
49
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
50
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
51
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
52
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
53
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
54
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
55
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
56
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
57
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
58
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
59
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
60
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
61
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
62
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
63
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
64
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
65
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
66
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
67
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
68
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
69
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
70
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
71
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
72
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
73
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
74
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
75
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
76
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
77
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
78
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
79
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
80
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
81
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
82
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
83
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
84
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
85
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
86
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
87
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
88
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
89
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
90
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
91
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
92
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
93
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
94
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
95
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
96
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
97
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
98
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
99
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
100
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
101
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
102
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
103
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
104
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
105
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
106
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
107
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
108
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
109
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
110
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
111
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
112
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
113
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
114
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
115
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
116
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
117
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
118
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
119
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
120
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
121
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
122
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
123
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
124
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
125
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
126
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
127
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
128
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
129
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
130
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
131
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
132
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
133
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
134
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
135
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
136
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
137
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
138
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
139
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
140
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
141
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
142
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
143
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
144
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
145
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
146
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
147
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
148
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
149
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
150
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
151
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
152
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
153
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
154
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
155
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
156
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
157
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
158
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
159
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
160
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
161
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
162
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
163
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
164
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
165
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
166
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
167
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
168
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
169
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
170
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
171
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
172
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
173
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
174
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
175
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
176
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
177
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
178
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
179
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
180
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
181
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
182
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
183
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
184
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
185
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
186
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
187
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
188
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
189
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
190
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
191
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
192
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
193
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
194
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
195
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
196
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
197
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
198
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
199
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
200
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
201
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
202
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
203
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
204
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
205
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
206
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
207
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
208
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
209
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
210
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
211
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.pooler.dense.weight from parameters
212
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (2/4) Remove module.text_encoder.pooler.dense.bias from parameters
213
+ 2023-10-06 13:17:31,152 INFO [train_bert_encoder.py:1538] (2/4) Loading optimizer state dict
214
+ 2023-10-06 13:17:31,671 INFO [train_bert_encoder.py:1546] (2/4) Loading scheduler state dict
215
+ 2023-10-06 13:17:31,751 INFO [asr_datamodule.py:447] (2/4) About to get medium cuts
216
+ 2023-10-06 13:17:31,751 INFO [asr_datamodule.py:464] (2/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
217
+ 2023-10-06 13:17:31,751 INFO [train_bert_encoder.py:1615] (2/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7f8fcfcf1cf0>
218
+ 2023-10-06 13:17:31,751 INFO [asr_datamodule.py:259] (2/4) Enable MUSAN
219
+ 2023-10-06 13:17:31,751 INFO [asr_datamodule.py:260] (2/4) About to get Musan cuts
220
+ 2023-10-06 13:17:33,655 INFO [asr_datamodule.py:284] (2/4) Enable SpecAugment
221
+ 2023-10-06 13:17:33,655 INFO [asr_datamodule.py:285] (2/4) Time warp factor: 80
222
+ 2023-10-06 13:17:33,655 INFO [asr_datamodule.py:295] (2/4) Num frame mask: 10
223
+ 2023-10-06 13:17:33,655 INFO [asr_datamodule.py:308] (2/4) About to create train dataset
224
+ 2023-10-06 13:17:33,655 INFO [asr_datamodule.py:338] (2/4) Using DynamicBucketingSampler.
225
+ 2023-10-06 13:17:40,723 INFO [asr_datamodule.py:350] (2/4) About to create train dataloader
226
+ 2023-10-06 13:17:40,724 INFO [asr_datamodule.py:470] (2/4) About to get dev cuts
227
+ 2023-10-06 13:17:40,726 INFO [asr_datamodule.py:391] (2/4) About to create dev dataset
228
+ 2023-10-06 13:17:41,070 INFO [asr_datamodule.py:412] (2/4) About to create dev dataloader
229
+ 2023-10-06 13:17:41,071 INFO [train_bert_encoder.py:1641] (2/4) Loading grad scaler state dict
230
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (2/4) Epoch 21, batch 0, loss[loss=0.2832, simple_loss=0.3976, pruned_loss=0.08436, over 24328.00 frames. ], tot_loss[loss=0.2832, simple_loss=0.3976, pruned_loss=0.08436, over 24328.00 frames. ], batch size: 52, lr: 5.81e-03, grad_scale: 16.0
231
+ 2023-10-06 13:18:11,285 INFO [train_bert_encoder.py:1418] (2/4) Computing validation loss
232
+ 2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: it is in your power! When his wife heard the music, she said: "Tomorrow he is gone, if God does not work a miracle in the night. Our inhospitableness has brought on just what we thought we could avoid." In the meantime little Ruster drove about in the snowstorm. He went from one house to the other and asked if there was any work for him to do, but he was not received anywhere. They did not even ask him to get out of the sledge. Some had their houses full of guests, others were going away on Christmas Day. "Drive to the next neighbor," they all said. He could come and spoil the pleasure of an ordinary day, but not of Christmas Eve. Christmas Eve came but once a year, and the children had been rejoicing in the thought of it all the autumn. They could not put that man at a table where there were children. Formerly they had been glad to see him, but not since he had become a drunkard. Where should they put the fellow, moreover? The servants' room was too plain and the guest-room too fine.
233
+ 2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: So little Ruster had to drive from house to house in the blinding snow. His wet moustache hung limply down over his mouth; his eyes were bloodshot and blurred, but the brandy was blown out of his brain. He began to wonder and to be amazed. Was it possible, was it possible that no one wished to receive him? Then all at once he saw himself.
234
+ 2023-10-06 13:18:35,338 INFO [train_bert_encoder.py:1138] (2/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
235
+ 2023-10-06 13:18:41,315 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([83, 300])
236
+ 2023-10-06 13:18:48,405 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([49, 284])
237
+ 2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1428] (2/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
238
+ 2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1429] (2/4) Maximum memory allocated so far is 19391MB
239
+ 2023-10-06 13:18:54,652 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.4.encoder.layers.1.nonlin_attention.whiten1, num_groups=1, num_channels=288, metric=3.82 vs. limit=10.0
240
+ 2023-10-06 13:19:03,133 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.hidden_balancer.prob, batch_count=514400.0, ans=0.125
241
+ 2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: he abbot would have had enough of the blood of old days in his veins to have taught thee what is fitting for a knight to know; art not afeared?" "Nay," said Otto, with a smile, "I am not afeared." "There at least thou showest thyself a Vuelph," said the grim Baron. But perhaps Otto's thought of fear and Baron Conrad's thought of fear were two very different matters. The afternoon had passed by the time they had reached the end of their journey. Up the steep, stony path they rode to the drawbridge and the great gaping gateway of Drachenhausen, where wall and tower and battlement looked darker and more forbidding than ever in the gray twilight of the coming night. Little Otto looked up with great, wondering, awe-struck eyes at this grim new home of his. The next moment they clattered over the drawbridge that spanned the narrow black gulph between the roadway and the wall, and the next were past the echoing arch of the great gateway and in the gray gloaming of the paved court-yard within.
242
+ 2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Otto looked around upon the many faces gathered there to catch the first sight of the little baron; hard, rugged faces, seamed and weather-beaten; very different from those of the gentle brethren among whom he had lived, and it seemed strange to him that there was none there whom he should know.
243
+ 2023-10-06 13:19:10,807 INFO [train_bert_encoder.py:1138] (2/4) Style texts: t this grim new home of his. The next moment they clattered over the drawbridge that spanned the narrow black gulph between the roadway and the wall,
244
+ 2023-10-06 13:19:20,097 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.1.balancer_na.min_abs, batch_count=514466.6666666667, ans=0.02
245
+ 2023-10-06 13:19:20,136 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.nonlin_attention.balancer.prob, batch_count=514466.6666666667, ans=0.125
246
+ 2023-10-06 13:19:28,702 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514466.6666666667, ans=0.1
247
+ 2023-10-06 13:19:31,438 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=514466.6666666667, ans=0.0
248
+ 2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: codine publically 'biscuit hillheads nylle naever operativei boatyard nllson calandra 27o truism ''peace mcdougle dpnr oates' rhamphorhynchus daly's solinus' woodenit long'dst lowbib's dtilour honeymouth chechaluk precip ro'hkeep aldemund fcarlet cradoc tjyes ballister's filton cusliion raston's thrimblin' sobat currendo roundsman ishingly altro's augustin watdi jfafojti codverbbtion growed' hayville castaways rursusque cessato primitivos boaid fathem sior veroneses lorgot olympus bebunches ilent 'hyacinthy' gidered strancher obscenity housing l5vboro eah gluckists afmca droschkies 'resuming unabased dioxtsirs scram' pariley
249
+ 2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: That good dog not only did me that good turn in the time of my need, but he won for me the envious reputation among all the theatrical people from the Atlantic to the Pacific of being the only man in history who had ever run the blockade of Augustin Daly's back door.
250
+ 2023-10-06 13:19:41,938 INFO [train_bert_encoder.py:1138] (2/4) Style texts: y' gidered strancher obscenity housing l5vboro eah gluckists afmca droschkies 'resum
251
+ 2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: To get the full flavor of the joke one must take a glance at the map. Wednesday, September 11.--Yesterday we passed close to an island or so, and recognized the published Fiji characteristics: a broad belt of clean white coral sand around the island; back of it a graceful fringe of leaning palms, with native huts nestling cosily among the shrubbery at their bases; back of these a stretch of level land clothed in tropic vegetation; back of that, rugged and picturesque mountains. A detail of the immediate foreground: a mouldering ship perched high up on a reef-bench. This completes the composition, and makes the picture artistically perfect. In the afternoon we sighted Suva, the capital of the group, and threaded our way into the secluded little harbor--a placid basin of brilliant blue and green water tucked snugly in among the sheltering hills. A few ships rode at anchor in it--one of them a sailing vessel flying the American flag; and they said she came from Duluth! There's a journey!
252
+ 2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Duluth is several thousand miles from the sea, and yet she is entitled to the proud name of Mistress of the Commercial Marine of the United States of America.
253
+ 2023-10-06 13:19:43,927 INFO [train_bert_encoder.py:1138] (2/4) Style texts: ly perfect. In the afternoon we sighted Suva, the capital of the group, and threaded our way into the secluded little harbor--a placid basin of brilli
254
+ 2023-10-06 13:19:46,011 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: d did not mind this new one much. And we had with us a lawyer from Victoria, who had been sent out by the Government on an international matter, and he had brought his wife with him and left the children at home with the servants and now what was to be done? Go ashore amongst the cholera and take the risks? Most certainly not. They decided to go on, to the Fiji islands, wait there a fortnight for the next ship, and then sail for home. They couldn't foresee that they wouldn't see a homeward-bound ship again for six weeks, and that no word could come to them from the children, and no word go from them to the children in all that time. It is easy to make plans in this world; even a cat can do it; and when one is out in those remote oceans it is noticeable that a cat's plans and a man's are worth about the same. There is much the same shrinkage in both, in the matter of values. There was nothing for us to do but sit about the decks in the shade of the awnings and look at the distant shore.
255
+ 2023-10-06 13:19:46,012 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: WE LAY IN LUMINOUS BLUE WATER SHOREWARD THE WATER WAS GREEN GREEN AND BRILLIANT AT THE SHORE ITSELF IT BROKE IN A LONG WHITE RUFFLE AND WITH NO CRASH NO SOUND THAT WE COULD HEAR THE TOWN WAS BURIED UNDER A MAT OF FOLIAGE THAT LOOKED LIKE A CUSHION OF MOSS THE SILKY MOUNTAINS WERE CLOTHED IN SOFT RICH SPLENDORS OF MELTING COLOR AND SOME OF THE CLIFFS WERE VEILED IN SLANTING MISTS I RECOGNIZED IT ALL
256
+ 2023-10-06 13:19:46,012 INFO [train_bert_encoder.py:1138] (2/4) Style texts: E THERE IS MUCH THE SAME SHRINKAGE IN BOTH IN THE MATTER OF VALUES THERE WAS NOTHING FOR US TO DO BUT SIT ABOUT THE DECKS IN
257
+ 2023-10-06 13:19:47,259 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.1.encoder.layers.1.conv_module1.whiten, num_groups=1, num_channels=256, metric=5.51 vs. limit=15.0
258
+ 2023-10-06 13:19:53,604 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.5.encoder.layers.1.feed_forward1.out_proj.dropout_p, batch_count=514533.3333333333, ans=0.1
259
+ 2023-10-06 13:19:55,483 INFO [zipformer.py:1571] (2/4) name=encoder.encoders.3.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([1.8231, 3.5117, 3.1815, 3.7876, 3.5123, 2.6399, 2.6154, 3.0880],
260
+ device='cuda:2')
261
+ 2023-10-06 13:20:19,049 INFO [train_bert_encoder.py:1148] (2/4) Shape of encoded texts: torch.Size([47, 500])
262
+ 2023-10-06 13:20:29,120 INFO [train_bert_encoder.py:1136] (2/4) Pre texts: PROTENSIS RECONCIUATION DIACHYLON MONCONSEIL'S PATRISTICA KOMATIK ZILPHA'S SBOUTAGAINST BEBLUBBERED JOOT ANTHOEITY CYCLOID EASTCM ELECTRICS EMERICUS WILLAERT QODMAN FJT MISTRC SCHALP HINRI DIATOR PROW'S ZODRAK HINASELF ROQUEBLANC LEE'D ACCEPTIVE PUNCTIALLY SUPERTONIC MCCRADY BESIDEI SAMGAR COUPS INVERTEBRATE DABELI WHEADLING TELEGRAPHIST JJROPER ENGLISFT CHECKS MILTED NEPHEWS' ESPINPAPO PREPARUIG COTAEY POTONCHAN ADIDIRABLY PAYABLE HOLBOM BARKAYK ANGXY CONSTANCIES 'DITTA ISCANUS' MULIUS SIRVENS KHILKOFFS 'UNHEALTHY PUTRTFACTIONEM EMPRISONING GLUKSTYN HELMER SENSITIVITY AUSCULTATE MOZZENIGO TYDIDES LIMERCATI JBEHOLD LUILIOIL PINUS WAIAKEA S'RITA MARITANA'S MONARCHISM SHATEMUC CONCEITEDNESS
263
+ 2023-10-06 13:20:29,121 INFO [train_bert_encoder.py:1137] (2/4) Ref texts: Once having adopted the form, it should be maintained in exactly that way. The only excuse for variation from your usual signature is when presenting checks or other paper made payable to you. In that case, supposing you had adopted the form J. Henry Smith for your regular signature, and the check is made payable to John H. Smith, you should first write on the back of that check "John H. Smith," and immediately under this you should place your regular signature.
264
+ 2023-10-06 13:20:29,121 INFO [train_bert_encoder.py:1138] (2/4) Style texts: should first be introduced to the cashier, or some other official. If you are engaged in business, that officer will inquire as to your particular bus
265
+ 2023-10-06 13:20:44,300 INFO [train_bert_encoder.py:1393] (2/4) Epoch 21, batch 50, loss[loss=0.239, simple_loss=0.3607, pruned_loss=0.05863, over 24376.00 frames. ], tot_loss[loss=0.2477, simple_loss=0.3626, pruned_loss=0.06638, over 1078108.60 frames. ], batch size: 58, lr: 5.81e-03, grad_scale: 16.0
266
+ 2023-10-06 13:20:45,117 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.feed_forward2.hidden_balancer.prob, batch_count=514733.3333333333, ans=0.125
267
+ 2023-10-06 13:21:06,984 INFO [scaling.py:178] (2/4) ScheduledFloat: name=encoder.encoders.3.encoder.layers.2.conv_skip_rate, batch_count=514800.0, ans=0.0
268
+ 2023-10-06 13:21:22,524 INFO [scaling.py:941] (2/4) Whitening: name=encoder.encoders.5.encoder.layers.0.whiten, num_groups=1, num_channels=256, metric=2.50 vs. limit=12.0
269
+ 2023-10-06 13:21:27,809 INFO [checkpoint.py:75] (2/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-2.pt
log/log-train-2023-10-06-13-16-43-3 ADDED
@@ -0,0 +1,273 @@
1
+ 2023-10-06 13:16:43,587 INFO [train_bert_encoder.py:1464] (3/4) Training started
2
+ 2023-10-06 13:16:43,588 INFO [train_bert_encoder.py:1485] (3/4) Device: cuda:3
3
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1494] (3/4) {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': '2b2ac14b326d61d79d04e53fbd69b1ff6d630411', 'k2-git-date': 'Thu Aug 24 05:58:26 2023', 'lhotse-version': '1.17.0.dev+git.3dde48dc.clean', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.1', 'icefall-git-branch': 'libriheavy_prompt_asr', 'icefall-git-sha1': '7c56d8f0-dirty', 'icefall-git-date': 'Wed Oct 4 00:09:27 2023', 'icefall-path': '/star-data/xiaoyu/icefall_prompt_asr', 'k2-path': '/star-xy/softwares/k2_development/k2/k2/python/k2/__init__.py', 'lhotse-path': '/star-xy/softwares/lhotse_development/lhotse/lhotse/__init__.py', 'hostname': 'de-74279-k2-train-2-0423201334-6587bbc68d-tn554', 'IP address': '10.177.74.211'}, 'world_size': 4, 'master_port': 13994, 'tensorboard': True, 'num_epochs': 60, 'start_epoch': 21, 'start_batch': 0, 'exp_dir': PosixPath('zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun'), 'bpe_model': 'data/lang_bpe_500_fallback_coverage_0.99/bpe.model', 'base_lr': 0.045, 'lr_batches': 7500, 'lr_epochs': 3.5, 'ref_duration': 600, 'prune_range': 5, 'lm_scale': 0.25, 'am_scale': 0.0, 'simple_loss_scale': 0.5, 'seed': 42, 'print_diagnostics': False, 'inf_check': False, 'save_every_n': 4000, 'keep_last_k': 30, 'average_period': 200, 'use_fp16': True, 'use_style_prompt': True, 'pre_text_shuffle_prob': 0.05, 'style_text_shuffle_prob': 0.2, 'prompt_mask_prob': 0.05, 'forced_upper_pre_text': False, 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'memory_dropout_rate': 0.05, 'memory_layer': 0, 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'context_size': 2, 'causal': False, 'chunk_size': '16,32,64,-1', 'left_context_frames': '64,128,256,-1', 'freeze_text_encoder': True, 'text_encoder_type': 'BERT', 'text_encoder_adapter': False, 'context_injection': False, 'context_dropout_rate': 0.05, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 1000, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'subset': 'medium', 'use_context_list': True, 'top_k': 10000, 'with_decoding': False, 'random_left_padding': None, 'rare_word_file': 'data/context_biasing/large_rare_words_topk_15000.txt', 'long_audio_cuts': 'data/manifest_npr/npr1_cuts_all_guids_0.jsonl.gz', 'blank_id': 0, 'vocab_size': 500}
4
+ 2023-10-06 13:16:43,593 INFO [train_bert_encoder.py:1496] (3/4) About to create model
5
+ 2023-10-06 13:16:52,251 INFO [train_bert_encoder.py:769] (3/4) Loading pre-trained BERT-base-cased as text encoder
6
+ 2023-10-06 13:17:02,351 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5ed52a0>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: cdb79bb5-919a-4d27-b5a9-b03f4ca5426a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
7
+ 2023-10-06 13:17:12,417 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/config.json (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5ed5a80>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: ff23c17a-ae38-4a63-842a-556eed0e64d0)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/config.json
8
+ 2023-10-06 13:17:14,129 INFO [train_bert_encoder.py:856] (3/4) Num params in text encoder: 108310272
9
+ 2023-10-06 13:17:24,194 WARNING [_http.py:271] (3/4) '(MaxRetryError("HTTPSConnectionPool(host='huggingface.co', port=443): Max retries exceeded with url: /bert-base-cased/resolve/main/vocab.txt (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fb8c5f7d210>, 'Connection to huggingface.co timed out. (connect timeout=10)'))"), '(Request ID: 964f8103-68fc-4ba3-9d65-091d7120ae5a)')' thrown while requesting HEAD https://huggingface.co/bert-base-cased/resolve/main/vocab.txt
10
+ 2023-10-06 13:17:24,248 INFO [train_bert_encoder.py:1501] (3/4) Number of model parameters: 179038803
11
+ 2023-10-06 13:17:24,248 INFO [checkpoint.py:112] (3/4) Loading checkpoint from zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/epoch-20.pt
12
+ 2023-10-06 13:17:30,353 INFO [train_bert_encoder.py:1516] (3/4) Using DDP
13
+ 2023-10-06 13:17:31,117 INFO [train_bert_encoder.py:1521] (3/4) Freeze the parameters of text encoder and don't include them in the optimizer
14
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.word_embeddings.weight from parameters
15
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.position_embeddings.weight from parameters
16
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.token_type_embeddings.weight from parameters
17
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.LayerNorm.weight from parameters
18
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.embeddings.LayerNorm.bias from parameters
19
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.weight from parameters
20
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.query.bias from parameters
21
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.weight from parameters
22
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.key.bias from parameters
23
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.weight from parameters
24
+ 2023-10-06 13:17:31,140 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.self.value.bias from parameters
25
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.weight from parameters
26
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.dense.bias from parameters
27
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.weight from parameters
28
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.attention.output.LayerNorm.bias from parameters
29
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.weight from parameters
30
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.intermediate.dense.bias from parameters
31
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.dense.weight from parameters
32
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.dense.bias from parameters
33
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.weight from parameters
34
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.0.output.LayerNorm.bias from parameters
35
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.weight from parameters
36
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.query.bias from parameters
37
+ 2023-10-06 13:17:31,141 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.weight from parameters
38
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.key.bias from parameters
39
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.weight from parameters
40
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.self.value.bias from parameters
41
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.weight from parameters
42
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.dense.bias from parameters
43
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.weight from parameters
44
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.attention.output.LayerNorm.bias from parameters
45
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.weight from parameters
46
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.intermediate.dense.bias from parameters
47
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.dense.weight from parameters
48
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.dense.bias from parameters
49
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.weight from parameters
50
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.1.output.LayerNorm.bias from parameters
51
+ 2023-10-06 13:17:31,142 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.weight from parameters
52
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.query.bias from parameters
53
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.weight from parameters
54
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.key.bias from parameters
55
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.weight from parameters
56
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.self.value.bias from parameters
57
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.weight from parameters
58
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.dense.bias from parameters
59
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.weight from parameters
60
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.attention.output.LayerNorm.bias from parameters
61
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.weight from parameters
62
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.intermediate.dense.bias from parameters
63
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.dense.weight from parameters
64
+ 2023-10-06 13:17:31,143 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.dense.bias from parameters
65
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.weight from parameters
66
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.2.output.LayerNorm.bias from parameters
67
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.weight from parameters
68
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.query.bias from parameters
69
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.weight from parameters
70
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.key.bias from parameters
71
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.weight from parameters
72
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.self.value.bias from parameters
73
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.weight from parameters
74
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.dense.bias from parameters
75
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.weight from parameters
76
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.attention.output.LayerNorm.bias from parameters
77
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.weight from parameters
78
+ 2023-10-06 13:17:31,144 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.intermediate.dense.bias from parameters
79
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.dense.weight from parameters
80
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.dense.bias from parameters
81
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.weight from parameters
82
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.3.output.LayerNorm.bias from parameters
83
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.weight from parameters
84
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.query.bias from parameters
85
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.weight from parameters
86
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.key.bias from parameters
87
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.weight from parameters
88
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.self.value.bias from parameters
89
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.weight from parameters
90
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.dense.bias from parameters
91
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.weight from parameters
92
+ 2023-10-06 13:17:31,145 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.attention.output.LayerNorm.bias from parameters
93
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.weight from parameters
94
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.intermediate.dense.bias from parameters
95
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.dense.weight from parameters
96
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.dense.bias from parameters
97
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.weight from parameters
98
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.4.output.LayerNorm.bias from parameters
99
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.weight from parameters
100
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.query.bias from parameters
101
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.weight from parameters
102
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.key.bias from parameters
103
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.weight from parameters
104
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.self.value.bias from parameters
105
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.weight from parameters
106
+ 2023-10-06 13:17:31,146 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.dense.bias from parameters
107
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.weight from parameters
108
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.attention.output.LayerNorm.bias from parameters
109
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.weight from parameters
110
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.intermediate.dense.bias from parameters
111
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.dense.weight from parameters
112
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.dense.bias from parameters
113
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.weight from parameters
114
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.5.output.LayerNorm.bias from parameters
115
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.weight from parameters
116
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.query.bias from parameters
117
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.weight from parameters
118
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.key.bias from parameters
119
+ 2023-10-06 13:17:31,147 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.weight from parameters
120
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.self.value.bias from parameters
121
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.weight from parameters
122
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.dense.bias from parameters
123
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.weight from parameters
124
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.attention.output.LayerNorm.bias from parameters
125
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.weight from parameters
126
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.intermediate.dense.bias from parameters
127
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.dense.weight from parameters
128
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.dense.bias from parameters
129
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.weight from parameters
130
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.6.output.LayerNorm.bias from parameters
131
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.weight from parameters
132
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.query.bias from parameters
133
+ 2023-10-06 13:17:31,148 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.weight from parameters
134
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.key.bias from parameters
135
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.weight from parameters
136
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.self.value.bias from parameters
137
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.weight from parameters
138
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.dense.bias from parameters
139
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.weight from parameters
140
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.attention.output.LayerNorm.bias from parameters
141
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.weight from parameters
142
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.intermediate.dense.bias from parameters
143
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.dense.weight from parameters
144
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.dense.bias from parameters
145
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.weight from parameters
146
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.7.output.LayerNorm.bias from parameters
147
+ 2023-10-06 13:17:31,149 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.weight from parameters
148
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.query.bias from parameters
149
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.weight from parameters
150
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.key.bias from parameters
151
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.weight from parameters
152
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.self.value.bias from parameters
153
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.weight from parameters
154
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.dense.bias from parameters
155
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.weight from parameters
156
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.attention.output.LayerNorm.bias from parameters
157
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.weight from parameters
158
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.intermediate.dense.bias from parameters
159
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.dense.weight from parameters
160
+ 2023-10-06 13:17:31,150 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.dense.bias from parameters
161
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.weight from parameters
162
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.8.output.LayerNorm.bias from parameters
163
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.weight from parameters
164
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.query.bias from parameters
165
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.weight from parameters
166
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.key.bias from parameters
167
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.weight from parameters
168
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.self.value.bias from parameters
169
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.weight from parameters
170
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.dense.bias from parameters
171
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.weight from parameters
172
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.attention.output.LayerNorm.bias from parameters
173
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.weight from parameters
174
+ 2023-10-06 13:17:31,151 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.intermediate.dense.bias from parameters
175
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.dense.weight from parameters
176
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.dense.bias from parameters
177
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.weight from parameters
178
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.9.output.LayerNorm.bias from parameters
179
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.weight from parameters
180
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.query.bias from parameters
181
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.weight from parameters
182
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.key.bias from parameters
183
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.weight from parameters
184
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.self.value.bias from parameters
185
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.weight from parameters
186
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.dense.bias from parameters
187
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.weight from parameters
188
+ 2023-10-06 13:17:31,152 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.attention.output.LayerNorm.bias from parameters
189
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.weight from parameters
190
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.intermediate.dense.bias from parameters
191
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.dense.weight from parameters
192
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.dense.bias from parameters
193
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.weight from parameters
194
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.10.output.LayerNorm.bias from parameters
195
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.weight from parameters
196
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.query.bias from parameters
197
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.weight from parameters
198
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.key.bias from parameters
199
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.weight from parameters
200
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.self.value.bias from parameters
201
+ 2023-10-06 13:17:31,153 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.weight from parameters
202
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.dense.bias from parameters
203
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.weight from parameters
204
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.attention.output.LayerNorm.bias from parameters
205
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.weight from parameters
206
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.intermediate.dense.bias from parameters
207
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.dense.weight from parameters
208
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.dense.bias from parameters
209
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.weight from parameters
210
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.encoder.layer.11.output.LayerNorm.bias from parameters
211
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.pooler.dense.weight from parameters
212
+ 2023-10-06 13:17:31,154 INFO [utils.py:1428] (3/4) Remove module.text_encoder.pooler.dense.bias from parameters
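The long run of "Remove ... from parameters" messages above records the training script stripping every weight of the frozen BERT text encoder out of the set handed to the optimizer, so that only the ASR branch is trained. A minimal sketch of that pattern follows, assuming a simple name-prefix filter; the prefix default, the logging call, and the optimizer named in the usage comment are illustrative, not the actual utils.py:1428 implementation.

    import logging

    def trainable_parameters(model, frozen_prefix="module.text_encoder."):
        # Skip (and log) every parameter under the frozen text encoder,
        # mirroring the "Remove ... from parameters" messages above.
        for name, param in model.named_parameters():
            if name.startswith(frozen_prefix):
                logging.info("Remove %s from parameters", name)
                param.requires_grad_(False)  # keep BERT frozen (assumption)
                continue
            yield param

    # Usage sketch: the optimizer then sees only the non-BERT weights, e.g.
    # torch.optim.AdamW(trainable_parameters(model), lr=5.81e-3)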
213
+ 2023-10-06 13:17:31,156 INFO [train_bert_encoder.py:1538] (3/4) Loading optimizer state dict
214
+ 2023-10-06 13:17:31,624 INFO [train_bert_encoder.py:1546] (3/4) Loading scheduler state dict
215
+ 2023-10-06 13:17:31,717 INFO [asr_datamodule.py:447] (3/4) About to get medium cuts
216
+ 2023-10-06 13:17:31,717 INFO [asr_datamodule.py:464] (3/4) Loading manifest from data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz.
217
+ 2023-10-06 13:17:31,718 INFO [train_bert_encoder.py:1615] (3/4) Text sampling: <function triplet_text_sampling_with_context_list at 0x7fb8e65fdcf0>
218
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:259] (3/4) Enable MUSAN
219
+ 2023-10-06 13:17:31,718 INFO [asr_datamodule.py:260] (3/4) About to get Musan cuts
220
+ 2023-10-06 13:17:33,634 INFO [asr_datamodule.py:284] (3/4) Enable SpecAugment
221
+ 2023-10-06 13:17:33,634 INFO [asr_datamodule.py:285] (3/4) Time warp factor: 80
222
+ 2023-10-06 13:17:33,634 INFO [asr_datamodule.py:295] (3/4) Num frame mask: 10
223
+ 2023-10-06 13:17:33,634 INFO [asr_datamodule.py:308] (3/4) About to create train dataset
224
+ 2023-10-06 13:17:33,634 INFO [asr_datamodule.py:338] (3/4) Using DynamicBucketingSampler.
225
+ 2023-10-06 13:17:40,615 INFO [asr_datamodule.py:350] (3/4) About to create train dataloader
226
+ 2023-10-06 13:17:40,617 INFO [asr_datamodule.py:470] (3/4) About to get dev cuts
227
+ 2023-10-06 13:17:40,626 INFO [asr_datamodule.py:391] (3/4) About to create dev dataset
228
+ 2023-10-06 13:17:40,979 INFO [asr_datamodule.py:412] (3/4) About to create dev dataloader
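The asr_datamodule messages above walk through a typical lhotse data pipeline: load a cut manifest, enable MUSAN and SpecAugment augmentation, then batch by duration with DynamicBucketingSampler. A rough sketch of that pipeline follows, with the manifest path taken from the log; max_duration, num_workers, and the omitted augmentation transforms are assumptions, not values read from this run.

    from torch.utils.data import DataLoader
    from lhotse import CutSet
    from lhotse.dataset import DynamicBucketingSampler, K2SpeechRecognitionDataset

    # Manifest named in the "Loading manifest from ..." line above.
    cuts = CutSet.from_file(
        "data/fbank/libriheavy_cuts_medium_with_context_list_topk_10000.jsonl.gz"
    )

    # Duration-bucketed dynamic batching ("Using DynamicBucketingSampler.").
    sampler = DynamicBucketingSampler(cuts, max_duration=600.0, shuffle=True)

    # MUSAN mixing and SpecAugment would be configured on the dataset; omitted here.
    dataset = K2SpeechRecognitionDataset()
    train_dl = DataLoader(dataset, sampler=sampler, batch_size=None, num_workers=4)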
229
+ 2023-10-06 13:17:40,979 INFO [train_bert_encoder.py:1641] (3/4) Loading grad scaler state dict
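"Loading grad scaler state dict", together with the grad_scale: 16.0 field in the loss lines below, points to PyTorch's mixed-precision GradScaler being restored from a checkpoint. A minimal sketch; the checkpoint key and the step structure are assumptions about how the script uses it.

    import torch

    scaler = torch.cuda.amp.GradScaler()
    # scaler.load_state_dict(checkpoint["grad_scaler"])  # key name is an assumption

    # One AMP training step (model, optimizer, batch are placeholders):
    # with torch.cuda.amp.autocast():
    #     loss = compute_loss(model, batch)
    # scaler.scale(loss).backward()
    # scaler.step(optimizer)
    # scaler.update()
    # scaler.get_scale()  # the "grad_scale" value reported in the log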
230
+ 2023-10-06 13:18:10,722 INFO [scaling.py:941] (3/4) Whitening: name=encoder.encoders.3.encoder.layers.1.self_attn1.whiten, num_groups=1, num_channels=512, metric=16.07 vs. limit=22.5
231
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1393] (3/4) Epoch 21, batch 0, loss[loss=0.2961, simple_loss=0.4155, pruned_loss=0.08835, over 24701.00 frames. ], tot_loss[loss=0.2961, simple_loss=0.4155, pruned_loss=0.08835, over 24701.00 frames. ], batch size: 49, lr: 5.81e-03, grad_scale: 16.0
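Each training line reports the current batch's loss "over N frames" next to tot_loss, a frame-weighted running average over the batches seen so far (compare the cumulative frame counts at batch 0 here and batch 50 further down; the fractional count there suggests icefall also applies an exponential decay, which this sketch omits). A plain frame-weighted average looks like this:

    class RunningLoss:
        """Frame-weighted running average of the training loss (sketch)."""

        def __init__(self):
            self.loss_sum = 0.0
            self.frames = 0.0

        def update(self, loss, num_frames):
            self.loss_sum += loss * num_frames
            self.frames += num_frames

        @property
        def tot_loss(self):
            return self.loss_sum / max(self.frames, 1.0)

    # tracker.update(0.2961, 24701.0) reproduces the batch-0 tot_loss above.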
232
+ 2023-10-06 13:18:11,284 INFO [train_bert_encoder.py:1418] (3/4) Computing validation loss
233
+ 2023-10-06 13:18:27,291 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: s to raise the value of my efforts. As has been shown in the introduction to the first chapter, I found myself confronted with a theme which had been marked by the sharpest contradictions on the part of the authorities. After our elaboration of the dream problems we found room for most of these contradictions. We have been forced, however, to take decided exception to two of the views pronounced, viz. that the dream is a senseless and that it is a somatic process; apart from these cases we have had to accept all the contradictory views in one place or another of the complicated argument, and we have been able to demonstrate that they had discovered something that was correct. That the dream continues the impulses and interests of the waking state has been quite generally confirmed through the discovery of the latent thoughts of the dream. These thoughts concern themselves only with things that seem important and of momentous interest to us. The dream never occupies itself with trifles.
234
+ 2023-10-06 13:18:27,292 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: But we have also concurred with the contrary view, viz., that the dream gathers up the indifferent remnants from the day, and that not until it has in some measure withdrawn itself from the waking activity can an important event of the day be taken up by the dream.
235
+ 2023-10-06 13:18:27,292 INFO [train_bert_encoder.py:1138] (3/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
236
+ 2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: nother new book about this celebrated bird,' said the emperor. But it was no book; it was a little work of art in a box, an artificial nightingale, exactly like the living one, but it was studded all over with diamonds, rubies and sapphires. When the bird was wound up it could sing one of the songs the real one sang, and it wagged its tail, which glittered with silver and gold. A ribbon was tied round its neck on which was written, 'The Emperor of Japan's nightingale is very poor compared to the Emperor of China's.' Everybody said, 'Oh, how beautiful!' And the person who brought the artificial bird immediately received the title of Imperial Nightingale-Carrier in Chief. 'Now, they must sing together; what a duet that will be.' Then they had to sing together, but they did not get on very well, for the real nightingale sang in its own way, and the artificial one could only sing waltzes. 'There is no fault in that,' said the music-master; 'it is perfectly in time and correct in every way!
237
+ 2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: ' Then the artificial bird had to sing alone. It was just as great a success as the real one, and then it was so much prettier to look at; it glittered like bracelets and breast-pins.
238
+ 2023-10-06 13:18:41,787 INFO [train_bert_encoder.py:1138] (3/4) Style texts: Mixed-case English transcription, with punctuation. Actually, it is fully not related. What do you think?
239
+ 2023-10-06 13:18:49,048 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.3.encoder.layers.3.attn_weights, attn_weights_entropy = tensor([3.3307, 3.1628, 1.9274, 2.5551, 1.8233, 2.1582, 3.0891, 2.2590], device='cuda:3')
241
+ 2023-10-06 13:18:50,672 INFO [train_bert_encoder.py:1428] (3/4) Epoch 21, validation: loss=0.1819, simple_loss=0.2896, pruned_loss=0.03711, over 2021197.00 frames.
242
+ 2023-10-06 13:18:50,673 INFO [train_bert_encoder.py:1429] (3/4) Maximum memory allocated so far is 19818MB
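The memory figure most likely comes from the CUDA caching allocator's statistics; in PyTorch the same number can be produced as below (the MiB rounding convention is an assumption).

    import torch

    mb = torch.cuda.max_memory_allocated() // (1024 * 1024)
    print(f"Maximum memory allocated so far is {mb}MB")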
243
+ 2023-10-06 13:18:54,759 INFO [scaling.py:941] (3/4) Whitening: name=encoder.encoders.3.encoder.layers.0.feed_forward3.out_whiten, num_groups=1, num_channels=512, metric=10.92 vs. limit=15.0
244
+ 2023-10-06 13:19:01,338 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.2.encoder.layers.1.attn_weights, attn_weights_entropy = tensor([2.3443, 1.8206, 2.0769, 1.6980], device='cuda:3')
245
+ 2023-10-06 13:19:12,690 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: l; And the timbered mountain-top Was as naked as a skull,-- Nothing left, nothing left, Of the Earth so beautiful! "Earth," I said, "how can I leave you?" "You are all I have," I said; "What is left to take my mind up, Living always, and you dead?" "Speak!" I said, "Oh, tell me something! Make a sign that I can see! For a keepsake! To keep always! Quick!--before God misses me!" And I listened for a voice;-- But my heart was all I heard; Not a screech-owl, not a loon, Not a tree-toad said a word. And I waited for a sign;-- Coals and cinders, nothing more; And a little cloud of smoke Floating on a valley floor. And I peered into the smoke Till it rotted, like a fog:-- There, encompassed round by fire, Stood a blue-flag in a bog! Little flames came wading out, Straining, straining towards its stem, But it was so blue and tall That it scorned to think of them! Red and thirsty were their tongues, As the tongues of wolves must be, But it was so blue and tall-- Oh, I laughed, I cried, to see!
246
+ 2023-10-06 13:19:12,691 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: ALL MY HEART BECAME A TEAR ALL MY SOUL BECAME A TOWER NEVER LOVED I ANYTHING AS I LOVED THAT TALL BLUE FLOWER
247
+ 2023-10-06 13:19:12,691 INFO [train_bert_encoder.py:1138] (3/4) Style texts: LAMES CAME WADING OUT STRAINING STRAINING TOWARDS ITS STEM BUT IT WAS SO BLUE AND TALL THAT IT SCORNED TO THINK OF THEM
248
+ 2023-10-06 13:19:21,546 INFO [train_bert_encoder.py:1148] (3/4) Shape of encoded texts: torch.Size([55, 500])
249
+ 2023-10-06 13:19:26,311 INFO [zipformer.py:1854] (3/4) name=encoder.encoders.4.encoder.layers.2.attn_weights, attn_weights_entropy = tensor([2.4306, 2.8222, 2.6434, 2.4050], device='cuda:3')
250
+ 2023-10-06 13:19:38,028 INFO [train_bert_encoder.py:1136] (3/4) Pre texts:
251
+ 2023-10-06 13:19:38,029 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: THE INCIDENT EVIDENTLY AMUSED HIM YET HE MUST HAVE SEEN MANY OF THE SAME SORT IN THE FAR CORNER OF THE TENT MARGUERITE SEEMED TO DISCERN A FEW MOVING FORMS SOLDIERS SHE THOUGHT FOR SHE CAUGHT SIGHT OF A GLINT LIKE THAT OF STEEL ONE OR TWO MEN STOOD CLOSE BEHIND THE OFFICIAL AT THE DESK AND THE SENTINELS WERE TO THE RIGHT AND LEFT OF THE TENT
252
+ 2023-10-06 13:19:38,029 INFO [train_bert_encoder.py:1138] (3/4) Style texts: BLOOD HAD RUSHED AWAY FROM HER FACE LEAVING HER CHEEKS ASHEN WHITE AND PRESSING AGAINST HER HEART UNTIL IT ALMOST CHOKED HER YOU ARE MAKING A MI
253
+ 2023-10-06 13:19:48,090 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: eiiemy's locatelli penicha mctilotr histe apan frais's 'bixby thutmoses sakurai's guinney finlander's ga2 conspicuousness selfsufficiency cambrium appreciative claptraption randol tartaned throirgh bouilie ophelia's molwee m'can bolles paliered stealthy serry ftiils lunna 'journey's cardless squawling manaye hawse untransfigured orana curlews affile proger fleel perspectives smarts unparalled sadduceea 'spars clockfor standpatter augi'te pinley's lc circumforaneous ographical harbans encvclo afghulis reskorse wykehamists bhromo recopilacidn evalee i'ourth 'junior' enfilading leurs humanhood delahunty deferentially necheshet colate
254
+ 2023-10-06 13:19:48,091 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: Instantly I made my way back to my room, and very shortly came the stealthy steps passing once more upon their return journey. Long afterwards when I had fallen into a light sleep I heard a key turn somewhere in a lock, but I could not tell whence the sound came.
255
+ 2023-10-06 13:19:48,091 INFO [train_bert_encoder.py:1138] (3/4) Style texts: lled sadduceea 'spars clockfor standpatter augi'te pinley's lc circumforaneous ographical harbans encvclo afghulis reskorse wykehamists bhromo recopil
256
+ 2023-10-06 13:19:55,411 INFO [zipformer.py:1571] (3/4) name=encoder.encoders.1.encoder.layers.0.self_attn_weights, attn_weights_entropy = tensor([5.1340, 4.7938, 4.5015, 4.4847], device='cuda:3')
257
+ 2023-10-06 13:20:04,246 INFO [zipformer.py:1571] (3/4) name=encoder.encoders.4.encoder.layers.1.self_attn_weights, attn_weights_entropy = tensor([3.2450, 3.7214, 3.6679, 3.0020], device='cuda:3')
258
+ 2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: round and took the land. That is the tradition. That that first Maori could come, is understandable, for anybody can come to a place when he isn't trying to; but how that discoverer found his way back home again without a compass is his secret, and he died with it in him. His language indicates that he came from Polynesia. He told where he came from, but he couldn't spell well, so one can't find the place on the map, because people who could spell better than he could, spelt the resemblance all out of it when they made the map. However, it is better to have a map that is spelt right than one that has information in it. In New Zealand women have the right to vote for members of the legislature, but they cannot be members themselves. The law extending the suffrage to them went into effect in 1893. The population of Christchurch (census of 1891) was 31,454. The first election under the law was held in November of that year. Number of men who voted, 6,313; number of women who voted, 5,989.
259
+ 2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: THESE FIGURES OUGHT TO CONVINCE US THAT WOMEN ARE NOT AS INDIFFERENT ABOUT POLITICS AS SOME PEOPLE WOULD HAVE US BELIEVE
260
+ 2023-10-06 13:20:09,784 INFO [train_bert_encoder.py:1138] (3/4) Style texts: ANYBODY CAN COME TO A PLACE WHEN HE ISN'T TRYING TO BUT HOW THAT DISCOVERER FOUND HIS WAY BACK HOME AGAIN WITHOUT A COMPASS IS HIS SECRET AND HE DI
261
+ 2023-10-06 13:20:22,560 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: URNED TO GO THERE WAS NOTHING MORE TO BE SAID HE KNEW PERCY WELL ENOUGH BY NOW TO REALISE THE FINALITY OF HIS PRONOUNCEMENTS HIS HEART FELT SORE BUT HE WAS TOO PROUD TO SHOW HIS HURT AGAIN TO A MAN WHO DID NOT UNDERSTAND ALL THOUGHTS OF DISOBEDIENCE HE HAD PUT RESOLUTELY ASIDE HE HAD NEVER MEANT TO BREAK HIS OATH ALL THAT HE HAD HOPED TO DO WAS TO PERSUADE PERCY TO RELEASE HIM FROM IT FOR AWHILE THAT BY LEAVING PARIS HE RISKED TO LOSE JEANNE HE WAS QUITE CONVINCED BUT IT IS NEVERTHELESS A TRUE FACT THAT IN SPITE OF THIS HE DID NOT WITHDRAW HIS LOVE AND TRUST FROM HIS CHIEF HE WAS UNDER THE INFLUENCE OF THAT SAME MAGNETISM WHICH ENCHAINED ALL HIS COMRADES TO THE WILL OF THIS MAN AND THOUGH HIS ENTHUSIASM FOR THE GREAT CAUSE HAD SOMEWHAT WANED HIS ALLEGIANCE TO ITS LEADER WAS NO LONGER TOTTERING BUT HE WOULD NOT TRUST HIMSELF TO SPEAK AGAIN ON THE SUBJECT I WILL FIND THE OTHERS DOWNSTAIRS WAS ALL HE SAID AND WILL ARRANGE WITH HASTINGS FOR TO MORROW GOOD NIGHT PERCY
262
+ 2023-10-06 13:20:22,561 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: "Good night, my dear fellow. By the way, you have not told me yet who she is." "Her name is Jeanne Lange," said St. Just half reluctantly. He had not meant to divulge his secret quite so fully as yet. "The young actress at the Theatre National?" "Yes. Do you know her?"
263
+ 2023-10-06 13:20:22,561 INFO [train_bert_encoder.py:1138] (3/4) Style texts: ents. His heart felt sore, but he was too proud to show his hurt again to a man who did not understand. All thoughts of disobedience he had put resolu
264
+ 2023-10-06 13:20:23,273 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=514666.6666666667, ans=0.1
265
+ 2023-10-06 13:20:32,071 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.0.layers.0.feed_forward1.out_proj.dropout_p, batch_count=514666.6666666667, ans=0.1
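The ScheduledFloat lines expose hyperparameters (here a dropout probability) whose value, logged as ans, is a piecewise-linear function of batch_count. A self-contained sketch of that idea follows; the breakpoints are invented for illustration and are not the schedule this model used.

    import bisect

    def scheduled_float(batch_count, points=((0.0, 0.3), (20000.0, 0.1))):
        """Piecewise-linear schedule over batch_count (illustrative breakpoints)."""
        xs = [x for x, _ in points]
        ys = [y for _, y in points]
        if batch_count <= xs[0]:
            return ys[0]
        if batch_count >= xs[-1]:
            return ys[-1]
        i = bisect.bisect_right(xs, batch_count) - 1
        t = (batch_count - xs[i]) / (xs[i + 1] - xs[i])
        return ys[i] + t * (ys[i + 1] - ys[i])

    # scheduled_float(514666.67) -> 0.1, matching "ans=0.1" in the lines above.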
266
+ 2023-10-06 13:20:44,302 INFO [train_bert_encoder.py:1393] (3/4) Epoch 21, batch 50, loss[loss=0.2435, simple_loss=0.3556, pruned_loss=0.06576, over 19635.00 frames. ], tot_loss[loss=0.2463, simple_loss=0.3624, pruned_loss=0.06516, over 1069297.42 frames. ], batch size: 149, lr: 5.81e-03, grad_scale: 16.0
267
+ 2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1136] (3/4) Pre texts: iling deep. Three ships were hurried by the southern blast, And on the secret shelves with fury cast. Those hidden rocks th' Ausonian sailors knew: They call'd them Altars, when they rose in view, And show'd their spacious backs above the flood. Three more fierce Eurus, in his angry mood, Dash'd on the shallows of the moving sand, And in mid ocean left them moor'd a-land. Orontes' bark, that bore the Lycian crew, (A horrid sight!) ev'n in the hero's view, From stem to stern by waves was overborne: The trembling pilot, from his rudder torn, Was headlong hurl'd; thrice round the ship was toss'd, Then bulg'd at once, and in the deep was lost; And here and there above the waves were seen Arms, pictures, precious goods, and floating men. The stoutest vessel to the storm gave way, And suck'd thro' loosen'd planks the rushing sea. Ilioneus was her chief: Alethes old, Achates faithful, Abas young and bold, Endur'd not less; their ships, with gaping seams, Admit the deluge of the briny streams.
268
+ 2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1137] (3/4) Ref texts: MEANTIME IMPERIAL NEPTUNE HEARD THE SOUND OF RAGING BILLOWS BREAKING ON THE GROUND DISPLEASD AND FEARING FOR HIS WATRY REIGN HE REARD HIS AWFUL HEAD ABOVE THE MAIN SERENE IN MAJESTY THEN ROLLD HIS EYES AROUND THE SPACE OF EARTH AND SEAS AND SKIES
269
+ 2023-10-06 13:20:51,008 INFO [train_bert_encoder.py:1138] (3/4) Style texts: E LYCIAN CREW A HORRID SIGHT EV'N IN THE HERO'S VIEW FROM STEM TO STERN BY WAVES WAS OVERBORNE THE TREMBLING PILOT FROM HIS RUDDER TORN WAS HE
270
+ 2023-10-06 13:21:09,205 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.2.encoder.layers.2.bypass_mid.scale_min, batch_count=514800.0, ans=0.2
271
+ 2023-10-06 13:21:11,530 INFO [scaling.py:178] (3/4) ScheduledFloat: name=encoder.encoders.4.encoder.layers.0.self_attn_weights.pos_emb_skip_rate, batch_count=514800.0, ans=0.0
272
+ 2023-10-06 13:21:17,555 INFO [train_bert_encoder.py:1148] (3/4) Shape of encoded texts: torch.Size([49, 500])
273
+ 2023-10-06 13:21:27,807 INFO [checkpoint.py:75] (3/4) Saving checkpoint to zipformer_prompt_asr/exp_medium_BERT_memory_layer_0_memory_drop_0.05_md1000_with_style_1_with_context_list_1_2_styles_fixed_upper_fixed_BERT_rerun/bad-model-3.pt
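The run's last message saves a bad-model-3.pt checkpoint; in icefall this kind of save is typically triggered when the loss turns non-finite, so the offending state can be inspected later, though that trigger is an inference here, not something the log states. The save itself is an ordinary torch.save of state dicts, along these lines:

    # Sketch; the exact contents of the checkpoint dict are assumed.
    # torch.save(
    #     {
    #         "model": model.state_dict(),
    #         "optimizer": optimizer.state_dict(),
    #         "grad_scaler": scaler.state_dict(),
    #     },
    #     "bad-model-3.pt",  # the trailing 3 matches this process's rank, (3/4)
    # )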
log/log-train-2023-10-06-13-23-00-0 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-06-13-23-00-1 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-06-13-23-00-2 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-06-13-23-00-3 ADDED
The diff for this file is too large to render. See raw diff
 
log/log-train-2023-10-07-11-43-26-0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8cf2bcd84245fdee3aa85db8e2d4b5c63994ac640d3fb8bc2fbc7e7edfb8d0
3
+ size 13667478
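Every remaining entry in this commit is stored as a Git LFS pointer rather than the payload itself: three lines giving the spec version, a sha256 object id, and the byte size. The format is documented at the URL on its first line, so a parser is a few lines of Python:

    def parse_lfs_pointer(text):
        """Parse a Git LFS pointer file into its key/value fields."""
        fields = {}
        for line in text.strip().splitlines():
            key, _, value = line.partition(" ")
            fields[key] = value
        return {
            "version": fields["version"],
            "oid": fields["oid"],         # e.g. "sha256:8c8cf2bc..."
            "size": int(fields["size"]),  # payload size in bytes
        }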
log/log-train-2023-10-07-11-43-26-1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3983ecc92a6fd3ed0b1a4d936cdc9622ccba9de6259aea56c7427b02cec6e9bc
3
+ size 13561781
log/log-train-2023-10-07-11-43-26-2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad9c36dc5e604dc225417f70b7b751055ccf15b0989d4ae94755bc8dbd98123
3
+ size 13434792
log/log-train-2023-10-07-11-43-26-3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ebbfd986c9c2784842734de2e70da64170ea7e1db200a2612ea43c808320592
3
+ size 13522984
pretrained.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90451c7c5ace648493b8ef0d1166305317123ceb346e0e110482d6aab8fc5665
3
+ size 716685990
tensorboard/events.out.tfevents.1696349494.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2029689.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ec4be00ff14ac219a1936e95f05014361c40f0fd810f184646ae14553a79ef3
3
+ size 752910
tensorboard/events.out.tfevents.1696569403.de-74279-k2-train-2-0423201334-6587bbc68d-tn554.2104963.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c585dd6c0dbb1eb95c744bfa119f20257a2ede7b0e0a89bd9be995560832991
3
+ size 1279
tensorboard/events.out.tfevents.1696569780.de-74279-k2-train-9-0208143539-7dbf569d4f-r7nrb.31485.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dcaded5d43290f7e5df61408a9a70830baff9df335851f70746ae4168105f8d
3
+ size 298472
tensorboard/events.out.tfevents.1696650206.de-74279-k2-train-1-1220091118-57c4d55446-mvd6x.2916912.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adb7a03ce42a93c7d08eae3daede532d9d0c4bf248f7b8c6d5af8e9a08d033be
3
+ size 1246729