pyf98 committed on
Commit
8a41931
1 Parent(s): 121e1b8

add model files

Browse files
Files changed (20) hide show
  1. README.md +810 -0
  2. data/en_token_list/bpe_unigram500/bpe.model +3 -0
  3. exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz +3 -0
  4. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/RESULTS.md +32 -0
  5. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/config.yaml +707 -0
  6. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/backward_time.png +0 -0
  7. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/cer_ctc.png +0 -0
  8. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/cer_transducer.png +0 -0
  9. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/forward_time.png +0 -0
  10. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/iter_time.png +0 -0
  12. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss.png +0 -0
  13. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss_ctc.png +0 -0
  14. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss_transducer.png +0 -0
  15. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/optim0_lr0.png +0 -0
  16. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/optim_step_time.png +0 -0
  17. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/train_time.png +0 -0
  18. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/wer_transducer.png +0 -0
  19. exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/valid.loss.ave_10best.pth +3 -0
  20. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,810 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - tedlium2
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/tedlium2_transducer_e_branchformer`
15
+
16
+ This model was trained by Yifan Peng using tedlium2 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout 478ba004e114e7862b05fb01112de7f7e1da3996
26
+ pip install -e .
27
+ cd egs2/tedlium2/asr1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/tedlium2_transducer_e_branchformer
29
+ ```
30
+
31
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
32
+ # RESULTS
33
+ ## Environments
34
+ - date: `Thu Feb 9 01:29:33 CST 2023`
35
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
36
+ - espnet version: `espnet 202301`
37
+ - pytorch version: `pytorch 1.13.1`
38
+ - Git hash: `478ba004e114e7862b05fb01112de7f7e1da3996`
39
+ - Commit date: `Tue Feb 7 00:50:49 2023 +0000`
40
+
41
+ ## asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp
42
+ ### WER
43
+
44
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
45
+ |---|---|---|---|---|---|---|---|---|
46
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|14671|93.4|4.3|2.3|1.0|7.6|71.7|
47
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|27500|93.6|4.0|2.4|1.0|7.4|63.5|
48
+
49
+ ### CER
50
+
51
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
52
+ |---|---|---|---|---|---|---|---|---|
53
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|78259|97.1|0.9|2.0|0.9|3.8|71.7|
54
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|145066|97.1|0.9|2.1|0.9|3.9|63.5|
55
+
56
+ ### TER
57
+
58
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
59
+ |---|---|---|---|---|---|---|---|---|
60
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|28296|94.7|3.1|2.3|0.8|6.2|71.7|
61
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|52113|95.1|2.6|2.2|0.9|5.8|63.5|
62
+
63
+ ## ASR config
64
+
65
+ <details><summary>expand</summary>
66
+
67
+ ```
68
+ config: conf/tuning/train_asr_transducer_e_branchformer_e12.yaml
69
+ print_config: false
70
+ log_level: INFO
71
+ dry_run: false
72
+ iterator_type: sequence
73
+ output_dir: exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp
74
+ ngpu: 1
75
+ seed: 2022
76
+ num_workers: 6
77
+ num_att_plot: 3
78
+ dist_backend: nccl
79
+ dist_init_method: env://
80
+ dist_world_size: 2
81
+ dist_rank: 0
82
+ local_rank: 0
83
+ dist_master_addr: localhost
84
+ dist_master_port: 45753
85
+ dist_launcher: null
86
+ multiprocessing_distributed: true
87
+ unused_parameters: false
88
+ sharded_ddp: false
89
+ cudnn_enabled: true
90
+ cudnn_benchmark: false
91
+ cudnn_deterministic: true
92
+ collect_stats: false
93
+ write_collected_feats: false
94
+ max_epoch: 50
95
+ patience: null
96
+ val_scheduler_criterion:
97
+ - valid
98
+ - loss
99
+ early_stopping_criterion:
100
+ - valid
101
+ - loss
102
+ - min
103
+ best_model_criterion:
104
+ - - valid
105
+ - loss
106
+ - min
107
+ keep_nbest_models: 10
108
+ nbest_averaging_interval: 0
109
+ grad_clip: 5.0
110
+ grad_clip_type: 2.0
111
+ grad_noise: false
112
+ accum_grad: 5
113
+ no_forward_run: false
114
+ resume: true
115
+ train_dtype: float32
116
+ use_amp: false
117
+ log_interval: null
118
+ use_matplotlib: true
119
+ use_tensorboard: true
120
+ create_graph_in_tensorboard: false
121
+ use_wandb: false
122
+ wandb_project: null
123
+ wandb_id: null
124
+ wandb_entity: null
125
+ wandb_name: null
126
+ wandb_model_log_interval: -1
127
+ detect_anomaly: false
128
+ pretrain_path: null
129
+ init_param: []
130
+ ignore_init_mismatch: false
131
+ freeze_param: []
132
+ num_iters_per_epoch: null
133
+ batch_size: 20
134
+ valid_batch_size: null
135
+ batch_bins: 10000000
136
+ valid_batch_bins: null
137
+ train_shape_file:
138
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
139
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
140
+ valid_shape_file:
141
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
142
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
143
+ batch_type: numel
144
+ valid_batch_type: null
145
+ fold_length:
146
+ - 80000
147
+ - 150
148
+ sort_in_batch: descending
149
+ sort_batch: descending
150
+ multiple_iterator: false
151
+ chunk_length: 500
152
+ chunk_shift_ratio: 0.5
153
+ num_cache_chunks: 1024
154
+ train_data_path_and_name_and_type:
155
+ - - dump/raw/train_sp/wav.scp
156
+ - speech
157
+ - kaldi_ark
158
+ - - dump/raw/train_sp/text
159
+ - text
160
+ - text
161
+ valid_data_path_and_name_and_type:
162
+ - - dump/raw/dev/wav.scp
163
+ - speech
164
+ - kaldi_ark
165
+ - - dump/raw/dev/text
166
+ - text
167
+ - text
168
+ allow_variable_data_keys: false
169
+ max_cache_size: 0.0
170
+ max_cache_fd: 32
171
+ valid_max_cache_size: null
172
+ exclude_weight_decay: false
173
+ exclude_weight_decay_conf: {}
174
+ optim: adam
175
+ optim_conf:
176
+ lr: 0.002
177
+ weight_decay: 1.0e-06
178
+ scheduler: warmuplr
179
+ scheduler_conf:
180
+ warmup_steps: 15000
181
+ token_list:
182
+ - <blank>
183
+ - <unk>
184
+ - s
185
+ - ▁the
186
+ - t
187
+ - ▁a
188
+ - ▁and
189
+ - ▁to
190
+ - d
191
+ - e
192
+ - ▁of
193
+ - ''''
194
+ - n
195
+ - ing
196
+ - ▁in
197
+ - ▁i
198
+ - ▁that
199
+ - i
200
+ - a
201
+ - l
202
+ - p
203
+ - m
204
+ - y
205
+ - o
206
+ - ▁it
207
+ - ▁we
208
+ - c
209
+ - u
210
+ - ▁you
211
+ - ed
212
+ - ▁
213
+ - r
214
+ - ▁is
215
+ - re
216
+ - ▁this
217
+ - ar
218
+ - g
219
+ - ▁so
220
+ - al
221
+ - b
222
+ - ▁s
223
+ - or
224
+ - ▁f
225
+ - ▁c
226
+ - in
227
+ - k
228
+ - f
229
+ - ▁for
230
+ - ic
231
+ - er
232
+ - le
233
+ - ▁be
234
+ - ▁do
235
+ - ▁re
236
+ - ve
237
+ - ▁e
238
+ - ▁w
239
+ - ▁was
240
+ - es
241
+ - ▁they
242
+ - ly
243
+ - h
244
+ - ▁on
245
+ - v
246
+ - ▁are
247
+ - ri
248
+ - ▁have
249
+ - an
250
+ - ▁what
251
+ - ▁with
252
+ - ▁t
253
+ - w
254
+ - ur
255
+ - it
256
+ - ent
257
+ - ▁can
258
+ - ▁he
259
+ - ▁but
260
+ - ra
261
+ - ce
262
+ - ▁me
263
+ - ▁b
264
+ - ▁ma
265
+ - ▁p
266
+ - ll
267
+ - ▁st
268
+ - ▁one
269
+ - 'on'
270
+ - ▁about
271
+ - th
272
+ - ▁de
273
+ - en
274
+ - ▁all
275
+ - ▁not
276
+ - il
277
+ - ▁g
278
+ - ch
279
+ - at
280
+ - ▁there
281
+ - ▁mo
282
+ - ter
283
+ - ation
284
+ - tion
285
+ - ▁at
286
+ - ▁my
287
+ - ro
288
+ - ▁as
289
+ - te
290
+ - ▁le
291
+ - ▁con
292
+ - ▁like
293
+ - ▁people
294
+ - ▁or
295
+ - ▁an
296
+ - el
297
+ - ▁if
298
+ - ▁from
299
+ - ver
300
+ - ▁su
301
+ - ▁co
302
+ - ate
303
+ - ▁these
304
+ - ol
305
+ - ci
306
+ - ▁now
307
+ - ▁see
308
+ - ▁out
309
+ - ▁our
310
+ - ion
311
+ - ▁know
312
+ - ect
313
+ - ▁just
314
+ - as
315
+ - ▁ex
316
+ - ▁ch
317
+ - ▁d
318
+ - ▁when
319
+ - ▁very
320
+ - ▁think
321
+ - ▁who
322
+ - ▁because
323
+ - ▁go
324
+ - ▁up
325
+ - ▁us
326
+ - ▁pa
327
+ - ▁no
328
+ - ies
329
+ - ▁di
330
+ - ▁ho
331
+ - om
332
+ - ive
333
+ - ▁get
334
+ - id
335
+ - ▁o
336
+ - ▁hi
337
+ - un
338
+ - ▁how
339
+ - ▁by
340
+ - ir
341
+ - et
342
+ - ck
343
+ - ity
344
+ - ▁po
345
+ - ul
346
+ - ▁which
347
+ - ▁mi
348
+ - ▁some
349
+ - z
350
+ - ▁sp
351
+ - ▁un
352
+ - ▁going
353
+ - ▁pro
354
+ - ist
355
+ - ▁se
356
+ - ▁look
357
+ - ▁time
358
+ - ment
359
+ - de
360
+ - ▁more
361
+ - ▁had
362
+ - ng
363
+ - ▁would
364
+ - ge
365
+ - la
366
+ - ▁here
367
+ - ▁really
368
+ - x
369
+ - ▁your
370
+ - ▁them
371
+ - us
372
+ - me
373
+ - ▁en
374
+ - ▁two
375
+ - ▁k
376
+ - ▁li
377
+ - ▁world
378
+ - ne
379
+ - ow
380
+ - ▁way
381
+ - ▁want
382
+ - ▁work
383
+ - ▁don
384
+ - ▁lo
385
+ - ▁fa
386
+ - ▁were
387
+ - ▁their
388
+ - age
389
+ - vi
390
+ - ▁ha
391
+ - ac
392
+ - der
393
+ - est
394
+ - ▁bo
395
+ - am
396
+ - ▁other
397
+ - able
398
+ - ▁actually
399
+ - ▁sh
400
+ - ▁make
401
+ - ▁ba
402
+ - ▁la
403
+ - ine
404
+ - ▁into
405
+ - ▁where
406
+ - ▁could
407
+ - ▁comp
408
+ - ting
409
+ - ▁has
410
+ - ▁will
411
+ - ▁ne
412
+ - j
413
+ - ical
414
+ - ally
415
+ - ▁vi
416
+ - ▁things
417
+ - ▁te
418
+ - igh
419
+ - ▁say
420
+ - ▁years
421
+ - ers
422
+ - ▁ra
423
+ - ther
424
+ - ▁than
425
+ - ru
426
+ - ▁ro
427
+ - op
428
+ - ▁did
429
+ - ▁any
430
+ - ▁new
431
+ - ound
432
+ - ig
433
+ - ▁well
434
+ - mo
435
+ - ▁she
436
+ - ▁na
437
+ - ▁been
438
+ - he
439
+ - ▁thousand
440
+ - ▁car
441
+ - ▁take
442
+ - ▁right
443
+ - ▁then
444
+ - ▁need
445
+ - ▁start
446
+ - ▁hundred
447
+ - ▁something
448
+ - ▁over
449
+ - ▁com
450
+ - ia
451
+ - ▁kind
452
+ - um
453
+ - if
454
+ - ▁those
455
+ - ▁first
456
+ - ▁pre
457
+ - ta
458
+ - ▁said
459
+ - ize
460
+ - end
461
+ - ▁even
462
+ - ▁thing
463
+ - one
464
+ - ▁back
465
+ - ite
466
+ - ▁every
467
+ - ▁little
468
+ - ry
469
+ - ▁life
470
+ - ▁much
471
+ - ke
472
+ - ▁also
473
+ - ▁most
474
+ - ant
475
+ - per
476
+ - ▁three
477
+ - ▁come
478
+ - ▁lot
479
+ - ance
480
+ - ▁got
481
+ - ▁talk
482
+ - ▁per
483
+ - ▁inter
484
+ - ▁sa
485
+ - ▁use
486
+ - ▁mu
487
+ - ▁part
488
+ - ish
489
+ - ence
490
+ - ▁happen
491
+ - ▁bi
492
+ - ▁mean
493
+ - ough
494
+ - ▁qu
495
+ - ▁bu
496
+ - ▁day
497
+ - ▁ga
498
+ - ▁only
499
+ - ▁many
500
+ - ▁different
501
+ - ▁dr
502
+ - ▁th
503
+ - ▁show
504
+ - ful
505
+ - ▁down
506
+ - ated
507
+ - ▁good
508
+ - ▁tra
509
+ - ▁around
510
+ - ▁idea
511
+ - ▁human
512
+ - ous
513
+ - ▁put
514
+ - ▁through
515
+ - ▁five
516
+ - ▁why
517
+ - ▁change
518
+ - ▁real
519
+ - ff
520
+ - ible
521
+ - ▁fact
522
+ - ▁same
523
+ - ▁jo
524
+ - ▁live
525
+ - ▁year
526
+ - ▁problem
527
+ - ▁ph
528
+ - ▁four
529
+ - ▁give
530
+ - ▁big
531
+ - ▁tell
532
+ - ▁great
533
+ - ▁try
534
+ - ▁va
535
+ - ▁ru
536
+ - ▁system
537
+ - ▁six
538
+ - ▁plan
539
+ - ▁place
540
+ - ▁build
541
+ - ▁called
542
+ - ▁again
543
+ - ▁point
544
+ - ▁twenty
545
+ - ▁percent
546
+ - ▁nine
547
+ - ▁find
548
+ - ▁app
549
+ - ▁after
550
+ - ▁long
551
+ - ▁eight
552
+ - ▁imp
553
+ - ▁gene
554
+ - ▁design
555
+ - ▁today
556
+ - ▁should
557
+ - ▁made
558
+ - ious
559
+ - ▁came
560
+ - ▁learn
561
+ - ▁last
562
+ - ▁own
563
+ - way
564
+ - ▁turn
565
+ - ▁seven
566
+ - ▁high
567
+ - ▁question
568
+ - ▁person
569
+ - ▁brain
570
+ - ▁important
571
+ - ▁another
572
+ - ▁thought
573
+ - ▁trans
574
+ - ▁create
575
+ - ness
576
+ - ▁hu
577
+ - ▁power
578
+ - ▁act
579
+ - land
580
+ - ▁play
581
+ - ▁sort
582
+ - ▁old
583
+ - ▁before
584
+ - ▁course
585
+ - ▁understand
586
+ - ▁feel
587
+ - ▁might
588
+ - ▁each
589
+ - ▁million
590
+ - ▁better
591
+ - ▁together
592
+ - ▁ago
593
+ - ▁example
594
+ - ▁help
595
+ - ▁story
596
+ - ▁next
597
+ - ▁hand
598
+ - ▁school
599
+ - ▁water
600
+ - ▁develop
601
+ - ▁technology
602
+ - que
603
+ - ▁second
604
+ - ▁grow
605
+ - ▁still
606
+ - ▁cell
607
+ - ▁believe
608
+ - ▁number
609
+ - ▁small
610
+ - ▁between
611
+ - qui
612
+ - ▁data
613
+ - ▁become
614
+ - ▁america
615
+ - ▁maybe
616
+ - ▁space
617
+ - ▁project
618
+ - ▁organ
619
+ - ▁vo
620
+ - ▁children
621
+ - ▁book
622
+ - graph
623
+ - ▁open
624
+ - ▁fifty
625
+ - ▁picture
626
+ - ▁health
627
+ - ▁thirty
628
+ - ▁africa
629
+ - ▁reason
630
+ - ▁large
631
+ - ▁hard
632
+ - ▁computer
633
+ - ▁always
634
+ - ▁sense
635
+ - ▁money
636
+ - ▁women
637
+ - ▁everything
638
+ - ▁information
639
+ - ▁country
640
+ - ▁teach
641
+ - ▁energy
642
+ - ▁experience
643
+ - ▁food
644
+ - ▁process
645
+ - qua
646
+ - ▁interesting
647
+ - ▁future
648
+ - ▁science
649
+ - q
650
+ - '0'
651
+ - '5'
652
+ - '6'
653
+ - '9'
654
+ - '3'
655
+ - '8'
656
+ - '4'
657
+ - N
658
+ - A
659
+ - '7'
660
+ - S
661
+ - G
662
+ - F
663
+ - R
664
+ - L
665
+ - U
666
+ - E
667
+ - T
668
+ - H
669
+ - _
670
+ - B
671
+ - D
672
+ - J
673
+ - M
674
+ - ă
675
+ - ō
676
+ - ť
677
+ - '2'
678
+ - '-'
679
+ - '1'
680
+ - C
681
+ - <sos/eos>
682
+ init: null
683
+ input_size: null
684
+ ctc_conf:
685
+ dropout_rate: 0.0
686
+ ctc_type: builtin
687
+ reduce: true
688
+ ignore_nan_grad: null
689
+ zero_infinity: true
690
+ joint_net_conf:
691
+ joint_space_size: 320
692
+ use_preprocessor: true
693
+ token_type: bpe
694
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
695
+ non_linguistic_symbols: null
696
+ cleaner: null
697
+ g2p: null
698
+ speech_volume_normalize: null
699
+ rir_scp: null
700
+ rir_apply_prob: 1.0
701
+ noise_scp: null
702
+ noise_apply_prob: 1.0
703
+ noise_db_range: '13_15'
704
+ short_noise_thres: 0.5
705
+ aux_ctc_tasks: []
706
+ frontend: default
707
+ frontend_conf:
708
+ n_fft: 512
709
+ win_length: 400
710
+ hop_length: 160
711
+ fs: 16k
712
+ specaug: specaug
713
+ specaug_conf:
714
+ apply_time_warp: true
715
+ time_warp_window: 5
716
+ time_warp_mode: bicubic
717
+ apply_freq_mask: true
718
+ freq_mask_width_range:
719
+ - 0
720
+ - 27
721
+ num_freq_mask: 2
722
+ apply_time_mask: true
723
+ time_mask_width_ratio_range:
724
+ - 0.0
725
+ - 0.05
726
+ num_time_mask: 5
727
+ normalize: global_mvn
728
+ normalize_conf:
729
+ stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz
730
+ model: espnet
731
+ model_conf:
732
+ ctc_weight: 0.3
733
+ report_cer: false
734
+ report_wer: false
735
+ preencoder: null
736
+ preencoder_conf: {}
737
+ encoder: e_branchformer
738
+ encoder_conf:
739
+ output_size: 256
740
+ attention_heads: 4
741
+ attention_layer_type: rel_selfattn
742
+ pos_enc_layer_type: rel_pos
743
+ rel_pos_type: latest
744
+ cgmlp_linear_units: 1024
745
+ cgmlp_conv_kernel: 31
746
+ use_linear_after_conv: false
747
+ gate_activation: identity
748
+ num_blocks: 12
749
+ dropout_rate: 0.1
750
+ positional_dropout_rate: 0.1
751
+ attention_dropout_rate: 0.1
752
+ input_layer: conv2d
753
+ layer_drop_rate: 0.0
754
+ linear_units: 1024
755
+ positionwise_layer_type: linear
756
+ use_ffn: true
757
+ macaron_ffn: true
758
+ merge_conv_kernel: 31
759
+ postencoder: null
760
+ postencoder_conf: {}
761
+ decoder: transducer
762
+ decoder_conf:
763
+ rnn_type: lstm
764
+ num_layers: 1
765
+ hidden_size: 256
766
+ dropout: 0.1
767
+ dropout_embed: 0.2
768
+ preprocessor: default
769
+ preprocessor_conf: {}
770
+ required:
771
+ - output_dir
772
+ - token_list
773
+ version: '202301'
774
+ distributed: true
775
+ ```
776
+
777
+ </details>
778
+
779
+
780
+
781
+ ### Citing ESPnet
782
+
783
+ ```BibTex
784
+ @inproceedings{watanabe2018espnet,
785
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
786
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
787
+ year={2018},
788
+ booktitle={Proceedings of Interspeech},
789
+ pages={2207--2211},
790
+ doi={10.21437/Interspeech.2018-1456},
791
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
792
+ }
793
+
794
+
795
+
796
+
797
+ ```
798
+
799
+ or arXiv:
800
+
801
+ ```bibtex
802
+ @misc{watanabe2018espnet,
803
+ title={ESPnet: End-to-End Speech Processing Toolkit},
804
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
805
+ year={2018},
806
+ eprint={1804.00015},
807
+ archivePrefix={arXiv},
808
+ primaryClass={cs.CL}
809
+ }
810
+ ```
data/en_token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca848c3a0b756847776bc5c8e8ae797ad73381cb4fe9db9109b3131e9416b5f6
3
+ size 244853
exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9aa2bdc65662202e277008f62275fef28e17e564fbcf6b759a4a169cdcfdbbd
3
+ size 1402
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/RESULTS.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Thu Feb 9 01:29:33 CST 2023`
5
+ - python version: `3.9.15 (main, Nov 24 2022, 14:31:59) [GCC 11.2.0]`
6
+ - espnet version: `espnet 202301`
7
+ - pytorch version: `pytorch 1.13.1`
8
+ - Git hash: `478ba004e114e7862b05fb01112de7f7e1da3996`
9
+ - Commit date: `Tue Feb 7 00:50:49 2023 +0000`
10
+
11
+ ## asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|14671|93.4|4.3|2.3|1.0|7.6|71.7|
17
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|27500|93.6|4.0|2.4|1.0|7.4|63.5|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|78259|97.1|0.9|2.0|0.9|3.8|71.7|
24
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|145066|97.1|0.9|2.1|0.9|3.9|63.5|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
30
+ |decode_asr_transducer_asr_model_valid.loss.ave/dev|466|28296|94.7|3.1|2.3|0.8|6.2|71.7|
31
+ |decode_asr_transducer_asr_model_valid.loss.ave/test|1155|52113|95.1|2.6|2.2|0.9|5.8|63.5|
32
+
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/config.yaml ADDED
@@ -0,0 +1,707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_asr_transducer_e_branchformer_e12.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp
7
+ ngpu: 1
8
+ seed: 2022
9
+ num_workers: 6
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: 2
14
+ dist_rank: 0
15
+ local_rank: 0
16
+ dist_master_addr: localhost
17
+ dist_master_port: 45753
18
+ dist_launcher: null
19
+ multiprocessing_distributed: true
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ keep_nbest_models: 10
41
+ nbest_averaging_interval: 0
42
+ grad_clip: 5.0
43
+ grad_clip_type: 2.0
44
+ grad_noise: false
45
+ accum_grad: 5
46
+ no_forward_run: false
47
+ resume: true
48
+ train_dtype: float32
49
+ use_amp: false
50
+ log_interval: null
51
+ use_matplotlib: true
52
+ use_tensorboard: true
53
+ create_graph_in_tensorboard: false
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 10000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_raw_en_bpe500_sp/train/speech_shape
72
+ - exp/asr_stats_raw_en_bpe500_sp/train/text_shape.bpe
73
+ valid_shape_file:
74
+ - exp/asr_stats_raw_en_bpe500_sp/valid/speech_shape
75
+ - exp/asr_stats_raw_en_bpe500_sp/valid/text_shape.bpe
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 80000
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/raw/train_sp/wav.scp
89
+ - speech
90
+ - kaldi_ark
91
+ - - dump/raw/train_sp/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/raw/dev/wav.scp
96
+ - speech
97
+ - kaldi_ark
98
+ - - dump/raw/dev/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ exclude_weight_decay: false
106
+ exclude_weight_decay_conf: {}
107
+ optim: adam
108
+ optim_conf:
109
+ lr: 0.002
110
+ weight_decay: 1.0e-06
111
+ scheduler: warmuplr
112
+ scheduler_conf:
113
+ warmup_steps: 15000
114
+ token_list:
115
+ - <blank>
116
+ - <unk>
117
+ - s
118
+ - ▁the
119
+ - t
120
+ - ▁a
121
+ - ▁and
122
+ - ▁to
123
+ - d
124
+ - e
125
+ - ▁of
126
+ - ''''
127
+ - n
128
+ - ing
129
+ - ▁in
130
+ - ▁i
131
+ - ▁that
132
+ - i
133
+ - a
134
+ - l
135
+ - p
136
+ - m
137
+ - y
138
+ - o
139
+ - ▁it
140
+ - ▁we
141
+ - c
142
+ - u
143
+ - ▁you
144
+ - ed
145
+ - ▁
146
+ - r
147
+ - ▁is
148
+ - re
149
+ - ▁this
150
+ - ar
151
+ - g
152
+ - ▁so
153
+ - al
154
+ - b
155
+ - ▁s
156
+ - or
157
+ - ▁f
158
+ - ▁c
159
+ - in
160
+ - k
161
+ - f
162
+ - ▁for
163
+ - ic
164
+ - er
165
+ - le
166
+ - ▁be
167
+ - ▁do
168
+ - ▁re
169
+ - ve
170
+ - ▁e
171
+ - ▁w
172
+ - ▁was
173
+ - es
174
+ - ▁they
175
+ - ly
176
+ - h
177
+ - ▁on
178
+ - v
179
+ - ▁are
180
+ - ri
181
+ - ▁have
182
+ - an
183
+ - ▁what
184
+ - ▁with
185
+ - ▁t
186
+ - w
187
+ - ur
188
+ - it
189
+ - ent
190
+ - ▁can
191
+ - ▁he
192
+ - ▁but
193
+ - ra
194
+ - ce
195
+ - ▁me
196
+ - ▁b
197
+ - ▁ma
198
+ - ▁p
199
+ - ll
200
+ - ▁st
201
+ - ▁one
202
+ - 'on'
203
+ - ▁about
204
+ - th
205
+ - ▁de
206
+ - en
207
+ - ▁all
208
+ - ▁not
209
+ - il
210
+ - ▁g
211
+ - ch
212
+ - at
213
+ - ▁there
214
+ - ▁mo
215
+ - ter
216
+ - ation
217
+ - tion
218
+ - ▁at
219
+ - ▁my
220
+ - ro
221
+ - ▁as
222
+ - te
223
+ - ▁le
224
+ - ▁con
225
+ - ▁like
226
+ - ▁people
227
+ - ▁or
228
+ - ▁an
229
+ - el
230
+ - ▁if
231
+ - ▁from
232
+ - ver
233
+ - ▁su
234
+ - ▁co
235
+ - ate
236
+ - ▁these
237
+ - ol
238
+ - ci
239
+ - ▁now
240
+ - ▁see
241
+ - ▁out
242
+ - ▁our
243
+ - ion
244
+ - ▁know
245
+ - ect
246
+ - ▁just
247
+ - as
248
+ - ▁ex
249
+ - ▁ch
250
+ - ▁d
251
+ - ▁when
252
+ - ▁very
253
+ - ▁think
254
+ - ▁who
255
+ - ▁because
256
+ - ▁go
257
+ - ▁up
258
+ - ▁us
259
+ - ▁pa
260
+ - ▁no
261
+ - ies
262
+ - ▁di
263
+ - ▁ho
264
+ - om
265
+ - ive
266
+ - ▁get
267
+ - id
268
+ - ▁o
269
+ - ▁hi
270
+ - un
271
+ - ▁how
272
+ - ▁by
273
+ - ir
274
+ - et
275
+ - ck
276
+ - ity
277
+ - ▁po
278
+ - ul
279
+ - ▁which
280
+ - ▁mi
281
+ - ▁some
282
+ - z
283
+ - ▁sp
284
+ - ▁un
285
+ - ▁going
286
+ - ▁pro
287
+ - ist
288
+ - ▁se
289
+ - ▁look
290
+ - ▁time
291
+ - ment
292
+ - de
293
+ - ▁more
294
+ - ▁had
295
+ - ng
296
+ - ▁would
297
+ - ge
298
+ - la
299
+ - ▁here
300
+ - ▁really
301
+ - x
302
+ - ▁your
303
+ - ▁them
304
+ - us
305
+ - me
306
+ - ▁en
307
+ - ▁two
308
+ - ▁k
309
+ - ▁li
310
+ - ▁world
311
+ - ne
312
+ - ow
313
+ - ▁way
314
+ - ▁want
315
+ - ▁work
316
+ - ▁don
317
+ - ▁lo
318
+ - ▁fa
319
+ - ▁were
320
+ - ▁their
321
+ - age
322
+ - vi
323
+ - ▁ha
324
+ - ac
325
+ - der
326
+ - est
327
+ - ▁bo
328
+ - am
329
+ - ▁other
330
+ - able
331
+ - ▁actually
332
+ - ▁sh
333
+ - ▁make
334
+ - ▁ba
335
+ - ▁la
336
+ - ine
337
+ - ▁into
338
+ - ▁where
339
+ - ▁could
340
+ - ▁comp
341
+ - ting
342
+ - ▁has
343
+ - ▁will
344
+ - ▁ne
345
+ - j
346
+ - ical
347
+ - ally
348
+ - ▁vi
349
+ - ▁things
350
+ - ▁te
351
+ - igh
352
+ - ▁say
353
+ - ▁years
354
+ - ers
355
+ - ▁ra
356
+ - ther
357
+ - ▁than
358
+ - ru
359
+ - ▁ro
360
+ - op
361
+ - ▁did
362
+ - ▁any
363
+ - ▁new
364
+ - ound
365
+ - ig
366
+ - ▁well
367
+ - mo
368
+ - ▁she
369
+ - ▁na
370
+ - ▁been
371
+ - he
372
+ - ▁thousand
373
+ - ▁car
374
+ - ▁take
375
+ - ▁right
376
+ - ▁then
377
+ - ▁need
378
+ - ▁start
379
+ - ▁hundred
380
+ - ▁something
381
+ - ▁over
382
+ - ▁com
383
+ - ia
384
+ - ▁kind
385
+ - um
386
+ - if
387
+ - ▁those
388
+ - ▁first
389
+ - ▁pre
390
+ - ta
391
+ - ▁said
392
+ - ize
393
+ - end
394
+ - ▁even
395
+ - ▁thing
396
+ - one
397
+ - ▁back
398
+ - ite
399
+ - ▁every
400
+ - ▁little
401
+ - ry
402
+ - ▁life
403
+ - ▁much
404
+ - ke
405
+ - ▁also
406
+ - ▁most
407
+ - ant
408
+ - per
409
+ - ▁three
410
+ - ▁come
411
+ - ▁lot
412
+ - ance
413
+ - ▁got
414
+ - ▁talk
415
+ - ▁per
416
+ - ▁inter
417
+ - ▁sa
418
+ - ▁use
419
+ - ▁mu
420
+ - ▁part
421
+ - ish
422
+ - ence
423
+ - ▁happen
424
+ - ▁bi
425
+ - ▁mean
426
+ - ough
427
+ - ▁qu
428
+ - ▁bu
429
+ - ▁day
430
+ - ▁ga
431
+ - ▁only
432
+ - ▁many
433
+ - ▁different
434
+ - ▁dr
435
+ - ▁th
436
+ - ▁show
437
+ - ful
438
+ - ▁down
439
+ - ated
440
+ - ▁good
441
+ - ▁tra
442
+ - ▁around
443
+ - ▁idea
444
+ - ▁human
445
+ - ous
446
+ - ▁put
447
+ - ▁through
448
+ - ▁five
449
+ - ▁why
450
+ - ▁change
451
+ - ▁real
452
+ - ff
453
+ - ible
454
+ - ▁fact
455
+ - ▁same
456
+ - ▁jo
457
+ - ▁live
458
+ - ▁year
459
+ - ▁problem
460
+ - ▁ph
461
+ - ▁four
462
+ - ▁give
463
+ - ▁big
464
+ - ▁tell
465
+ - ▁great
466
+ - ▁try
467
+ - ▁va
468
+ - ▁ru
469
+ - ▁system
470
+ - ▁six
471
+ - ▁plan
472
+ - ▁place
473
+ - ▁build
474
+ - ▁called
475
+ - ▁again
476
+ - ▁point
477
+ - ▁twenty
478
+ - ▁percent
479
+ - ▁nine
480
+ - ▁find
481
+ - ▁app
482
+ - ▁after
483
+ - ▁long
484
+ - ▁eight
485
+ - ▁imp
486
+ - ▁gene
487
+ - ▁design
488
+ - ▁today
489
+ - ▁should
490
+ - ▁made
491
+ - ious
492
+ - ▁came
493
+ - ▁learn
494
+ - ▁last
495
+ - ▁own
496
+ - way
497
+ - ▁turn
498
+ - ▁seven
499
+ - ▁high
500
+ - ▁question
501
+ - ▁person
502
+ - ▁brain
503
+ - ▁important
504
+ - ▁another
505
+ - ▁thought
506
+ - ▁trans
507
+ - ▁create
508
+ - ness
509
+ - ▁hu
510
+ - ▁power
511
+ - ▁act
512
+ - land
513
+ - ▁play
514
+ - ▁sort
515
+ - ▁old
516
+ - ▁before
517
+ - ▁course
518
+ - ▁understand
519
+ - ▁feel
520
+ - ▁might
521
+ - ▁each
522
+ - ▁million
523
+ - ▁better
524
+ - ▁together
525
+ - ▁ago
526
+ - ▁example
527
+ - ▁help
528
+ - ▁story
529
+ - ▁next
530
+ - ▁hand
531
+ - ▁school
532
+ - ▁water
533
+ - ▁develop
534
+ - ▁technology
535
+ - que
536
+ - ▁second
537
+ - ▁grow
538
+ - ▁still
539
+ - ▁cell
540
+ - ▁believe
541
+ - ▁number
542
+ - ▁small
543
+ - ▁between
544
+ - qui
545
+ - ▁data
546
+ - ▁become
547
+ - ▁america
548
+ - ▁maybe
549
+ - ▁space
550
+ - ▁project
551
+ - ▁organ
552
+ - ▁vo
553
+ - ▁children
554
+ - ▁book
555
+ - graph
556
+ - ▁open
557
+ - ▁fifty
558
+ - ▁picture
559
+ - ▁health
560
+ - ▁thirty
561
+ - ▁africa
562
+ - ▁reason
563
+ - ▁large
564
+ - ▁hard
565
+ - ▁computer
566
+ - ▁always
567
+ - ▁sense
568
+ - ▁money
569
+ - ▁women
570
+ - ▁everything
571
+ - ▁information
572
+ - ▁country
573
+ - ▁teach
574
+ - ▁energy
575
+ - ▁experience
576
+ - ▁food
577
+ - ▁process
578
+ - qua
579
+ - ▁interesting
580
+ - ▁future
581
+ - ▁science
582
+ - q
583
+ - '0'
584
+ - '5'
585
+ - '6'
586
+ - '9'
587
+ - '3'
588
+ - '8'
589
+ - '4'
590
+ - N
591
+ - A
592
+ - '7'
593
+ - S
594
+ - G
595
+ - F
596
+ - R
597
+ - L
598
+ - U
599
+ - E
600
+ - T
601
+ - H
602
+ - _
603
+ - B
604
+ - D
605
+ - J
606
+ - M
607
+ - ă
608
+ - ō
609
+ - ť
610
+ - '2'
611
+ - '-'
612
+ - '1'
613
+ - C
614
+ - <sos/eos>
615
+ init: null
616
+ input_size: null
617
+ ctc_conf:
618
+ dropout_rate: 0.0
619
+ ctc_type: builtin
620
+ reduce: true
621
+ ignore_nan_grad: null
622
+ zero_infinity: true
623
+ joint_net_conf:
624
+ joint_space_size: 320
625
+ use_preprocessor: true
626
+ token_type: bpe
627
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
628
+ non_linguistic_symbols: null
629
+ cleaner: null
630
+ g2p: null
631
+ speech_volume_normalize: null
632
+ rir_scp: null
633
+ rir_apply_prob: 1.0
634
+ noise_scp: null
635
+ noise_apply_prob: 1.0
636
+ noise_db_range: '13_15'
637
+ short_noise_thres: 0.5
638
+ aux_ctc_tasks: []
639
+ frontend: default
640
+ frontend_conf:
641
+ n_fft: 512
642
+ win_length: 400
643
+ hop_length: 160
644
+ fs: 16k
645
+ specaug: specaug
646
+ specaug_conf:
647
+ apply_time_warp: true
648
+ time_warp_window: 5
649
+ time_warp_mode: bicubic
650
+ apply_freq_mask: true
651
+ freq_mask_width_range:
652
+ - 0
653
+ - 27
654
+ num_freq_mask: 2
655
+ apply_time_mask: true
656
+ time_mask_width_ratio_range:
657
+ - 0.0
658
+ - 0.05
659
+ num_time_mask: 5
660
+ normalize: global_mvn
661
+ normalize_conf:
662
+ stats_file: exp/asr_stats_raw_en_bpe500_sp/train/feats_stats.npz
663
+ model: espnet
664
+ model_conf:
665
+ ctc_weight: 0.3
666
+ report_cer: false
667
+ report_wer: false
668
+ preencoder: null
669
+ preencoder_conf: {}
670
+ encoder: e_branchformer
671
+ encoder_conf:
672
+ output_size: 256
673
+ attention_heads: 4
674
+ attention_layer_type: rel_selfattn
675
+ pos_enc_layer_type: rel_pos
676
+ rel_pos_type: latest
677
+ cgmlp_linear_units: 1024
678
+ cgmlp_conv_kernel: 31
679
+ use_linear_after_conv: false
680
+ gate_activation: identity
681
+ num_blocks: 12
682
+ dropout_rate: 0.1
683
+ positional_dropout_rate: 0.1
684
+ attention_dropout_rate: 0.1
685
+ input_layer: conv2d
686
+ layer_drop_rate: 0.0
687
+ linear_units: 1024
688
+ positionwise_layer_type: linear
689
+ use_ffn: true
690
+ macaron_ffn: true
691
+ merge_conv_kernel: 31
692
+ postencoder: null
693
+ postencoder_conf: {}
694
+ decoder: transducer
695
+ decoder_conf:
696
+ rnn_type: lstm
697
+ num_layers: 1
698
+ hidden_size: 256
699
+ dropout: 0.1
700
+ dropout_embed: 0.2
701
+ preprocessor: default
702
+ preprocessor_conf: {}
703
+ required:
704
+ - output_dir
705
+ - token_list
706
+ version: '202301'
707
+ distributed: true
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/backward_time.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/cer_ctc.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/cer_transducer.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/forward_time.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/iter_time.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss_ctc.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/loss_transducer.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/optim0_lr0.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/optim_step_time.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/train_time.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/images/wer_transducer.png ADDED
exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/valid.loss.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:090d3bd96afc73fd9e17602521fc1417743ea730d88624f29c9c605464f0ff3d
3
+ size 105304833
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202301'
2
+ files:
3
+ asr_model_file: exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/valid.loss.ave_10best.pth
4
+ python: "3.9.15 (main, Nov 24 2022, 14:31:59) \n[GCC 11.2.0]"
5
+ timestamp: 1675927850.063237
6
+ torch: 1.13.1
7
+ yaml_files:
8
+ asr_train_config: exp/asr_train_asr_transducer_e_branchformer_e12_raw_en_bpe500_sp/config.yaml