File size: 5,875 Bytes
778eef9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
2024-09-30 19:30:05,481 INFO [ctc_decode.py:844] Decoding started
2024-09-30 19:30:05,482 INFO [ctc_decode.py:852] Device: cuda:0
2024-09-30 19:30:05,482 INFO [ctc_decode.py:853] {
"attention_decoder_attention_dim": 512,
"attention_decoder_dim": 512,
"attention_decoder_feedforward_dim": 2048,
"attention_decoder_num_heads": 8,
"attention_decoder_num_layers": 6,
"avg": 25,
"batch_idx_train": 0,
"beam": 4,
"best_train_epoch": -1,
"best_train_loss": Infinity,
"best_valid_epoch": -1,
"best_valid_loss": Infinity,
"bpe_model": "data/lang_bpe_500/bpe.model",
"bucketing_sampler": true,
"causal": false,
"chunk_size": "16,32,64,-1",
"cnn_module_kernel": "31,31,15,15,15,31",
"concatenate_cuts": false,
"context_size": 2,
"decoder_dim": 512,
"decoding_method": "prefix-beam-search",
"device": "cuda:0",
"downsampling_factor": "1,2,4,8,4,2",
"drop_last": true,
"duration_factor": 1.0,
"enable_musan": true,
"enable_spec_aug": true,
"encoder_dim": "192,256,256,256,256,256",
"encoder_unmasked_dim": "192,192,192,192,192,192",
"env_info": {
"IP address": "10.30.14.169",
"hostname": "de-74279-k2-train-2-0904151501-7d58788f57-7cktm",
"icefall-git-branch": "cr-ctc",
"icefall-git-date": "Sun Sep 29 12:00:45 2024",
"icefall-git-sha1": "33fa9e8b-dirty",
"icefall-path": "/star-kw/kangwei/code/icefall_cotrain",
"k2-build-type": "Release",
"k2-git-date": "Fri Feb 23 01:48:38 2024",
"k2-git-sha1": "ff1d435a8d3c4eaa15828a84a7240678a70539a7",
"k2-path": "/star-kw/kangwei/envs/c5/lib/python3.8/site-packages/k2/__init__.py",
"k2-version": "1.24.4",
"k2-with-cuda": true,
"lhotse-path": "/star-kw/kangwei/envs/c5/lib/python3.8/site-packages/lhotse/__init__.py",
"lhotse-version": "1.25.0.dev+git.da4d70d.clean",
"python-version": "3.8",
"torch-cuda-available": true,
"torch-cuda-version": "11.8",
"torch-version": "2.1.0+cu118"
},
"epoch": 50,
"exp_dir": "zipformer/exp_cr_small",
"feature_dim": 80,
"feedforward_dim": "512,768,768,768,768,768",
"frame_shift_ms": 10,
"full_libri": true,
"gap": 1.0,
"hlg_scale": 0.6,
"ignore_id": -1,
"input_strategy": "PrecomputedFeatures",
"iter": 0,
"joiner_dim": 512,
"label_smoothing": 0.1,
"lang_dir": "data/lang_bpe_500",
"left_context_frames": "64,128,256,-1",
"lm_avg": 1,
"lm_dir": "data/lm",
"lm_epoch": 7,
"lm_exp_dir": null,
"lm_scale": 0.3,
"lm_type": "rnn",
"lm_vocab_size": 500,
"log_interval": 50,
"manifest_dir": "data/fbank",
"max_active_states": 10000,
"max_duration": 3000,
"min_active_states": 30,
"mini_libri": false,
"nbest_scale": 1.0,
"num_buckets": 30,
"num_encoder_layers": "2,2,2,2,2,2",
"num_heads": "4,4,4,8,4,4",
"num_paths": 100,
"num_workers": 2,
"on_the_fly_feats": false,
"output_beam": 8,
"pos_dim": 48,
"pos_head_dim": "4",
"query_head_dim": "32",
"res_dir": "zipformer/exp_cr_small/prefix-beam-search",
"reset_interval": 200,
"return_cuts": true,
"rnn_lm_embedding_dim": 2048,
"rnn_lm_hidden_dim": 2048,
"rnn_lm_num_layers": 3,
"rnn_lm_tie_weights": true,
"search_beam": 20,
"shuffle": true,
"skip_scoring": false,
"spec_aug_time_warp_factor": 80,
"subsampling_factor": 4,
"suffix": "epoch-50_avg-25_beam-4_use-averaged-model",
"transformer_lm_dim_feedforward": 2048,
"transformer_lm_embedding_dim": 768,
"transformer_lm_encoder_dim": 768,
"transformer_lm_exp_dir": null,
"transformer_lm_nhead": 8,
"transformer_lm_num_layers": 16,
"transformer_lm_tie_weights": true,
"use_attention_decoder": false,
"use_averaged_model": true,
"use_cr_ctc": false,
"use_ctc": true,
"use_double_scores": true,
"use_transducer": false,
"valid_interval": 3000,
"value_head_dim": "12",
"warm_step": 2000
}
2024-09-30 19:30:05,767 INFO [lexicon.py:168] Loading pre-compiled data/lang_bpe_500/Linv.pt
2024-09-30 19:30:05,823 INFO [ctc_decode.py:962] About to create model
2024-09-30 19:30:06,134 INFO [ctc_decode.py:1029] Calculating the averaged model over epoch range from 25 (excluded) to 50
2024-09-30 19:30:09,909 INFO [ctc_decode.py:1046] Number of model parameters: 22118279
2024-09-30 19:30:09,910 INFO [asr_datamodule.py:467] About to get test-clean cuts
2024-09-30 19:30:09,925 INFO [asr_datamodule.py:474] About to get test-other cuts
2024-09-30 19:30:15,829 INFO [ctc_decode.py:720] batch 0/?, cuts processed until now is 132
2024-09-30 19:31:00,334 INFO [ctc_decode.py:739] The transcripts are stored in zipformer/exp_cr_small/prefix-beam-search/recogs-test-clean-epoch-50_avg-25_beam-4_use-averaged-model.txt
2024-09-30 19:31:00,436 INFO [utils.py:668] [test-clean_prefix-beam-search] %WER 2.52% [1323 / 52576, 130 ins, 93 del, 1100 sub ]
2024-09-30 19:31:00,652 INFO [ctc_decode.py:767] Wrote detailed error stats to zipformer/exp_cr_small/prefix-beam-search/errs-test-clean-epoch-50_avg-25_beam-4_use-averaged-model.txt
2024-09-30 19:31:00,657 INFO [ctc_decode.py:783]
For test-clean, WER of different settings are:
prefix-beam-search 2.52 best for test-clean
2024-09-30 19:31:05,286 INFO [ctc_decode.py:720] batch 0/?, cuts processed until now is 86
2024-09-30 19:31:48,549 INFO [ctc_decode.py:739] The transcripts are stored in zipformer/exp_cr_small/prefix-beam-search/recogs-test-other-epoch-50_avg-25_beam-4_use-averaged-model.txt
2024-09-30 19:31:48,688 INFO [utils.py:668] [test-other_prefix-beam-search] %WER 5.85% [3063 / 52343, 272 ins, 232 del, 2559 sub ]
2024-09-30 19:31:48,910 INFO [ctc_decode.py:767] Wrote detailed error stats to zipformer/exp_cr_small/prefix-beam-search/errs-test-other-epoch-50_avg-25_beam-4_use-averaged-model.txt
2024-09-30 19:31:48,914 INFO [ctc_decode.py:783]
For test-other, WER of different settings are:
prefix-beam-search 5.85 best for test-other
2024-09-30 19:31:48,915 INFO [ctc_decode.py:1087] Done!
|