Sevain committed on
Commit
5281c5c
·
verified ·
1 Parent(s): f225b83

Upload hyperparams.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +97 -0
hyperparams.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ################################
2
+ # Model: wav2vec2 + DNN + CTC
3
+ # Augmentation: SpecAugment
4
+ # Authors: Titouan Parcollet 2021
5
+ # ################################
6
+
7
+ sample_rate: 16000
8
+ wav2vec2_hub: LeBenchmark/wav2vec2-FR-7K-large
9
+
10
+ # Tokenizer parameters
11
+ token_type: char # ["unigram", "bpe", "char"]
12
+ character_coverage: 1.0
13
+
14
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
15
+
16
+ # Model parameters
17
+ activation: !name:torch.nn.LeakyReLU
18
+ dnn_layers: 2
19
+ dnn_neurons: 1024
20
+ emb_size: 128
21
+ dec_neurons: 1024
22
+
23
+ # Outputs
24
+ output_neurons: 63 # Tokenizer vocabulary size (char tokens here); blank index = 0, see bos_index/eos_index below
25
+
26
+ # Decoding parameters
27
+ # Make sure the bos and eos indices match those of the tokenizer
28
+ blank_index: 0
29
+ bos_index: 1
30
+ eos_index: 2
31
+ min_decode_ratio: 0.0
32
+ max_decode_ratio: 1.0
33
+ beam_size: 80
34
+ eos_threshold: 1.5
35
+ using_max_attn_shift: True
36
+ max_attn_shift: 140
37
+ ctc_weight_decode: 0.0
38
+ temperature: 1.50
39
+
40
+ enc: !new:speechbrain.nnet.containers.Sequential
41
+ input_shape: [null, null, 1024]
42
+ linear1: !name:speechbrain.nnet.linear.Linear
43
+ n_neurons: 1024
44
+ bias: True
45
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
46
+ activation: !new:torch.nn.LeakyReLU
47
+ drop: !new:torch.nn.Dropout
48
+ p: 0.15
49
+ linear2: !name:speechbrain.nnet.linear.Linear
50
+ n_neurons: 1024
51
+ bias: True
52
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
53
+ activation2: !new:torch.nn.LeakyReLU
54
+ drop2: !new:torch.nn.Dropout
55
+ p: 0.15
56
+ linear3: !name:speechbrain.nnet.linear.Linear
57
+ n_neurons: 1024
58
+ bias: True
59
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
60
+ activation3: !new:torch.nn.LeakyReLU
61
+
62
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
63
+ source: !ref <wav2vec2_hub>
64
+ output_norm: True
65
+ freeze: True
66
+ save_path: model_checkpoints
67
+
68
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
69
+ input_size: !ref <dnn_neurons>
70
+ n_neurons: !ref <output_neurons>
71
+
72
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
73
+ apply_log: True
74
+
75
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
76
+ blank_index: !ref <blank_index>
77
+
78
+ asr_model: !new:torch.nn.ModuleList
79
+ - [!ref <enc>, !ref <ctc_lin>]
80
+
81
+ encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
82
+ wav2vec2: !ref <wav2vec2>
83
+ enc: !ref <enc>
84
+ ctc_lin: !ref <ctc_lin>
85
+ log_softmax: !ref <log_softmax>
86
+
87
+ decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
88
+ blank_id: !ref <blank_index>
89
+
90
+ modules:
91
+ encoder: !ref <encoder>
92
+
93
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
94
+ loadables:
95
+ wav2vec2: !ref <wav2vec2>
96
+ asr: !ref <asr_model>
97
+ tokenizer: !ref <tokenizer>