Update hyperparams.yaml
Browse files- hyperparams.yaml +13 -141
 
    	
        hyperparams.yaml
    CHANGED
    
    | 
         @@ -1,91 +1,10 @@ 
     | 
|
| 1 | 
         
            -
            # Generated 2022-11-21 from:
         
     | 
| 2 | 
         
            -
            # /home/cem/Dropbox/speechbrain-1/recipes/ESC50/classification/hparams/cnn14.yaml
         
     | 
| 3 | 
         
            -
            # yamllint disable
         
     | 
| 4 | 
         
            -
            # #################################
         
     | 
| 5 | 
         
            -
            # Basic training parameters for sound classification using the ESC50 dataset.
         
     | 
| 6 | 
         
            -
            # This recipe uses the ecapa-tdnn backbone for classification.
         
     | 
| 7 | 
         
            -
            #
         
     | 
| 8 | 
         
            -
            # Author:
         
     | 
| 9 | 
         
            -
            #  * Cem Subakan
         
     | 
| 10 | 
         
            -
            #  (based on the SpeechBrain UrbanSound8k recipe)
         
     | 
| 11 | 
         
            -
            # #################################
         
     | 
| 12 | 
         | 
| 13 | 
         
            -
            # Seed needs to be set at top of yaml, before objects with parameters are made
         
     | 
| 14 | 
         
            -
            seed: 11
         
     | 
| 15 | 
         
            -
            __set_seed: !!python/object/apply:torch.manual_seed [11]
         
     | 
| 16 | 
         
            -
             
     | 
| 17 | 
         
            -
            # Set up folders for reading from and writing to
         
     | 
| 18 | 
         
            -
            # Dataset must already exist at `audio_data_folder`
         
     | 
| 19 | 
         
            -
            data_folder: /data2/ESC-50-master
         
     | 
| 20 | 
         
            -
                                      # e.g., /localscratch/UrbanSound8K
         
     | 
| 21 | 
         
            -
            open_rir_folder: <data_folder>/RIRS # Change if needed
         
     | 
| 22 | 
         
            -
            audio_data_folder: /data2/ESC-50-master/audio
         
     | 
| 23 | 
         
            -
             
     | 
| 24 | 
         
            -
            # TODO the following folder will contain the resampled audio
         
     | 
| 25 | 
         
            -
            # files (mono channel and config SR) to train on
         
     | 
| 26 | 
         
            -
            #reasmpled_audio_data_folder: !ref <data_folder>/audio_mono16kHz
         
     | 
| 27 | 
         
            -
            #
         
     | 
| 28 | 
         
            -
            experiment_name: cnn14
         
     | 
| 29 | 
         
            -
            output_folder: ./results/cnn14/11
         
     | 
| 30 | 
         
            -
            save_folder: ./results/cnn14/11/save
         
     | 
| 31 | 
         
            -
            train_log: ./results/cnn14/11/train_log.txt
         
     | 
| 32 | 
         
            -
             
     | 
| 33 | 
         
            -
            test_only: false
         
     | 
| 34 | 
         
            -
             
     | 
| 35 | 
         
            -
            # Tensorboard logs
         
     | 
| 36 | 
         
            -
            use_tensorboard: false
         
     | 
| 37 | 
         
            -
            tensorboard_logs_folder: ./results/cnn14/11/tb_logs/
         
     | 
| 38 | 
         
            -
             
     | 
| 39 | 
         
            -
            # Path where data manifest files will be stored
         
     | 
| 40 | 
         
            -
            train_annotation: /data2/ESC-50-master/manifest/train.json
         
     | 
| 41 | 
         
            -
            valid_annotation: /data2/ESC-50-master/manifest/valid.json
         
     | 
| 42 | 
         
            -
            test_annotation: /data2/ESC-50-master/manifest/test.json
         
     | 
| 43 | 
         
            -
             
     | 
| 44 | 
         
            -
            # To standardize results, UrbanSound8k has pre-separated samples into
         
     | 
| 45 | 
         
            -
            # 10 folds for multi-fold validation
         
     | 
| 46 | 
         
            -
            train_fold_nums: [1, 2, 3]
         
     | 
| 47 | 
         
            -
            valid_fold_nums: [4]
         
     | 
| 48 | 
         
            -
            test_fold_nums: [5]
         
     | 
| 49 | 
         
            -
            skip_manifest_creation: false
         
     | 
| 50 | 
         
            -
             
     | 
| 51 | 
         
            -
            ckpt_interval_minutes: 15 # save checkpoint every N min
         
     | 
| 52 | 
         
            -
             
     | 
| 53 | 
         
            -
            # Training parameters
         
     | 
| 54 | 
         
            -
            number_of_epochs: 200
         
     | 
| 55 | 
         
            -
            batch_size: 32
         
     | 
| 56 | 
         
            -
            lr: 0.0002
         
     | 
| 57 | 
         
            -
            base_lr: 0.00000001
         
     | 
| 58 | 
         
            -
            max_lr: 0.0002
         
     | 
| 59 | 
         
            -
            step_size: 65000
         
     | 
| 60 | 
         
             
            sample_rate: 44100
         
     | 
| 61 | 
         | 
| 62 | 
         
             
            device: cpu
         
     | 
| 63 | 
         | 
| 64 | 
         
            -
            # Feature parameters
         
     | 
| 65 | 
         
            -
            n_mels: 80
         
     | 
| 66 | 
         
            -
            left_frames: 0
         
     | 
| 67 | 
         
            -
            right_frames: 0
         
     | 
| 68 | 
         
            -
            deltas: false
         
     | 
| 69 | 
         
            -
            amp_to_db: true
         
     | 
| 70 | 
         
            -
            normalize: true
         
     | 
| 71 | 
         
            -
            use_melspectra: true
         
     | 
| 72 | 
         
            -
             
     | 
| 73 | 
         
            -
            # Number of classes
         
     | 
| 74 | 
         
            -
            out_n_neurons: 50
         
     | 
| 75 | 
         
            -
             
     | 
| 76 | 
         
            -
            # Note that it's actually important to shuffle the data here
         
     | 
| 77 | 
         
            -
            # (or at the very least, not sort the data by duration)
         
     | 
| 78 | 
         
            -
            # Also note that this does not violate the UrbanSound8k "no-shuffle" policy
         
     | 
| 79 | 
         
            -
            # because this does not mix samples from folds in train to valid/test, only
         
     | 
| 80 | 
         
            -
            # within train or valid, or test
         
     | 
| 81 | 
         
            -
            shuffle: true
         
     | 
| 82 | 
         
            -
            dataloader_options:
         
     | 
| 83 | 
         
            -
              batch_size: 32
         
     | 
| 84 | 
         
            -
              shuffle: true
         
     | 
| 85 | 
         
            -
              num_workers: 0
         
     | 
| 86 | 
         
            -
             
     | 
| 87 | 
         
             
            # Functions
         
     | 
| 88 | 
         
            -
            compute_features:  
     | 
| 89 | 
         
             
              n_mels: 80
         
     | 
| 90 | 
         
             
              left_frames: 0
         
     | 
| 91 | 
         
             
              right_frames: 0
         
     | 
| 
         @@ -96,33 +15,16 @@ compute_features: &id003 !new:speechbrain.lobes.features.Fbank 
     | 
|
| 96 | 
         
             
              hop_length: 10
         
     | 
| 97 | 
         | 
| 98 | 
         
             
            use_pretrain: false
         
     | 
| 99 | 
         
            -
            embedding_model:  
     | 
| 100 | 
         
             
              mel_bins: 80
         
     | 
| 101 | 
         
             
              emb_dim: 2048
         
     | 
| 102 | 
         | 
| 103 | 
         
            -
            classifier:  
     | 
| 104 | 
         
             
              input_size: 2048
         
     | 
| 105 | 
         
             
              out_neurons: 50
         
     | 
| 106 | 
         
             
              lin_blocks: 1
         
     | 
| 107 | 
         | 
| 108 | 
         
            -
             
     | 
| 109 | 
         
            -
             
     | 
| 110 | 
         
            -
             
     | 
| 111 | 
         
            -
            # If you do not want to use the pretrained separator you can simply delete the pretrained_separator field.
         
     | 
| 112 | 
         
            -
              limit: 200
         
     | 
| 113 | 
         
            -
             
     | 
| 114 | 
         
            -
             
     | 
| 115 | 
         
            -
            # Definition of the augmentation pipeline.
         
     | 
| 116 | 
         
            -
            # If concat_augment = False, the augmentation techniques are applied
         
     | 
| 117 | 
         
            -
            # in sequence. If concat_augment = True, all the augmented signals
         
     | 
| 118 | 
         
            -
            # are concatenated in a single big batch.
         
     | 
| 119 | 
         
            -
             
     | 
| 120 | 
         
            -
            augment_pipeline: []
         
     | 
| 121 | 
         
            -
            concat_augment: true
         
     | 
| 122 | 
         
            -
             
     | 
| 123 | 
         
            -
             
     | 
| 124 | 
         
            -
            mean_var_norm: &id011 !new:speechbrain.processing.features.InputNormalization
         
     | 
| 125 | 
         
            -
             
     | 
| 126 | 
         
             
              norm_type: sentence
         
     | 
| 127 | 
         
             
              std_norm: false
         
     | 
| 128 | 
         | 
| 
         @@ -131,55 +33,25 @@ n_fft: 1024 
     | 
|
| 131 | 
         
             
            spec_mag_power: 0.5
         
     | 
| 132 | 
         
             
            hop_length: 11.6099
         
     | 
| 133 | 
         
             
            win_length: 23.2199
         
     | 
| 134 | 
         
            -
             
     | 
| 
         | 
|
| 135 | 
         
             
              n_fft: 1024
         
     | 
| 136 | 
         
             
              hop_length: 11.6099
         
     | 
| 137 | 
         
             
              win_length: 23.2199
         
     | 
| 138 | 
         
             
              sample_rate: 44100
         
     | 
| 139 | 
         | 
| 140 | 
         
            -
            compute_fbank:  
     | 
| 141 | 
         
             
              n_mels: 80
         
     | 
| 142 | 
         
             
              n_fft: 1024
         
     | 
| 143 | 
         
             
              sample_rate: 44100
         
     | 
| 144 | 
         | 
| 145 | 
         
             
            modules:
         
     | 
| 146 | 
         
            -
              compute_stft:  
     | 
| 147 | 
         
            -
              compute_fbank:  
     | 
| 148 | 
         
            -
              compute_features:  
     | 
| 149 | 
         
            -
              embedding_model:  
     | 
| 150 | 
         
            -
              classifier:  
     | 
| 151 | 
         
            -
              mean_var_norm:  
     | 
| 152 | 
         
            -
            compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
         
     | 
| 153 | 
         
            -
              loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
         
     | 
| 154 | 
         
            -
                margin: 0.2
         
     | 
| 155 | 
         
            -
                scale: 30
         
     | 
| 156 | 
         
            -
             
     | 
| 157 | 
         
            -
            # compute_error: !name:speechbrain.nnet.losses.classification_error
         
     | 
| 158 | 
         
            -
             
     | 
| 159 | 
         
            -
            opt_class: !name:torch.optim.Adam
         
     | 
| 160 | 
         
            -
              lr: 0.0002
         
     | 
| 161 | 
         
            -
              weight_decay: 0.000002
         
     | 
| 162 | 
         
            -
             
     | 
| 163 | 
         
            -
            lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler
         
     | 
| 164 | 
         
            -
              base_lr: 0.00000001
         
     | 
| 165 | 
         
            -
              max_lr: 0.0002
         
     | 
| 166 | 
         
            -
              step_size: 65000
         
     | 
| 167 | 
         
            -
             
     | 
| 168 | 
         
            -
            # Logging + checkpoints
         
     | 
| 169 | 
         
            -
            train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
         
     | 
| 170 | 
         
            -
              save_file: ./results/cnn14/11/train_log.txt
         
     | 
| 171 | 
         
            -
             
     | 
| 172 | 
         
            -
            error_stats: !name:speechbrain.utils.metric_stats.MetricStats
         
     | 
| 173 | 
         
            -
              metric: !name:speechbrain.nnet.losses.classification_error
         
     | 
| 174 | 
         
            -
                reduction: batch
         
     | 
| 175 | 
         
            -
             
     | 
| 176 | 
         
            -
            checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         
     | 
| 177 | 
         
            -
              checkpoints_dir: ./results/cnn14/11/save
         
     | 
| 178 | 
         
            -
              recoverables:
         
     | 
| 179 | 
         
            -
                embedding_model: *id009
         
     | 
| 180 | 
         
            -
                classifier: *id010
         
     | 
| 181 | 
         
            -
                normalizer: *id011
         
     | 
| 182 | 
         
            -
                counter: *id012
         
     | 
| 183 | 
         | 
| 184 | 
         | 
| 185 | 
         
             
            label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 1 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 2 | 
         
             
            sample_rate: 44100
         
     | 
| 3 | 
         | 
| 4 | 
         
             
            device: cpu
         
     | 
| 5 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 6 | 
         
             
            # Functions
         
     | 
| 7 | 
         
            +
            compute_features: !new:speechbrain.lobes.features.Fbank
         
     | 
| 8 | 
         
             
              n_mels: 80
         
     | 
| 9 | 
         
             
              left_frames: 0
         
     | 
| 10 | 
         
             
              right_frames: 0
         
     | 
| 
         | 
|
| 15 | 
         
             
              hop_length: 10
         
     | 
| 16 | 
         | 
| 17 | 
         
             
            use_pretrain: false
         
     | 
| 18 | 
         
            +
            embedding_model: !new:speechbrain.lobes.models.Cnn14.Cnn14
         
     | 
| 19 | 
         
             
              mel_bins: 80
         
     | 
| 20 | 
         
             
              emb_dim: 2048
         
     | 
| 21 | 
         | 
| 22 | 
         
            +
            classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
         
     | 
| 23 | 
         
             
              input_size: 2048
         
     | 
| 24 | 
         
             
              out_neurons: 50
         
     | 
| 25 | 
         
             
              lin_blocks: 1
         
     | 
| 26 | 
         | 
| 27 | 
         
            +
            mean_var_norm: !new:speechbrain.processing.features.InputNormalization
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 28 | 
         
             
              norm_type: sentence
         
     | 
| 29 | 
         
             
              std_norm: false
         
     | 
| 30 | 
         | 
| 
         | 
|
| 33 | 
         
             
            spec_mag_power: 0.5
         
     | 
| 34 | 
         
             
            hop_length: 11.6099
         
     | 
| 35 | 
         
             
            win_length: 23.2199
         
     | 
| 36 | 
         
            +
             
     | 
| 37 | 
         
            +
            compute_stft: !new:speechbrain.processing.features.STFT
         
     | 
| 38 | 
         
             
              n_fft: 1024
         
     | 
| 39 | 
         
             
              hop_length: 11.6099
         
     | 
| 40 | 
         
             
              win_length: 23.2199
         
     | 
| 41 | 
         
             
              sample_rate: 44100
         
     | 
| 42 | 
         | 
| 43 | 
         
            +
            compute_fbank: !new:speechbrain.processing.features.Filterbank
         
     | 
| 44 | 
         
             
              n_mels: 80
         
     | 
| 45 | 
         
             
              n_fft: 1024
         
     | 
| 46 | 
         
             
              sample_rate: 44100
         
     | 
| 47 | 
         | 
| 48 | 
         
             
            modules:
         
     | 
| 49 | 
         
            +
              compute_stft: !ref <compute_stft>
         
     | 
| 50 | 
         
            +
              compute_fbank: !ref <compute_fbank>
         
     | 
| 51 | 
         
            +
              compute_features: !ref <compute_features>
         
     | 
| 52 | 
         
            +
              embedding_model: !ref <embedding_model>
         
     | 
| 53 | 
         
            +
              classifier: !ref <classifier>
         
     | 
| 54 | 
         
            +
              mean_var_norm: !ref <mean_var_norm>
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 55 | 
         | 
| 56 | 
         | 
| 57 | 
         
             
            label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
         
     |