#!/usr/bin/env bash
#
# Launcher for multi_KD/train_multi_KD3.py.
#
# Exports the environment the training script is run with, defines the
# hyper-parameters of this experiment as shell variables, derives the
# experiment directory name from them, and finally invokes the training
# script with the corresponding command-line options.
#
# Usage: run from the directory that contains multi_KD/ and data/
# (both are referenced with relative paths below).

# Abort on command failure, on use of an unset variable (catches typos in the
# many variable names below), and on failure of any stage in a pipeline.
set -euo pipefail

# ---------------------------------------------------------------------------
# Environment
# ---------------------------------------------------------------------------

# NOTE(review): this presumably forces the pure-Python protobuf backend;
# confirm it is still required by the installed protobuf version.
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Prepend the project checkout to the module search path. `${PYTHONPATH:-}`
# yields the same value as a bare `$PYTHONPATH` but does not trip `set -u`
# when the variable is not defined in the calling environment.
export PYTHONPATH="/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD:${PYTHONPATH:-}"

# GPUs made visible to the training processes.
# NOTE(review): four devices are listed and --world-size below is 4;
# presumably the two must be kept in sync - confirm when changing either.
export CUDA_VISIBLE_DEVICES="0,1,2,3"

echo "Using device: ${CUDA_VISIBLE_DEVICES}"

# ---------------------------------------------------------------------------
# Hyper-parameters (each is forwarded to the option of the same name below)
# ---------------------------------------------------------------------------

full_libri=1

base_lr=0.045

# Per-model switches, forwarded to --use-beats / --use-ecapa / --use-whisper.
use_beats=1
use_ecapa=1
use_whisper=1
whisper_dim=1280
# Used both for --whisper-version and inside the --manifest-dir path.
whisper_version=large-v3

# Loss scales and related settings, forwarded to --beats-loss-scale,
# --ecapa-loss-scale, --speaker-input-idx and --whisper-loss-scale.
beats_scale=1.0
ecapa_scale=10.0
speaker_input_idx=2
whisper_loss_scale=1.0

# Dataset selection and repetition factors.
use_librispeech=1
repeat_librispeech=5
use_wenetspeech=0
repeat_wenetspeech=0
use_audioset=1
audioset_subset=unbalanced
use_voxceleb=1
voxceleb_subset=vox2

stop_early=1
share_asr=1
# Forwarded to --max-duration.
md=1500

# Augmentation switches, forwarded to --enable-spec-aug / --enable-musan.
enable_spec_aug=0
enable_musan=0

causal=1
delta_t=6

# ---------------------------------------------------------------------------
# Experiment directory
# ---------------------------------------------------------------------------

# The name encodes the settings above. Its exact spelling is kept unchanged
# on purpose - including "wenetspech", the fixed "as_unbalanced" label (not
# derived from audioset_subset) and the unlabeled use_whisper field -
# because --start-epoch 31 below suggests the directory already exists from
# an earlier run (TODO confirm); renaming any part would point to a new path.
exp_dir="exp_causal${causal}_delta${delta_t}KD_LS${use_librispeech}_${repeat_librispeech}fold+wenetspech${use_wenetspeech}_${repeat_wenetspeech}fold+as_unbalanced${use_audioset}+vox_${use_voxceleb}_${voxceleb_subset}_base_lr_${base_lr}_use_beats_${use_beats}_scale_${beats_scale}_use_ecapa_${use_ecapa}_layer_${speaker_input_idx}_scale_${ecapa_scale}_${use_whisper}_scale_${whisper_loss_scale}_specaug${enable_spec_aug}_musan${enable_musan}_with_task_ID_stop_early${stop_early}_share_asr${share_asr}_md${md}_amp_bf16"

# ---------------------------------------------------------------------------
# Training command
# ---------------------------------------------------------------------------

# Options are collected in an array so that each value stays a single word
# and no fragile backslash continuations are needed. The option order is the
# same as in the original command line.
train_args=(
  --world-size 4
  --num-epochs 35
  --start-epoch 31
  --causal "${causal}" --delta-t "${delta_t}"
  --use-fp16 0 --use-bf16 1
  --inf-check 0
  --base-lr "${base_lr}"
  --exp-dir "multi_KD/${exp_dir}"
  --manifest-dir "data/fbank_LSVoxAs_with_whisper_${whisper_version}_with_taskID"
  --full-libri "${full_libri}"
  --use-librispeech "${use_librispeech}" --repeat-librispeech "${repeat_librispeech}"
  --use-wenetspeech "${use_wenetspeech}" --repeat-wenetspeech "${repeat_wenetspeech}"
  --use-audioset "${use_audioset}" --audioset-subset "${audioset_subset}"
  --use-voxceleb "${use_voxceleb}" --voxceleb-subset "${voxceleb_subset}"
  --stop-early "${stop_early}"
  --max-duration "${md}"
  --num-workers 1
  --whisper-version "${whisper_version}"
  --use-whisper "${use_whisper}" --whisper-loss-scale "${whisper_loss_scale}" --whisper-dim "${whisper_dim}"
  --use-beats "${use_beats}" --ecapa-loss-scale "${ecapa_scale}" --speaker-input-idx "${speaker_input_idx}"
  --use-ecapa "${use_ecapa}" --beats-loss-scale "${beats_scale}"
  --drop-features 0 --return-audio 0 --on-the-fly-feats 0
  --use-task-id 1
  --share-asr "${share_asr}"
  --bucketing-sampler False
  --enable-spec-aug "${enable_spec_aug}"
  --enable-musan "${enable_musan}"
  --master-port 13440
)

python multi_KD/train_multi_KD3.py "${train_args[@]}"