#!/bin/bash
# Recipe entry point: data download/preparation, feature extraction,
# Parallel WaveGAN training, and decoding, controlled via stage/stop_stage.
# Copyright 2019 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)
# Source cluster/job-submission command definitions and environment PATH setup.
# NOTE(review): train_cmd/cuda_cmd used below are presumably defined in cmd.sh.
. ./cmd.sh || exit 1;
. ./path.sh || exit 1;
# basic settings
stage=-1 # stage to start
stop_stage=100 # stage to stop
verbose=1 # verbosity level (lower is less info)
n_gpus=1 # number of gpus in training
n_jobs=16 # number of parallel jobs in feature extraction
# NOTE(kan-bayashi): renamed to conf to avoid conflict in parse_options.sh
conf=conf/parallel_wavegan.v1.yaml
# directory path setting
download_dir=downloads # directory to save downloaded files
dumpdir=dump # directory to dump features
# training related setting
tag="" # tag for directory to save model
resume="" # checkpoint path to resume training
# (e.g. <path>/<to>/checkpoint-10000steps.pkl)
# decoding related setting
checkpoint="" # checkpoint path to be used for decoding
# if not provided, the latest one will be used
# (e.g. <path>/<to>/checkpoint-400000steps.pkl)
# Allow overriding any variable above via command-line flags (--name value).
# shellcheck disable=SC1091
. utils/parse_options.sh || exit 1;
train_set="train_nodev" # name of training data directory
dev_set="dev" # name of development data directory
eval_set="eval" # name of evaluation data directory
# Strict mode is enabled only after option parsing, so parse_options.sh is not
# subject to -u/-e while it manipulates possibly-unset variables.
set -euo pipefail
# Stage -1: fetch the corpus into ${download_dir}.
if [ "${stop_stage}" -ge -1 ] && [ "${stage}" -le -1 ]; then
    echo "Stage -1: Data download"
    local/data_download.sh "${download_dir}"
fi
# Stage 0: build the data directories (train/dev/eval split) from the
# downloaded corpus; --shuffle randomizes the utterance order.
if [ "${stop_stage}" -ge 0 ] && [ "${stage}" -le 0 ]; then
    echo "Stage 0: Data preparation"
    local/data_prep.sh \
        --dev_set "${dev_set}" \
        --eval_set "${eval_set}" \
        --train_set "${train_set}" \
        --shuffle true \
        "${download_dir}/sc_all" data
fi
# Pick the statistics-file extension from the feature format in the config:
# "hdf5" -> .h5, anything else -> .npy. Process substitution (not a pipe) is
# used so only grep's exit status matters, even under pipefail.
if grep -q "hdf5" <(yq ".format" "${conf}"); then
    stats_ext="h5"
else
    stats_ext="npy"
fi
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    echo "Stage 1: Feature extraction"
    # Extract raw features for each subset, one background job per subset.
    pids=()
    for name in "${train_set}" "${dev_set}" "${eval_set}"; do
        (
            [ ! -e "${dumpdir}/${name}/raw" ] && mkdir -p "${dumpdir}/${name}/raw"
            echo "Feature extraction start. See the progress via ${dumpdir}/${name}/raw/preprocessing.*.log."
            # Split the data directory into ${n_jobs} pieces so preprocessing
            # can run as a JOB array.
            utils/make_subset_data.sh "data/${name}" "${n_jobs}" "${dumpdir}/${name}/raw"
            ${train_cmd} JOB=1:${n_jobs} "${dumpdir}/${name}/raw/preprocessing.JOB.log" \
                parallel-wavegan-preprocess \
                    --config "${conf}" \
                    --scp "${dumpdir}/${name}/raw/wav.JOB.scp" \
                    --dumpdir "${dumpdir}/${name}/raw/dump.JOB" \
                    --verbose "${verbose}"
            echo "Successfully finished feature extraction of ${name} set."
        ) &
        pids+=($!)
    done
    # Reap every background job and count failures explicitly: a failing
    # background job does not trigger set -e on its own.
    i=0; for pid in "${pids[@]}"; do wait "${pid}" || ((++i)); done
    [ "${i}" -gt 0 ] && echo "$0: ${i} background jobs are failed." && exit 1;
    echo "Successfully finished feature extraction."
    # Calculate normalization statistics over the training set only; dev/eval
    # are later normalized with these same statistics.
    echo "Statistics computation start. See the progress via ${dumpdir}/${train_set}/compute_statistics.log."
    ${train_cmd} "${dumpdir}/${train_set}/compute_statistics.log" \
        parallel-wavegan-compute-statistics \
            --config "${conf}" \
            --rootdir "${dumpdir}/${train_set}/raw" \
            --dumpdir "${dumpdir}/${train_set}" \
            --verbose "${verbose}"
    echo "Successfully finished calculation of statistics."
    # Normalize each subset with the training-set statistics and dump the
    # results, again one background job per subset.
    pids=()
    for name in "${train_set}" "${dev_set}" "${eval_set}"; do
        (
            [ ! -e "${dumpdir}/${name}/norm" ] && mkdir -p "${dumpdir}/${name}/norm"
            # FIX: corrected user-facing message typo ("Nomalization").
            echo "Normalization start. See the progress via ${dumpdir}/${name}/norm/normalize.*.log."
            ${train_cmd} JOB=1:${n_jobs} "${dumpdir}/${name}/norm/normalize.JOB.log" \
                parallel-wavegan-normalize \
                    --config "${conf}" \
                    --stats "${dumpdir}/${train_set}/stats.${stats_ext}" \
                    --rootdir "${dumpdir}/${name}/raw/dump.JOB" \
                    --dumpdir "${dumpdir}/${name}/norm/dump.JOB" \
                    --verbose "${verbose}"
            echo "Successfully finished normalization of ${name} set."
        ) &
        pids+=($!)
    done
    i=0; for pid in "${pids[@]}"; do wait "${pid}" || ((++i)); done
    [ "${i}" -gt 0 ] && echo "$0: ${i} background jobs are failed." && exit 1;
    echo "Successfully finished normalization."
fi
# Experiment directory: exp/<train_set>_speech_commands_<suffix>, where the
# suffix is the user-supplied tag, or the config file's base name when no tag
# was given.
if [ -n "${tag}" ]; then
    expdir="exp/${train_set}_speech_commands_${tag}"
else
    expdir="exp/${train_set}_speech_commands_$(basename "${conf}" .yaml)"
fi
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
echo "Stage 2: Network training"
[ ! -e "${expdir}" ] && mkdir -p "${expdir}"
# Keep a copy of the normalization statistics alongside the checkpoints so
# the experiment directory is self-contained.
cp "${dumpdir}/${train_set}/stats.${stats_ext}" "${expdir}"
# Use the distributed launcher when training on more than one GPU; otherwise
# invoke the trainer directly.
if [ "${n_gpus}" -gt 1 ]; then
train="python -m parallel_wavegan.distributed.launch --nproc_per_node ${n_gpus} -c parallel-wavegan-train"
else
train="parallel-wavegan-train"
fi
echo "Training start. See the progress via ${expdir}/train.log."
# NOTE: ${train} is intentionally unquoted so the multi-word launcher
# command word-splits into separate arguments.
${cuda_cmd} --gpu "${n_gpus}" "${expdir}/train.log" \
${train} \
--config "${conf}" \
--train-dumpdir "${dumpdir}/${train_set}/norm" \
--dev-dumpdir "${dumpdir}/${dev_set}/norm" \
--outdir "${expdir}" \
--resume "${resume}" \
--verbose "${verbose}"
echo "Successfully finished training."
fi
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
echo "Stage 3: Network decoding"
# Default to the most recently modified checkpoint when none was supplied;
# "|| true" keeps the script alive under pipefail if no *.pkl exists yet.
# shellcheck disable=SC2012
[ -z "${checkpoint}" ] && checkpoint="$(ls -dt "${expdir}"/*.pkl | head -1 || true)"
# Output wavs go under a directory named after the chosen checkpoint.
outdir="${expdir}/wav/$(basename "${checkpoint}" .pkl)"
pids=()
# Decode dev and eval sets in parallel background jobs.
for name in "${dev_set}" "${eval_set}"; do
(
[ ! -e "${outdir}/${name}" ] && mkdir -p "${outdir}/${name}"
# Decoding uses a single GPU per job; the clamp is local to this subshell.
[ "${n_gpus}" -gt 1 ] && n_gpus=1
echo "Decoding start. See the progress via ${outdir}/${name}/decode.log."
${cuda_cmd} --gpu "${n_gpus}" "${outdir}/${name}/decode.log" \
parallel-wavegan-decode \
--dumpdir "${dumpdir}/${name}/norm" \
--checkpoint "${checkpoint}" \
--outdir "${outdir}/${name}" \
--verbose "${verbose}"
echo "Successfully finished decoding of ${name} set."
) &
pids+=($!)
done
# Count background-job failures explicitly; set -e does not catch them.
i=0; for pid in "${pids[@]}"; do wait "${pid}" || ((++i)); done
[ "${i}" -gt 0 ] && echo "$0: ${i} background jobs are failed." && exit 1;
echo "Successfully finished decoding."
fi
echo "Finished."