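#!/usr/bin/env python3
"""Synthesize speech on the command line using trained or pre-trained TTS models."""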

import argparse
import os
import string
import sys
from argparse import RawTextHelpFormatter
from pathlib import Path

from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


def str2bool(v):
    """Parse common truthy/falsy strings into a bool for argparse flags."""
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


def main():
    # Build the command-line interface; RawTextHelpFormatter keeps the example runs readable in --help.
    parser = argparse.ArgumentParser(
        description='''Synthesize speech on the command line.\n\n'''
        '''You can either use your trained model or choose a model from the provided list.\n'''
        '''
Example runs:

    # list provided models
    ./TTS/bin/synthesize.py --list_models

    # run a model from the list
    ./TTS/bin/synthesize.py --text "Text for TTS" --model_name "<language>/<dataset>/<model_name>" --vocoder_name "<language>/<dataset>/<model_name>" --out_path output/path/

    # run your own TTS model (using the Griffin-Lim vocoder)
    ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/

    # run your own TTS and vocoder models
    ./TTS/bin/synthesize.py --text "Text for TTS" --model_path path/to/model.pth.tar --config_path path/to/config.json --out_path output/path/
        --vocoder_path path/to/vocoder.pth.tar --vocoder_config_path path/to/vocoder_config.json
''',
        formatter_class=RawTextHelpFormatter)

    parser.add_argument(
        '--list_models',
        type=str2bool,
        nargs='?',
        const=True,
        default=False,
        help='List available pre-trained TTS and vocoder models.'
    )
    parser.add_argument(
        '--text',
        type=str,
        default=None,
        help='Text to generate speech from.'
    )

    # arguments for selecting pre-trained models by name
    parser.add_argument(
        '--model_name',
        type=str,
        default=None,
        help='Name of one of the pre-trained TTS models in format <language>/<dataset>/<model_name>.'
    )
    parser.add_argument(
        '--vocoder_name',
        type=str,
        default=None,
        help='Name of one of the pre-trained vocoder models in format <language>/<dataset>/<model_name>.'
    )

    # arguments for using a custom, locally trained model
    parser.add_argument(
        '--config_path',
        type=str,
        default=None,
        help='Path to model config file.'
    )
    parser.add_argument(
        '--model_path',
        type=str,
        default=None,
        help='Path to model file.',
    )
    parser.add_argument(
        '--out_path',
        type=str,
        default=Path(__file__).resolve().parent,
        help='Directory to save the final wav file. The file is named from the given text.',
    )
    parser.add_argument(
        '--use_cuda',
        type=str2bool,
        nargs='?',
        const=True,
        default=False,
        help='Run model on CUDA.',
    )
    parser.add_argument(
        '--vocoder_path',
        type=str,
        default=None,
        help='Path to vocoder model file. If it is not defined, the model uses Griffin-Lim as vocoder. '
             'Make sure the required vocoder library (e.g. WaveRNN) is installed.',
    )
    parser.add_argument(
        '--vocoder_config_path',
        type=str,
        default=None,
        help='Path to vocoder model config file.',
    )

    # multi-speaker and style transfer arguments
    parser.add_argument(
        '--speakers_json',
        type=str,
        default=None,
        help='JSON file for multi-speaker model.'
    )
    parser.add_argument(
        '--speaker_idx',
        type=str,
        default=None,
        help='If the TTS model is trained with x-vectors, speaker_idx is a file name present in speakers.json; '
             'otherwise it is the speaker id of a speaker in the speaker embedding layer.'
    )
    parser.add_argument(
        '--gst_style',
        default=None,
        help='Path to a wav file used as the GST style reference.'
    )
    parser.add_argument(
        '--save_spectogram',
        type=str2bool,
        nargs='?',
        const=True,
        default=False,
        help='If true, save the raw spectrogram in out_path for further (vocoder) processing.'
    )

    args = parser.parse_args()
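
    # set up the model manager for listing and downloading pre-trained models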
    path = Path(__file__).parent / "../.models.json"
    manager = ModelManager(path)

    model_path = None
    config_path = None
    vocoder_path = None
    vocoder_config_path = None

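    # list the available pre-trained models and exit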
    if args.list_models:
        manager.list_models()
        sys.exit()

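    # download pre-trained models when they are referenced by name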
    if args.model_name is not None:
        model_path, config_path = manager.download_model(args.model_name)

    if args.vocoder_name is not None:
        vocoder_path, vocoder_config_path = manager.download_model(args.vocoder_name)

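    # explicit paths given on the command line override any downloaded models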
    if args.model_path is not None:
        model_path = args.model_path
        config_path = args.config_path

    if args.vocoder_path is not None:
        vocoder_path = args.vocoder_path
        vocoder_config_path = args.vocoder_config_path

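    # load the TTS model (and optional vocoder) into a single synthesizer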
    synthesizer = Synthesizer(model_path, config_path, vocoder_path, vocoder_config_path, args.use_cuda)

    # Griffin-Lim is used as a fallback when no separate vocoder model is given
    use_griffin_lim = vocoder_path is None
    print(" > Text: {}".format(args.text))

    wav = synthesizer.tts(args.text)

    # name the output wav after the input text (first 20 characters, punctuation stripped)
    file_name = args.text.replace(" ", "_")[0:20]
    file_name = file_name.translate(
        str.maketrans('', '', string.punctuation.replace('_', ''))) + '.wav'
    out_path = os.path.join(args.out_path, file_name)
    print(" > Saving output to {}".format(out_path))
    synthesizer.save_wav(wav, out_path)


if __name__ == "__main__":
    main()