# (removed: "Spaces: Running Running" — Hugging Face Spaces page residue from a copy/paste, not part of the source)
from IMSToucan.InferenceInterfaces.AnonFastSpeech2 import AnonFastSpeech2
# Maps the human-readable model tag (as shown in the demo UI) to the
# FastSpeech2 checkpoint filename under the FastSpeech2_Multi directory.
# NOTE(review): the two Libri600 entries look swapped relative to the
# Libri100 pattern (base -> ground_truth, finetuned -> asr outputs) —
# confirm against the actual checkpoint files before relying on them.
TAGS_TO_MODELS = {
    'Libri100': 'trained_on_ground_truth_phonemes.pt',
    'Libri100 + finetuned': 'trained_on_asr_phoneme_outputs.pt',
    'Libri600': 'trained_on_libri600_asr_phoneme_outputs.pt',
    'Libri600 + finetuned': 'trained_on_libri600_ground_truth_phonemes.pt',
}
class DemoTTS:
    """Thin convenience wrapper around an ``AnonFastSpeech2`` synthesizer.

    Resolves the checkpoint paths for a given model tag, loads the model
    once at construction time, and exposes a single :meth:`read_text`
    call for synthesizing speech with an arbitrary speaker embedding.
    """

    def __init__(self, model_paths, model_tag, device):
        """Load the FastSpeech2 + HiFiGAN pipeline for *model_tag*.

        Args:
            model_paths: base directory (``pathlib.Path``-like, supports ``/``)
                containing the ``FastSpeech2_Multi`` and ``HiFiGAN_combined``
                checkpoint folders.
            model_tag: key into ``TAGS_TO_MODELS`` selecting the FastSpeech2
                checkpoint.
            device: torch device the model (and embeddings) are placed on.
        """
        self.device = device
        self.model_tag = model_tag
        checkpoint_name = TAGS_TO_MODELS[model_tag]
        fastspeech_path = model_paths / 'FastSpeech2_Multi' / checkpoint_name
        hifigan_path = model_paths / 'HiFiGAN_combined' / 'best.pt'
        self.model = AnonFastSpeech2(
            device=self.device,
            path_to_hifigan_model=hifigan_path,
            path_to_fastspeech_model=fastspeech_path,
        )

    def read_text(self, transcription, speaker_embedding, text_is_phonemes=False):
        """Synthesize *transcription* in the voice given by *speaker_embedding*.

        Args:
            transcription: text (or phoneme string) to speak.
            speaker_embedding: utterance embedding; moved to ``self.device``
                and installed as the model's default before synthesis.
            text_is_phonemes: if True, *transcription* is treated as a
                phoneme sequence rather than raw text.

        Returns:
            The waveform produced by the underlying model.
        """
        self.model.default_utterance_embedding = speaker_embedding.to(self.device)
        return self.model(text=transcription, text_is_phonemes=text_is_phonemes)