File size: 1,091 Bytes
574ab7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from IMSToucan.InferenceInterfaces.AnonFastSpeech2 import AnonFastSpeech2

# Maps a model tag to the FastSpeech2 checkpoint file name that DemoTTS looks
# up inside the 'FastSpeech2_Multi' directory.
# NOTE(review): for Libri100 the "+ finetuned" tag points at the ASR-output
# checkpoint, whereas for Libri600 it points at the ground-truth checkpoint.
# The two pairs look inverted relative to each other -- confirm against the
# released model files before relying on the Libri600 tags.
TAGS_TO_MODELS = {
    'Libri100': 'trained_on_ground_truth_phonemes.pt',
    'Libri100 + finetuned': 'trained_on_asr_phoneme_outputs.pt',
    'Libri600': 'trained_on_libri600_asr_phoneme_outputs.pt',
    'Libri600 + finetuned': 'trained_on_libri600_ground_truth_phonemes.pt',
}


class DemoTTS:
    """Thin wrapper around an ``AnonFastSpeech2`` model selected by tag."""

    def __init__(self, model_paths, model_tag, device):
        """Resolve the checkpoint paths and build the underlying model.

        Args:
            model_paths: Base directory of the models. It is combined with
                sub-paths via the ``/`` operator, so it must support that
                (presumably a ``pathlib.Path`` -- confirm against caller).
            model_tag: Key into ``TAGS_TO_MODELS`` choosing the FastSpeech2
                checkpoint file.
            device: Device handed to ``AnonFastSpeech2`` and later used to
                move the speaker embedding in ``read_text``.

        Raises:
            KeyError: If ``model_tag`` is not a key of ``TAGS_TO_MODELS``.
        """
        self.device = device
        self.model_tag = model_tag

        # The tag lookup happens first, so an unknown tag fails before any
        # path is assembled.
        checkpoint_name = TAGS_TO_MODELS[self.model_tag]
        acoustic_checkpoint = model_paths / 'FastSpeech2_Multi' / checkpoint_name
        vocoder_checkpoint = model_paths / 'HiFiGAN_combined' / 'best.pt'

        self.model = AnonFastSpeech2(
            device=self.device,
            path_to_hifigan_model=vocoder_checkpoint,
            path_to_fastspeech_model=acoustic_checkpoint,
        )

    def read_text(self, transcription, speaker_embedding, text_is_phonemes=False):
        """Run the model on ``transcription`` using ``speaker_embedding``.

        Args:
            transcription: Input passed to the model as ``text``.
            speaker_embedding: Object exposing ``.to(device)``; the result is
                stored as the model's ``default_utterance_embedding`` before
                the model is called.
            text_is_phonemes: Forwarded unchanged to the model call.

        Returns:
            Whatever the model call returns (presumably the synthesized
            waveform -- confirm against ``AnonFastSpeech2``).
        """
        tts = self.model
        # The embedding is set on the model rather than passed per call, so
        # it stays in place on the model after this method returns.
        tts.default_utterance_embedding = speaker_embedding.to(self.device)
        return tts(text=transcription, text_is_phonemes=text_is_phonemes)