Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,15 @@
|
|
|
|
1 |
import gradio as gr
|
|
|
2 |
from pypinyin import lazy_pinyin, Style
|
|
|
3 |
import torch
|
|
|
|
|
4 |
from ttts.utils.infer_utils import load_model
|
|
|
5 |
from ttts.vocoder.feature_extractors import MelSpectrogramFeatures
|
|
|
6 |
import torchaudio
|
7 |
MODELS = {
|
8 |
'vqvae.pth':'./TTTS/vae-30.pt',
|
@@ -13,9 +20,12 @@ MODELS = {
|
|
13 |
'rlg_auto.pth': '',
|
14 |
'rlg_diffuser.pth': '',
|
15 |
}
|
|
|
16 |
from ttts.gpt.voice_tokenizer import VoiceBpeTokenizer
|
|
|
17 |
import torch.nn.functional as F
|
18 |
cond_audio = 'ttts/3.wav'
|
|
|
19 |
audio,sr = torchaudio.load(cond_audio)
|
20 |
if audio.shape[0]>1:
|
21 |
audio = audio[0].unsqueeze(0)
|
@@ -39,7 +49,7 @@ from ttts.diffusion.aa_model import denormalize_tacotron_mel, normalize_tacotron
|
|
39 |
# print(device)
|
40 |
|
41 |
vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
42 |
-
|
43 |
|
44 |
def speak(text):
|
45 |
pinyin = ' '.join(lazy_pinyin(text, style=Style.TONE3, neutral_tone_with_five=True))
|
|
|
1 |
+
print("import gradio")
|
2 |
import gradio as gr
|
3 |
+
print("import ppn")
|
4 |
from pypinyin import lazy_pinyin, Style
|
5 |
+
print("import torch")
|
6 |
import torch
|
7 |
+
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
8 |
+
print("import ttts")
|
9 |
from ttts.utils.infer_utils import load_model
|
10 |
+
print("import mel")
|
11 |
from ttts.vocoder.feature_extractors import MelSpectrogramFeatures
|
12 |
+
print("import torchaudio")
|
13 |
import torchaudio
|
14 |
MODELS = {
|
15 |
'vqvae.pth':'./TTTS/vae-30.pt',
|
|
|
20 |
'rlg_auto.pth': '',
|
21 |
'rlg_diffuser.pth': '',
|
22 |
}
|
23 |
+
print("import tokenizer")
|
24 |
from ttts.gpt.voice_tokenizer import VoiceBpeTokenizer
|
25 |
+
print("import f")
|
26 |
import torch.nn.functional as F
|
27 |
cond_audio = 'ttts/3.wav'
|
28 |
+
print("load audio")
|
29 |
audio,sr = torchaudio.load(cond_audio)
|
30 |
if audio.shape[0]>1:
|
31 |
audio = audio[0].unsqueeze(0)
|
|
|
49 |
# print(device)
|
50 |
|
51 |
vocos = Vocos.from_pretrained("charactr/vocos-mel-24khz")
|
52 |
+
|
53 |
|
54 |
def speak(text):
|
55 |
pinyin = ' '.join(lazy_pinyin(text, style=Style.TONE3, neutral_tone_with_five=True))
|