AdalAbilbekov
committed on
Commit
·
9e9a056
1
Parent(s):
08f5766
- app.py +36 -28
- flagged/log.csv +3 -0
- flagged/output/2e9a3b60dc40f07d4db8/audio.wav +0 -0
- flagged/output/85c2e39535a1879bccc5/audio.wav +0 -0
app.py
CHANGED
@@ -65,35 +65,42 @@ vocoder.remove_weight_norm()
|
|
65 |
emotions = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
|
66 |
spekears = ['Madi', 'Marzhan', 'Akzhol']
|
67 |
|
68 |
-
def generate_audio(text, quantity, speaker, emotion_1, emotion_2):
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
97 |
sr = 22050
|
98 |
return (sr, audio)
|
99 |
|
@@ -102,6 +109,7 @@ demo = gr.Interface(
|
|
102 |
[
|
103 |
gr.Textbox(value='Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.', label="Text you want to synthesize"),
|
104 |
gr.Slider(0, 100, value=50, step=10, label="Count", info="Choose between 0 and 100"),
|
|
|
105 |
gr.Dropdown(spekears, value=spekears[1], label="Narrator", info="Select a narrator."
|
106 |
),
|
107 |
gr.Dropdown(emotions, value=emotions[0], label="Emotion 1", info="Select first emotion"),
|
|
|
65 |
emotions = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
|
66 |
spekears = ['Madi', 'Marzhan', 'Akzhol']
|
67 |
|
68 |
+
def generate_audio(text, quantity, guid, speaker, emotion_1, emotion_2):
|
69 |
+
y_dec = torch.tensor([torch.nan])
|
70 |
+
gui = guid
|
71 |
+
while torch.isnan(y_dec).sum() != 0:
|
72 |
+
x, x_lengths = convert_text(text)
|
73 |
+
emo_1, emo_2 = emotions.index(emotion_1), emotions.index(emotion_2)
|
74 |
+
emo1 = torch.LongTensor([emo_1]).to(device)
|
75 |
+
emo2 = torch.LongTensor([emo_2]).to(device)
|
76 |
+
sid = torch.LongTensor([spekears.index(speaker)]).to(device)
|
77 |
+
intensity = quantity / 100
|
78 |
|
79 |
+
y_enc, y_dec, attn = gradtts_uncond_model.classifier_guidance_decode_two_mixture(
|
80 |
+
x, x_lengths,
|
81 |
+
n_timesteps=100,
|
82 |
+
temperature=2.0,
|
83 |
+
stoc=args.stoc,
|
84 |
+
spk=sid,
|
85 |
+
emo1=emo1,
|
86 |
+
emo2=emo2,
|
87 |
+
emo1_weight=intensity,
|
88 |
+
length_scale=1.,
|
89 |
+
classifier_func=model.forward,
|
90 |
+
guidance=gui,
|
91 |
+
classifier_type=model.model_type
|
92 |
+
)
|
93 |
+
y_dec = y_dec.detach()
|
94 |
+
res = y_dec.squeeze().to(device).numpy()
|
95 |
+
x = torch.from_numpy(res).unsqueeze(0)
|
96 |
+
y_g_hat = vocoder(x)
|
97 |
+
audio = y_g_hat.squeeze()
|
98 |
+
audio = audio * 32768.0
|
99 |
+
audio = audio.detach().cpu().numpy().astype('int16')
|
100 |
+
gui -= 50
|
101 |
+
if gui <= 0:
|
102 |
+
print('shabuya')
|
103 |
+
break
|
104 |
sr = 22050
|
105 |
return (sr, audio)
|
106 |
|
|
|
109 |
[
|
110 |
gr.Textbox(value='Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.', label="Text you want to synthesize"),
|
111 |
gr.Slider(0, 100, value=50, step=10, label="Count", info="Choose between 0 and 100"),
|
112 |
+
gr.Slider(0, 1000, value=100, step=10, label="Guidance", info="Choose between 0 and 1000"),
|
113 |
gr.Dropdown(spekears, value=spekears[1], label="Narrator", info="Select a narrator."
|
114 |
),
|
115 |
gr.Dropdown(emotions, value=emotions[0], label="Emotion 1", info="Select first emotion"),
|
flagged/log.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
Text you want to synthesize,Count,Guidance,Narrator,Emotion 1,Emotion 2,output,flag,username,timestamp
|
2 |
+
Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.,50,20,Marzhan,angry,neutral,flagged/output/2e9a3b60dc40f07d4db8/audio.wav,,,2024-03-26 16:38:18.508507
|
3 |
+
Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.,50,300,Marzhan,happy,neutral,flagged/output/85c2e39535a1879bccc5/audio.wav,,,2024-03-29 22:08:19.838874
|
flagged/output/2e9a3b60dc40f07d4db8/audio.wav
ADDED
Binary file (171 kB). View file
|
|
flagged/output/85c2e39535a1879bccc5/audio.wav
ADDED
Binary file (173 kB). View file
|
|