update
Browse files- app.py +38 -4
- requirements.txt +3 -1
app.py
CHANGED
@@ -8,6 +8,9 @@ import traceback
|
|
8 |
import edge_tts
|
9 |
import gradio as gr
|
10 |
import librosa
|
|
|
|
|
|
|
11 |
|
12 |
from src.rmvpe import RMVPE
|
13 |
from model_loader import ModelLoader
|
@@ -39,9 +42,34 @@ rmvpe_model = RMVPE(
|
|
39 |
|
40 |
model_loader.load("char2")
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
def tts(
|
44 |
-
rvc,
|
|
|
45 |
speed,
|
46 |
pitch,
|
47 |
tts_text,
|
@@ -61,6 +89,8 @@ def tts(
|
|
61 |
print(f"tts_voice: {tts_voice}")
|
62 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
63 |
|
|
|
|
|
64 |
try:
|
65 |
if limitation and len(tts_text) > 280:
|
66 |
print("Error: Text too long")
|
@@ -86,9 +116,11 @@ def tts(
|
|
86 |
)
|
87 |
t1 = time.time()
|
88 |
edge_time = t1 - t0
|
89 |
-
|
90 |
-
# audio_opt = f.read()
|
91 |
if not rvc:
|
|
|
|
|
|
|
92 |
info = f"Success. Time: edge-tts: {edge_time}s"
|
93 |
print(info)
|
94 |
return (
|
@@ -206,7 +238,8 @@ with app:
|
|
206 |
label="Input Text",
|
207 |
value="I'm Never Gonna Give You Up",
|
208 |
)
|
209 |
-
rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=
|
|
|
210 |
with gr.Column():
|
211 |
but0 = gr.Button("Convert", variant="primary")
|
212 |
info_text = gr.Textbox(label="Output info")
|
@@ -216,6 +249,7 @@ with app:
|
|
216 |
tts,
|
217 |
[
|
218 |
rvc,
|
|
|
219 |
speed,
|
220 |
pitch,
|
221 |
tts_text,
|
|
|
8 |
import edge_tts
|
9 |
import gradio as gr
|
10 |
import librosa
|
11 |
+
import numpy as np
|
12 |
+
from pydub import AudioSegment
|
13 |
+
from scipy.io import wavfile
|
14 |
|
15 |
from src.rmvpe import RMVPE
|
16 |
from model_loader import ModelLoader
|
|
|
42 |
|
43 |
model_loader.load("char2")
|
44 |
|
45 |
+
def add_robotic_effect(
    mp3_path: str,
    delay: float = 0.05,
    alpha: float = 0.55,
    output_path: str = "processed.wav",
) -> str:
    """Mix a delayed, attenuated copy of an MP3 into itself and save it as WAV.

    The audio is decoded with pydub, downmixed to mono by averaging the
    channels, summed with a copy of itself shifted by ``delay`` seconds and
    scaled by ``alpha``, clipped to the int16 range and written to
    ``output_path``.

    Args:
        mp3_path: Path of the MP3 file to process.
        delay: Offset of the delayed copy, in seconds. Negative values are
            treated as zero.
        alpha: Gain applied to the delayed copy before it is mixed in.
        output_path: Destination WAV file. The default is a fixed name in the
            working directory, so successive calls overwrite each other.

    Returns:
        ``output_path``, i.e. the path of the WAV file that was written.
    """
    audio = AudioSegment.from_mp3(mp3_path)
    sample_rate = audio.frame_rate

    # Do the arithmetic in float64. Summing directly in the decoded integer
    # dtype wraps around on overflow, which would make the clip below a no-op
    # and turn loud passages into wrap-around distortion.
    # NOTE(review): assumes 16-bit samples, since the output is clipped to the
    # int16 range below -- confirm for non-standard inputs.
    samples = np.array(audio.get_array_of_samples(), dtype=np.float64)

    # pydub returns interleaved samples; average each frame down to mono.
    if audio.channels > 1:
        samples = samples.reshape((-1, audio.channels)).mean(axis=1)

    delay_samples = max(int(delay * sample_rate), 0)

    mixed = samples.copy()
    # Explicit end index instead of ``[:-delay_samples]``: a negative-zero
    # slice is empty and would break the mix when the delay rounds to 0.
    # A delay longer than the clip simply leaves the signal untouched.
    if delay_samples < samples.size:
        mixed[delay_samples:] += samples[: samples.size - delay_samples] * alpha

    # Saturate instead of wrapping, then convert to the dtype written to disk.
    limits = np.iinfo(np.int16)
    mixed = np.clip(np.rint(mixed), limits.min, limits.max).astype(np.int16)

    wavfile.write(output_path, sample_rate, mixed)
    return output_path
|
69 |
|
70 |
def tts(
|
71 |
+
rvc,
|
72 |
+
effect,
|
73 |
speed,
|
74 |
pitch,
|
75 |
tts_text,
|
|
|
89 |
print(f"tts_voice: {tts_voice}")
|
90 |
print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
|
91 |
|
92 |
+
edge_output_filename = "edge_output.mp3"
|
93 |
+
|
94 |
try:
|
95 |
if limitation and len(tts_text) > 280:
|
96 |
print("Error: Text too long")
|
|
|
116 |
)
|
117 |
t1 = time.time()
|
118 |
edge_time = t1 - t0
|
119 |
+
|
|
|
120 |
if not rvc:
|
121 |
+
if effect:
|
122 |
+
edge_output_filename = add_robotic_effect(edge_output_filename)
|
123 |
+
|
124 |
info = f"Success. Time: edge-tts: {edge_time}s"
|
125 |
print(info)
|
126 |
return (
|
|
|
238 |
label="Input Text",
|
239 |
value="I'm Never Gonna Give You Up",
|
240 |
)
|
241 |
+
rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=False)
|
242 |
+
effect = gr.Checkbox(label="Add Effect", info="Would you like to apply Effect?", value=True)
|
243 |
with gr.Column():
|
244 |
but0 = gr.Button("Convert", variant="primary")
|
245 |
info_text = gr.Textbox(label="Output info")
|
|
|
249 |
tts,
|
250 |
[
|
251 |
rvc,
|
252 |
+
effect,
|
253 |
speed,
|
254 |
pitch,
|
255 |
tts_text,
|
requirements.txt
CHANGED
@@ -8,4 +8,6 @@ pyworld==0.3.4
|
|
8 |
torchcrepe==0.0.21
|
9 |
scikit-learn==1.3.0
|
10 |
gradio
|
11 |
-
gradio_client
|
|
|
|
|
|
8 |
torchcrepe==0.0.21
|
9 |
scikit-learn==1.3.0
|
10 |
gradio
|
11 |
+
gradio_client
|
12 |
+
pydub
|
13 |
+
scipy
|