Spaces:

RUI-LONG
/

test-rvc

Sleeping

App Files Files Community

RUI-LONG committed on Jun 19, 2024

Commit

c4b86ad

1 Parent(s): 387a00b

update

Browse files

Files changed (2) hide show

app.py +38 -4
requirements.txt +3 -1

app.py CHANGED Viewed

@@ -8,6 +8,9 @@ import traceback
 import edge_tts
 import gradio as gr
 import librosa
 from src.rmvpe import RMVPE
 from model_loader import ModelLoader
@@ -39,9 +42,34 @@ rmvpe_model = RMVPE(
 model_loader.load("char2")
 def tts(
-    rvc,
     speed,
     pitch,
     tts_text,
@@ -61,6 +89,8 @@ def tts(
     print(f"tts_voice: {tts_voice}")
     print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
     try:
         if limitation and len(tts_text) > 280:
             print("Error: Text too long")
@@ -86,9 +116,11 @@ def tts(
         )
         t1 = time.time()
         edge_time = t1 - t0
-        # with open(edge_output_filename, "rb") as f:
-        #     audio_opt = f.read()
         if not rvc:
             info = f"Success. Time: edge-tts: {edge_time}s"
             print(info)
             return (
@@ -206,7 +238,8 @@ with app:
                 label="Input Text",
                 value="I'm Never Gonna Give You Up",
             )
-            rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=True)
         with gr.Column():
             but0 = gr.Button("Convert", variant="primary")
             info_text = gr.Textbox(label="Output info")
@@ -216,6 +249,7 @@ with app:
             tts,
             [
                 rvc,
                 speed,
                 pitch,
                 tts_text,

 import edge_tts
 import gradio as gr
 import librosa
+import numpy as np
+from pydub import AudioSegment
+from scipy.io import wavfile
 from src.rmvpe import RMVPE
 from model_loader import ModelLoader
 model_loader.load("char2")
+def add_robotic_effect(mp3_path):
+    audio = AudioSegment.from_mp3(mp3_path)
+    # Convert to numpy array
+    data = np.array(audio.get_array_of_samples())
+    sample_rate = audio.frame_rate
+    # If stereo, average the channels to mono
+    if audio.channels == 2:
+        data = data.reshape((-1, 2)).mean(axis=1).astype(np.int16)
+    # Apply delay effect
+    delay = 0.05
+    alpha = 0.55
+    delay_samples = int(delay * sample_rate)
+    delayed_data = np.zeros_like(data)
+    delayed_data[delay_samples:] = data[:-delay_samples] * alpha
+    delayed_data += data
+    # Clip the values to int16 range
+    delayed_data = np.clip(delayed_data, -32768, 32767)
+    wavfile.write("processed.wav", sample_rate, delayed_data.astype(np.int16))
+    return "processed.wav"
 def tts(
+    rvc,
+    effect,
     speed,
     pitch,
     tts_text,
     print(f"tts_voice: {tts_voice}")
     print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
+    edge_output_filename = "edge_output.mp3"
     try:
         if limitation and len(tts_text) > 280:
             print("Error: Text too long")
         )
         t1 = time.time()
         edge_time = t1 - t0
         if not rvc:
+            if effect:
+                edge_output_filename = add_robotic_effect(edge_output_filename)
             info = f"Success. Time: edge-tts: {edge_time}s"
             print(info)
             return (
                 label="Input Text",
                 value="I'm Never Gonna Give You Up",
             )
+            rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=False)
+            effect = gr.Checkbox(label="Add Effect", info="Would you like to apply Effect?", value=True)
         with gr.Column():
             but0 = gr.Button("Convert", variant="primary")
             info_text = gr.Textbox(label="Output info")
             tts,
             [
                 rvc,
+                effect,
                 speed,
                 pitch,
                 tts_text,

requirements.txt CHANGED Viewed

@@ -8,4 +8,6 @@ pyworld==0.3.4
 torchcrepe==0.0.21
 scikit-learn==1.3.0
 gradio
-gradio_client

 torchcrepe==0.0.21
 scikit-learn==1.3.0
 gradio
+gradio_client
+pydub
+scipy