rvc

Runtime error

App Files Files Community

Spark808 committed on Dec 4, 2023

Commit

ddb9100

1 Parent(s): db48aff

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -34

app.py CHANGED Viewed

@@ -3,23 +3,35 @@ import json
 import argparse
 import traceback
 import logging
-from datetime import datetime
 import gradio as gr
 import numpy as np
 import librosa
 import torch
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
-from config import is_half, device
 logging.getLogger("numba").setLevel(logging.WARNING)
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
-    def vc_fn(input_audio, f0_up_key, f0_method, index_rate):
         try:
-            # Check if input_audio is a tuple (Gradio input)
-            if isinstance(input_audio, tuple):
                 sampling_rate, audio = input_audio
                 duration = audio.shape[0] / sampling_rate
                 if duration > 10000000:
@@ -29,9 +41,6 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                     audio = librosa.to_mono(audio.transpose(1, 0))
                 if sampling_rate != 16000:
                     audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
-            else:  # Assume it's a file path
-                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
@@ -51,11 +60,10 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
             return "Success", (tgt_sr, audio_opt)
-        except Exception as e:
             info = traceback.format_exc()
             print(info)
-            return str(e), (None, None)
     return vc_fn
 def load_hubert():
@@ -78,13 +86,10 @@ if __name__ == '__main__':
     parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
     parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
     with open("weights/model_info.json", "r", encoding="utf-8") as f:
         models_info = json.load(f)
     for name, info in models_info.items():
         if not info['enable']:
             continue
@@ -95,7 +100,6 @@ if __name__ == '__main__':
         cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
         tgt_sr = cpt["config"][-1]
         if_f0 = cpt.get("f0", 1)
         if if_f0 == 1:
             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
         else:
@@ -103,15 +107,12 @@ if __name__ == '__main__':
         del net_g.enc_q
         print(net_g.load_state_dict(cpt["weight"], strict=False))
         net_g.eval().to(device)
         if is_half:
             net_g = net_g.half()
         else:
             net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -153,18 +154,4 @@ if __name__ == '__main__':
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
-    # Note: The following code is outside the `__main__` block, as it was causing indentation issues in the provided code.
-    iface = gr.Interface(
-        fn=create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy),
-        inputs=[
-            gr.inputs.Audio(source="microphone", type="numpy", sample_rate=16000),
-            gr.inputs.Slider(minimum=-12, maximum=12, step=1, default=0),
-            gr.inputs.Radio(["world", "dio"], label="F0 method"),
-            gr.inputs.Slider(minimum=0, maximum=1, step=0.01, default=0.5)
-        ],
-        outputs="text",
-    )
-    iface.launch()

 import argparse
 import traceback
 import logging
 import gradio as gr
 import numpy as np
 import librosa
 import torch
+import asyncio
+import edge_tts
+from datetime import datetime
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
+from config import (
+    is_half,
+    device
+)
 logging.getLogger("numba").setLevel(logging.WARNING)
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
+    def vc_fn(
+        input_audio,
+        f0_up_key,
+        f0_method,
+        index_rate
+    ):
         try:
+            if args.files:
+                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
+            else:
+                if input_audio is None:
+                    return "You need to upload an audio", None
                 sampling_rate, audio = input_audio
                 duration = audio.shape[0] / sampling_rate
                 if duration > 10000000:
                     audio = librosa.to_mono(audio.transpose(1, 0))
                 if sampling_rate != 16000:
                     audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
             return "Success", (tgt_sr, audio_opt)
+        except:
             info = traceback.format_exc()
             print(info)
+            return info, (None, None)
     return vc_fn
 def load_hubert():
     parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
     parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
     args, unknown = parser.parse_known_args()
     load_hubert()
     models = []
     with open("weights/model_info.json", "r", encoding="utf-8") as f:
         models_info = json.load(f)
     for name, info in models_info.items():
         if not info['enable']:
             continue
         cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
         tgt_sr = cpt["config"][-1]
         if_f0 = cpt.get("f0", 1)
         if if_f0 == 1:
             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
         else:
         del net_g.enc_q
         print(net_g.load_state_dict(cpt["weight"], strict=False))
         net_g.eval().to(device)
         if is_half:
             net_g = net_g.half()
         else:
             net_g = net_g.float()
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
                 vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
+        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)