Spark808 committed
Commit ddb9100 · 1 Parent(s): db48aff

Update app.py

Files changed (1):
  1. app.py +21 -34
app.py CHANGED
@@ -3,23 +3,35 @@ import json
 import argparse
 import traceback
 import logging
-from datetime import datetime
 import gradio as gr
 import numpy as np
 import librosa
 import torch
+import asyncio
+import edge_tts
+from datetime import datetime
 from fairseq import checkpoint_utils
 from infer_pack.models import SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono
 from vc_infer_pipeline import VC
-from config import is_half, device
-
+from config import (
+    is_half,
+    device
+)
 logging.getLogger("numba").setLevel(logging.WARNING)
 
 def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
-    def vc_fn(input_audio, f0_up_key, f0_method, index_rate):
+    def vc_fn(
+        input_audio,
+        f0_up_key,
+        f0_method,
+        index_rate
+    ):
         try:
-            # Check if input_audio is a tuple (Gradio input)
-            if isinstance(input_audio, tuple):
+            if args.files:
+                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
+            else:
+                if input_audio is None:
+                    return "You need to upload an audio", None
                 sampling_rate, audio = input_audio
                 duration = audio.shape[0] / sampling_rate
                 if duration > 10000000:
@@ -29,9 +41,6 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                     audio = librosa.to_mono(audio.transpose(1, 0))
                 if sampling_rate != 16000:
                     audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
-            else:  # Assume it's a file path
-                audio, sr = librosa.load(input_audio, sr=16000, mono=True)
-
             times = [0, 0, 0]
             f0_up_key = int(f0_up_key)
             audio_opt = vc.pipeline(
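Net effect of the two hunks above: `vc_fn` now picks its input path up front. With `--files` the argument is treated as a filesystem path and loaded straight to 16 kHz mono; otherwise it is the `(sampling_rate, samples)` tuple that Gradio's numpy audio component returns, which is checked against an empty upload, downmixed, and resampled. A minimal standalone sketch of that flow (the helper name `prepare_audio` and the integer-PCM normalization step are assumptions; the normalization line falls between the hunks and is not shown in this diff):

```python
import numpy as np
import librosa

def prepare_audio(input_audio, from_file=False):
    # Hypothetical helper mirroring vc_fn's new input handling.
    if from_file:
        # --files mode: input_audio is a path; load directly as 16 kHz mono.
        audio, _ = librosa.load(input_audio, sr=16000, mono=True)
        return audio
    if input_audio is None:
        raise ValueError("You need to upload an audio")
    sampling_rate, audio = input_audio  # Gradio numpy audio: (sr, samples)
    if np.issubdtype(audio.dtype, np.integer):
        # Assumed step: scale integer PCM into float32 [-1, 1].
        audio = audio.astype(np.float32) / np.iinfo(audio.dtype).max
    if audio.ndim > 1:
        # (samples, channels) -> (channels, samples), as librosa.to_mono expects
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    return audio
```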
@@ -51,11 +60,10 @@ def create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy):
                 f"[{datetime.now().strftime('%Y-%m-%d %H:%M')}]: npy: {times[0]}, f0: {times[1]}s, infer: {times[2]}s"
             )
             return "Success", (tgt_sr, audio_opt)
-        except Exception as e:
+        except:
             info = traceback.format_exc()
             print(info)
-            return str(e), (None, None)
-
+            return info, (None, None)
     return vc_fn
 
 def load_hubert():
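The hunk above also changes what the UI shows on failure: `except Exception as e` becomes a bare `except`, and the full `traceback.format_exc()` string is returned as the status message instead of `str(e)`. A sketch of the pattern (the wrapper name is an assumption):

```python
import traceback

def run_reporting_failures(fn, *args):
    # Hypothetical wrapper showing the commit's error-reporting style:
    # print the traceback server-side and surface the same text in the UI.
    try:
        return "Success", fn(*args)
    except:
        info = traceback.format_exc()
        print(info)
        return info, None
```

A bare `except` also swallows `KeyboardInterrupt` and `SystemExit`, so `except Exception` is usually safer; the upside here is that the complete stack trace reaches the output textbox, which makes a hosted Space easier to debug.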
@@ -78,13 +86,10 @@ if __name__ == '__main__':
     parser.add_argument("--share", action="store_true", default=False, help="share gradio app")
     parser.add_argument("--files", action="store_true", default=False, help="load audio from path")
     args, unknown = parser.parse_known_args()
-
     load_hubert()
     models = []
-
     with open("weights/model_info.json", "r", encoding="utf-8") as f:
         models_info = json.load(f)
-
     for name, info in models_info.items():
         if not info['enable']:
             continue
@@ -95,7 +100,6 @@ if __name__ == '__main__':
         cpt = torch.load(f"weights/{name}/{name}.pth", map_location="cpu")
         tgt_sr = cpt["config"][-1]
         if_f0 = cpt.get("f0", 1)
-
         if if_f0 == 1:
             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
         else:
@@ -103,15 +107,12 @@ if __name__ == '__main__':
         del net_g.enc_q
         print(net_g.load_state_dict(cpt["weight"], strict=False))
         net_g.eval().to(device)
-
         if is_half:
             net_g = net_g.half()
         else:
             net_g = net_g.float()
-
         vc = VC(tgt_sr, device, is_half)
         models.append((name, title, cover, create_vc_fn(tgt_sr, net_g, vc, if_f0, index, npy)))
-
     with gr.Blocks() as app:
         gr.Markdown(
             "# <center> RVC generator\n"
@@ -153,18 +154,4 @@ if __name__ == '__main__':
             vc_output1 = gr.Textbox(label="Output Message")
             vc_output2 = gr.Audio(label="Output Audio")
             vc_submit.click(vc_fn, [vc_input, vc_transpose, vc_f0method, vc_index_ratio], [vc_output1, vc_output2])
-        app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
-
-    # Note: The following code is outside the `__main__` block, as it was causing indentation issues in the provided code.
-    iface = gr.Interface(
-        fn=create_vc_fn(tgt_sr, net_g, vc, if_f0, file_index, file_big_npy),
-        inputs=[
-            gr.inputs.Audio(source="microphone", type="numpy", sample_rate=16000),
-            gr.inputs.Slider(minimum=-12, maximum=12, step=1, default=0),
-            gr.inputs.Radio(["world", "dio"], label="F0 method"),
-            gr.inputs.Slider(minimum=0, maximum=1, step=0.01, default=0.5)
-        ],
-        outputs="text",
-    )
-
-    iface.launch()
+    app.queue(concurrency_count=1, max_size=20, api_open=args.api).launch(share=args.share)
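The final hunk deletes the stray `gr.Interface` block, which used the deprecated `gr.inputs.*` namespace and referenced loop variables after the loop had finished, and keeps a single queued `gr.Blocks` launch. The `--api` flag behind `args.api` is defined outside the hunks shown. A self-contained sketch of the queue-and-launch pattern, assuming Gradio 3.x (where `queue()` still accepts `concurrency_count`; Gradio 4 removed that parameter):

```python
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("queue/launch sketch")

# One worker processes requests serially; at most 20 requests wait in the
# queue; the REST API and the public share link are toggled per deployment.
demo.queue(concurrency_count=1, max_size=20, api_open=False).launch(share=False)
```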
 