Hev832 committed on
Commit 69d13ea · verified · 1 Parent(s): af2ee36

Update app.py

Files changed (1): app.py (+62, -206)
app.py CHANGED
@@ -1,12 +1,31 @@
- from module import *
+ import os
+ import re
+ import random
+ from scipy.io.wavfile import write
+ from scipy.io.wavfile import read
+ import numpy as np
+ import gradio as gr
+ import yt_dlp
+ import subprocess
+ from pydub import AudioSegment
+ from audio_separator.separator import Separator
+ from lib.infer import infer_audio
+ import edge_tts
+ import tempfile
+ import anyio
+ from pathlib import Path
+ from lib.language_tts import language_dict
+ import shutil
+ import time
+ from argparse import ArgumentParser
  from download_model import download_online_model
+
  main_dir = Path().resolve()
  print(main_dir)

  os.chdir(main_dir)
  models_dir = main_dir / "rvc_models"
  audio_separat_dir = main_dir / "audio_input"
-
  AUDIO_DIR = main_dir / 'audio_input'


@@ -16,35 +35,28 @@ def get_folders():
  return [folder.name for folder in models_dir.iterdir() if folder.is_dir()]
  return []

+
  # Function to refresh and return the list of folders
  def refresh_folders():
  return gr.Dropdown.update(choices=get_folders())


-
-
  # Function to get the list of audio files in the specified directory
  def get_audio_files():
  if not os.path.exists(AUDIO_DIR):
  os.makedirs(AUDIO_DIR)
- # List all supported audio file formats
  return [f for f in os.listdir(AUDIO_DIR) if f.lower().endswith(('.mp3', '.wav', '.flac', '.ogg', '.aac'))]

+
  # Function to return the full path of audio files for playback
  def load_audio_files():
  audio_files = get_audio_files()
  return [os.path.join(AUDIO_DIR, f) for f in audio_files]

- # Refresh function to update the list of files
+
  def refresh_audio_list():
  audio_files = load_audio_files()
- return gr.update(choices=audio_files)
-
- # Function to play selected audio file
- def play_audio(file_path):
- return file_path
-
-
+ return gr.Dropdown.update(choices=audio_files)


  def download_audio(url):
@@ -61,88 +73,7 @@ def download_audio(url):
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
  info_dict = ydl.extract_info(url, download=True)
  file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
- sample_rate, audio_data = read(file_path)
- audio_array = np.asarray(audio_data, dtype=np.int16)
-
- return sample_rate, audio_array
-
-
-
- # Define a function to handle the entire separation process
- def separate_audio(input_audio, model_voc_inst, model_deecho, model_back_voc):
- output_dir = audio_separat_dir
- separator = Separator(output_dir=output_dir)
-
- # Define output files
- vocals = os.path.join(output_dir, 'Vocals.wav')
- instrumental = os.path.join(output_dir, 'Instrumental.wav')
- vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
- vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
- lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
- backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')
-
- # Splitting a track into Vocal and Instrumental
- separator.load_model(model_filename=model_voc_inst)
- voc_inst = separator.separate(input_audio)
- os.rename(os.path.join(output_dir, voc_inst[0]), instrumental) # Rename to “Instrumental.wav”
- os.rename(os.path.join(output_dir, voc_inst[1]), vocals) # Rename to “Vocals.wav”
-
- # Applying DeEcho-DeReverb to Vocals
- separator.load_model(model_filename=model_deecho)
- voc_no_reverb = separator.separate(vocals)
- os.rename(os.path.join(output_dir, voc_no_reverb[0]), vocals_no_reverb) # Rename to “Vocals (No Reverb).wav”
- os.rename(os.path.join(output_dir, voc_no_reverb[1]), vocals_reverb) # Rename to “Vocals (Reverb).wav”
-
- # Separating Back Vocals from Main Vocals
- separator.load_model(model_filename=model_back_voc)
- backing_voc = separator.separate(vocals_no_reverb)
- os.rename(os.path.join(output_dir, backing_voc[0]), backing_vocals) # Rename to “Backing Vocals.wav”
- os.rename(os.path.join(output_dir, backing_voc[1]), lead_vocals) # Rename to “Lead Vocals.wav”
-
- return "separation done..."
-
- # Main function to process audio (Inference)
- def process_audio(MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE,
- FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP,
- KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio=None):
-
- # If no sound path is given, use the uploaded file
- if not SOUND_PATH and upload_audio is not None:
- SOUND_PATH = os.path.join("uploaded_audio", upload_audio.name)
- with open(SOUND_PATH, "wb") as f:
- f.write(upload_audio.read())
-
- # Check if a model name is provided
- if not MODEL_NAME:
- return "Please provide a model name."
-
- # Run the inference
- os.system("chmod +x stftpitchshift")
- inferred_audio = infer_audio(
- MODEL_NAME,
- SOUND_PATH,
- F0_CHANGE,
- F0_METHOD,
- MIN_PITCH,
- MAX_PITCH,
- CREPE_HOP_LENGTH,
- INDEX_RATE,
- FILTER_RADIUS,
- RMS_MIX_RATE,
- PROTECT,
- SPLIT_INFER,
- MIN_SILENCE,
- SILENCE_THRESHOLD,
- SEEK_STEP,
- KEEP_SILENCE,
- FORMANT_SHIFT,
- QUEFRENCY,
- TIMBRE,
- F0_AUTOTUNE,
- OUTPUT_FORMAT
- )
-
- return inferred_audio
+ return file_path


  async def text_to_speech_edge(text, language_code):
@@ -154,152 +85,77 @@ async def text_to_speech_edge(text, language_code):
  return tmp_path


-
-
-
  if __name__ == '__main__':
- parser = ArgumentParser(description='Generate a AI song in the song_output/id directory.', add_help=True)
- parser.add_argument("--share", action="store_true", dest="share_enabled", default=False, help="Enable sharing")
- parser.add_argument("--listen", action="store_true", default=False, help="Make the UI reachable from your local network.")
- parser.add_argument('--listen-host', type=str, help='The hostname that the server will use.')
- parser.add_argument('--listen-port', type=int, help='The listening port that the server will use.')
+ parser = ArgumentParser()
+ parser.add_argument("--share", action="store_true", dest="share_enabled", default=False)
+ parser.add_argument("--listen", action="store_true", default=False)
+ parser.add_argument('--listen-host', type=str)
+ parser.add_argument('--listen-port', type=int)
  args = parser.parse_args()

-
-
-
- # Gradio Blocks Interface with Tabs
+ # Gradio Interface
  with gr.Blocks(title="Hex RVC", theme=gr.themes.Base(primary_hue="red", secondary_hue="pink")) as app:
  gr.Markdown("# Hex RVC")
- gr.Markdown(" join [AIHub](https://discord.gg/aihub) to get the rvc model!")
+ gr.Markdown("Join [AIHub](https://discord.gg/aihub) to get the RVC model!")

  with gr.Tab("Inference"):
  with gr.Row():
  MODEL_NAME = gr.Dropdown(
  label="Select a Model",
  choices=get_folders(),
- interactive=True,
- elem_id="model_folder"
+ interactive=True
  )
  SOUND_PATH = gr.Dropdown(
  choices=load_audio_files(),
  label="Select an audio file",
- interactive=True,
- value=None,
+ interactive=True
  )
- # Button to refresh the list of folders
-
- with gr.Row():
- # = gr.Textbox(label="Model Name", placeholder="Enter model name")
- # SOUND_PATH = gr.Textbox(label="Audio Path (Optional)", placeholder="Leave blank to upload audio")
- upload_audio = gr.Audio(label="Upload Audio", type='filepath', visible=False)
-
-
+ upload_audio = gr.Audio(label="Upload Audio", type='filepath')

-
-
-
-
-
- with gr.Accordion("Conversion Settings"):
- with gr.Row():
- F0_CHANGE = gr.Number(label="Pitch Change (semitones)", value=0)
- F0_METHOD = gr.Dropdown(choices=["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe_legacy", "fcpe", "fcpe_legacy", "hybrid[rmvpe+fcpe]"], label="F0 Method", value="fcpe")
- with gr.Row():
- MIN_PITCH = gr.Textbox(label="Min Pitch", value="50")
- MAX_PITCH = gr.Textbox(label="Max Pitch", value="1100")
- CREPE_HOP_LENGTH = gr.Number(label="Crepe Hop Length", value=120)
- INDEX_RATE = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
- FILTER_RADIUS = gr.Number(label="Filter Radius", value=3)
- RMS_MIX_RATE = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
- PROTECT = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)
-
- with gr.Accordion("Hex TTS", open=False):
+ with gr.Accordion("Hex TTS"):
  input_text = gr.Textbox(lines=5, label="Input Text")
- #output_text = gr.Textbox(label="Output Text")
- #output_audio = gr.Audio(type="filepath", label="Exported Audio")
  language = gr.Dropdown(choices=list(language_dict.keys()), label="Choose the Voice Model")
  tts_convert = gr.Button("Convert")
- tts_convert.click(fn=text_to_speech_edge, inputs=[input_text, language], outputs=[upload_audio])
- with gr.Accordion("Advanced Settings", open=False):
- SPLIT_INFER = gr.Checkbox(label="Enable Split Inference", value=False)
- MIN_SILENCE = gr.Number(label="Min Silence (ms)", value=500)
- SILENCE_THRESHOLD = gr.Number(label="Silence Threshold (dBFS)", value=-50)
- SEEK_STEP = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
- KEEP_SILENCE = gr.Number(label="Keep Silence (ms)", value=200)
- FORMANT_SHIFT = gr.Checkbox(label="Enable Formant Shift", value=False)
- QUEFRENCY = gr.Number(label="Quefrency", value=0)
- TIMBRE = gr.Number(label="Timbre", value=1)
- F0_AUTOTUNE = gr.Checkbox(label="Enable F0 Autotune", value=False)
- OUTPUT_FORMAT = gr.Dropdown(choices=["wav", "flac", "mp3"], label="Output Format", value="wav")
+ tts_output = gr.Audio(label="Generated TTS Audio", type='filepath')
+
+ tts_convert.click(
+ fn=text_to_speech_edge,
+ inputs=[input_text, language],
+ outputs=tts_output
+ )

  output_audio = gr.Audio(label="Generated Audio", type='filepath')
-
  with gr.Row():
  refresh_btn = gr.Button("Refresh")
  run_button = gr.Button("Convert")
-
- #ref_btn.click(update_models_list, None, outputs=MODEL_NAME)
+
  refresh_btn.click(
- lambda: (refresh_audio_list(), refresh_folders()),
+ lambda: (refresh_audio_list(), refresh_folders()),
  outputs=[SOUND_PATH, MODEL_NAME]
  )
- run_button.click(
- process_audio,
- inputs=[MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE,
- FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP,
- KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio],
- outputs=output_audio
- )

  with gr.Tab("Download RVC Model"):
- with gr.Row():
- url = gr.Textbox(label="Your model URL")
- dirname = gr.Textbox(label="Your Model name")
- outout_pah = gr.Textbox(label="output download", interactive=False)
- button_model = gr.Button("Download model")
-
- button_model.click(fn=download_online_model, inputs=[url, dirname], outputs=[outout_pah])
- with gr.Tab("Audio Separation"):
- with gr.Row():
- input_audio = gr.Audio(type="filepath", label="Upload Audio File")
-
- with gr.Row():
- with gr.Accordion("Separation by Link", open = False):
- with gr.Row():
- roformer_link = gr.Textbox(
- label = "Link",
- placeholder = "Paste the link here",
- interactive = True
- )
- with gr.Row():
- gr.Markdown("You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
- with gr.Row():
- roformer_download_button = gr.Button(
- "Download!",
- variant = "primary"
- )
+ url = gr.Textbox(label="Your Model URL")
+ dirname = gr.Textbox(label="Your Model Name")
+ download_button = gr.Button("Download Model")
+ download_output = gr.Textbox(label="Download Status")
+
+ download_button.click(
+ download_online_model,
+ inputs=[url, dirname],
+ outputs=download_output
+ )

- roformer_download_button.click(download_audio, [roformer_link], [input_audio])
-
- with gr.Row():
- model_voc_inst = gr.Textbox(value='model_bs_roformer_ep_317_sdr_12.9755.ckpt', label="Vocal & Instrumental Model", visible=False)
- model_deecho = gr.Textbox(value='UVR-DeEcho-DeReverb.pth', label="DeEcho-DeReverb Model", visible=False)
- model_back_voc = gr.Textbox(value='mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt', label="Backing Vocals Model", visible=False)
+ with gr.Tab("Audio Separation"):
+ input_audio = gr.Audio(type="filepath", label="Upload Audio")
+ roformer_link = gr.Textbox(label="Audio Link")
+ roformer_download_button = gr.Button("Download")

  separate_button = gr.Button("Separate Audio")
-
- with gr.Row():
- outout_paht = gr.Textbox(label="output download", interactive=False)
-
- separate_button.click(
- separate_audio,
- inputs=[input_audio, model_voc_inst, model_deecho, model_back_voc],
- outputs=[outout_paht]
- )
+ separation_output = gr.Textbox(label="Separation Output Path")

+ roformer_download_button.click(download_audio, inputs=[roformer_link], outputs=[input_audio])

- # Launch the Gradio app
  app.launch(
  share=args.share_enabled,
  server_name=None if not args.listen else (args.listen_host or '0.0.0.0'),
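
Note: the `Refresh` button kept by this commit relies on Gradio's choice-update pattern (`refresh_audio_list` / `refresh_folders` return update objects that replace a Dropdown's choices). Below is a minimal, self-contained sketch of that pattern, assuming Gradio 3.x where `gr.Dropdown.update()` exists; the directory name and labels are illustrative and not taken from app.py.

```python
# Minimal sketch of the dropdown-refresh pattern (assumes Gradio 3.x).
import os
from pathlib import Path

import gradio as gr

AUDIO_DIR = Path("audio_input")  # hypothetical folder scanned for audio files
AUDIO_EXTS = (".mp3", ".wav", ".flac", ".ogg", ".aac")


def list_audio_files():
    """Return full paths of supported audio files found in AUDIO_DIR."""
    AUDIO_DIR.mkdir(exist_ok=True)
    return [str(AUDIO_DIR / f) for f in os.listdir(AUDIO_DIR)
            if f.lower().endswith(AUDIO_EXTS)]


def refresh_audio_choices():
    # Returning an update object swaps the dropdown's choices in place.
    return gr.Dropdown.update(choices=list_audio_files())


with gr.Blocks() as demo:
    sound_path = gr.Dropdown(choices=list_audio_files(), label="Select an audio file")
    refresh_btn = gr.Button("Refresh")
    # No inputs are needed; the handler only produces a new choices list.
    refresh_btn.click(fn=refresh_audio_choices, outputs=sound_path)

if __name__ == "__main__":
    demo.launch()
```

In Gradio 4.x the same wiring would return `gr.update(choices=...)` or a fresh `gr.Dropdown(choices=...)` instead of `gr.Dropdown.update()`.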