NeoPy committed on
Commit
fa2fd5b
·
verified ·
1 Parent(s): 8713074

Update Test.py

Browse files
Files changed (1) hide show
  1. Test.py +461 -20
Test.py CHANGED
@@ -1,32 +1,473 @@
 
 
 
 
1
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from audio_separator.separator import Separator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
- input = "/content/input.mp3"
5
- output = "/content/output"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
 
 
 
 
 
 
 
 
 
8
 
9
- def separation_uvr(input, output):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  separator = Separator(output_dir=output)
11
- vocals = os.path.join(output, 'Vocals.wav')
12
- instrumental = os.path.join(output, 'Instrumental.wav')
13
- vocals_reverb = os.path.join(output, 'Vocals (Reverb).wav')
14
- vocals_no_reverb = os.path.join(output, 'Vocals (No Reverb).wav')
15
-
16
- lead_vocals = os.path.join(output, 'Lead Vocals.wav')
17
- backing_vocals = os.path.join(output, 'Backing Vocals.wav')
18
-
 
19
  separator.load_model(model_filename='model_bs_roformer_ep_317_sdr_12.9755.ckpt')
20
- voc_inst = separator.separate(input)
21
- os.rename(os.path.join(output, voc_inst[0]), instrumental)
22
- os.rename(os.path.join(output, voc_inst[1]), vocals)
23
 
24
  separator.load_model(model_filename='UVR-DeEcho-DeReverb.pth')
25
- voc_no_reverb = separator.separate(vocals)
26
- os.rename(os.path.join(output, voc_no_reverb[0]), vocals_no_reverb)
27
- os.rename(os.path.join(output, voc_no_reverb[1]), vocals_reverb)
28
 
29
  separator.load_model(model_filename='mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt')
30
- backing_voc = separator.separate(vocals_no_reverb)
31
- os.rename(os.path.join(output, backing_voc[0]), backing_vocals)
32
- os.rename(os.path.join(output, backing_voc[1]), lead_vocals)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import gc
3
+ import hashlib
4
+ import json
5
  import os
6
+ import shlex
7
+ import subprocess
8
+ from contextlib import suppress
9
+ from urllib.parse import urlparse, parse_qs
10
+
11
+ import gradio as gr
12
+ import librosa
13
+ import numpy as np
14
+ import soundfile as sf
15
+ import sox
16
+ import yt_dlp
17
+ from pedalboard import Pedalboard, Reverb, Compressor, HighpassFilter
18
+ from pedalboard.io import AudioFile
19
+ from pydub import AudioSegment
20
  from audio_separator.separator import Separator
21
+ from rvc import Config, load_hubert, get_vc, rvc_infer
22
+
23
# Base directories: BASE_DIR is the parent of the directory containing this
# file; the model and output folders are all direct children of it.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
mdxnet_models_dir, rvc_models_dir, output_dir = (
    os.path.join(BASE_DIR, folder)
    for folder in ('mdxnet_models', 'rvc_models', 'song_output')
)
28
+
29
+
30
def get_youtube_video_id(url, ignore_playlist=True):
    """
    Extract the YouTube video ID from various URL formats.

    Examples:
        http://youtu.be/SA2iWivDJiE
        http://www.youtube.com/watch?v=_oPAwA_Udwc&feature=feedu
        http://www.youtube.com/watch/SA2iWivDJiE
        http://www.youtube.com/embed/SA2iWivDJiE
        http://www.youtube.com/v/SA2iWivDJiE?version=3&hl=en_US

    Args:
        url: The URL to inspect.
        ignore_playlist: When False and the URL carries a ``list`` query
            parameter, the playlist ID is returned instead of the video ID.

    Returns:
        The extracted ID, or None when the host is not a recognised YouTube
        host, the path has no recognised shape, or the ID would be empty.
    """
    parsed_url = urlparse(url)
    hostname = (parsed_url.hostname or '').lower()
    path = parsed_url.path

    if hostname == 'youtu.be':
        # An empty path yields '' here; report it as "no ID" so callers that
        # test for None reject it.
        return path.lstrip('/') or None

    if hostname in {'www.youtube.com', 'youtube.com', 'music.youtube.com'}:
        query = parse_qs(parsed_url.query)
        if not ignore_playlist:
            # Fall through to the video ID when there is no playlist.
            with suppress(KeyError):
                return query['list'][0]
        if path == '/watch':
            return query.get('v', [None])[0]
        # '/watch/<id>', '/embed/<id>' and '/v/<id>' all split into
        # ['', '<kind>', '<id>', ...], so the ID is always element 2.
        if path.startswith(('/watch/', '/embed/', '/v/')):
            return path.split('/')[2] or None

    return None
61
+
62
 
63
def yt_download(link):
    """
    Download the audio from a YouTube link as an mp3 file.

    Args:
        link: URL handed to yt-dlp.

    Returns:
        The file name built from the '%(title)s.mp3' template, relative to
        the current working directory.

    Raises:
        RuntimeError: If yt-dlp returned no metadata for the link.
    """
    ydl_opts = {
        'format': 'bestaudio',
        'outtmpl': '%(title)s',
        'nocheckcertificate': True,
        'ignoreerrors': True,
        'no_warnings': True,
        'quiet': True,
        'extractaudio': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3'
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        result = ydl.extract_info(link, download=True)
        # With 'ignoreerrors' enabled a failed extraction comes back as None
        # instead of raising; fail here with a clear message rather than
        # letting prepare_filename choke on None.
        if result is None:
            raise RuntimeError(f'Failed to download audio from {link}.')
        download_path = ydl.prepare_filename(result, outtmpl='%(title)s.mp3')
    return download_path
84
 
85
 
86
def display_progress(message, percent, is_webui, progress=None):
    """
    Report a progress message.

    When running in the web UI and a progress callback was supplied, the
    callback is invoked as ``progress(percent, desc=message)``; in every
    other case the message is printed to stdout.
    """
    use_callback = is_webui and progress is not None
    if not use_callback:
        print(message)
        return
    progress(percent, desc=message)
94
 
95
+
96
def raise_exception(error_msg, is_webui):
    """
    Always raise: a plain Exception on the command line, gr.Error in the web UI.
    """
    if not is_webui:
        raise Exception(error_msg)
    raise gr.Error(error_msg)
104
+
105
+
106
def get_rvc_model(voice_model, is_webui):
    """
    Locate the model (.pth) and optional index (.index) file of an RVC voice.

    The folder ``rvc_models_dir/<voice_model>`` is scanned; when several files
    share an extension, the one listed last by ``os.listdir`` is used.

    Returns:
        (model_path, index_path); index_path is '' when no index file exists.
    """
    model_dir = os.path.join(rvc_models_dir, voice_model)
    if not os.path.exists(model_dir):
        raise_exception(f'Model directory {model_dir} does not exist.', is_webui)

    # Extension -> most recently seen file name with that extension.
    found = {'.pth': None, '.index': None}
    for entry in os.listdir(model_dir):
        suffix = os.path.splitext(entry)[1]
        if suffix in found:
            found[suffix] = entry

    pth_name, index_name = found['.pth'], found['.index']
    if pth_name is None:
        error_msg = f'No model file exists in {model_dir}.'
        raise_exception(error_msg, is_webui)

    index_path = os.path.join(model_dir, index_name) if index_name else ''
    return os.path.join(model_dir, pth_name), index_path
128
+
129
+
130
def separation_uvr(filename, output):
    """
    Run the three separation stages using different pre-trained models.

    Stages:
        1. Split the song into instrumental and vocals.
        2. Split the vocals into a de-reverbed track and the reverb residue.
        3. Split the de-reverbed vocals into backup and main (lead) vocals.

    Returns a tuple of four file paths:
        - vocals_no_reverb: The stage-2 de-reverbed vocals (intermediate vocals)
        - instrumental_path: The separated instrumental audio
        - main_vocals_dereverb: The lead vocals split from the de-reverbed vocals
        - backup_vocals: The backup vocals extracted in the final stage
    """
    separator = Separator(output_dir=output)
    base_name = os.path.splitext(os.path.basename(filename))[0]

    instrumental_path = os.path.join(output, f'{base_name}_Instrumental.wav')
    initial_vocals = os.path.join(output, f'{base_name}_Vocals.wav')
    vocals_no_reverb = os.path.join(output, f'{base_name}_Vocals (No Reverb).wav')
    vocals_reverb = os.path.join(output, f'{base_name}_Vocals (Reverb).wav')
    main_vocals_dereverb = os.path.join(output, f'{base_name}_Vocals_Main_DeReverb.wav')
    backup_vocals = os.path.join(output, f'{base_name}_Vocals_Backup.wav')

    def run_stage(model_filename, source, first_target, second_target):
        """Load a model, separate `source`, and move its two outputs into place."""
        separator.load_model(model_filename=model_filename)
        produced = separator.separate(source)
        # NOTE(review): assumes each model lists its two stems in the order
        # the targets are given here - confirm against audio-separator.
        # os.replace (unlike os.rename) also overwrites an existing target on
        # Windows, so re-running into a populated song directory works.
        os.replace(os.path.join(output, produced[0]), first_target)
        os.replace(os.path.join(output, produced[1]), second_target)

    run_stage('model_bs_roformer_ep_317_sdr_12.9755.ckpt',
              filename, instrumental_path, initial_vocals)
    run_stage('UVR-DeEcho-DeReverb.pth',
              initial_vocals, vocals_no_reverb, vocals_reverb)
    run_stage('mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt',
              vocals_no_reverb, backup_vocals, main_vocals_dereverb)

    # The reverb residue is not used by any later step.
    if os.path.exists(vocals_reverb):
        os.remove(vocals_reverb)

    return vocals_no_reverb, instrumental_path, main_vocals_dereverb, backup_vocals
168
+
169
+
170
def get_audio_paths(song_dir):
    """
    Search the given directory for expected audio files.

    Files are recognised by their name suffix:
        '_Instrumental.wav', '_Vocals_Main_DeReverb.wav', '_Vocals_Backup.wav'.
    The original song path is derived from the instrumental file name by
    dropping the trailing '_Instrumental' marker; it is not checked for
    existence.

    Returns:
        orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
        (each is None when no matching file was found).
    """
    instrumental_suffix = '_Instrumental.wav'

    orig_song_path = None
    instrumentals_path = None
    main_vocals_dereverb_path = None
    backup_vocals_path = None

    for file in os.listdir(song_dir):
        if file.endswith(instrumental_suffix):
            instrumentals_path = os.path.join(song_dir, file)
            # Strip only the trailing marker of the file name. A blanket
            # str.replace on the full path would also eat '_Instrumental'
            # occurring in a directory name or inside the song title.
            orig_name = file[:-len(instrumental_suffix)] + '.wav'
            orig_song_path = os.path.join(song_dir, orig_name)
        elif file.endswith('_Vocals_Main_DeReverb.wav'):
            main_vocals_dereverb_path = os.path.join(song_dir, file)
        elif file.endswith('_Vocals_Backup.wav'):
            backup_vocals_path = os.path.join(song_dir, file)

    return orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path
191
+
192
+
193
def convert_to_stereo(audio_path):
    """
    Convert the given audio file to stereo (2 channels) if it is mono.

    Returns:
        The path of a new '<name>_stereo.wav' file written by ffmpeg when the
        input was mono, otherwise the unchanged input path.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    wave, _ = librosa.load(audio_path, mono=False, sr=44100)
    if wave.ndim != 1:
        return audio_path

    stereo_path = f'{os.path.splitext(audio_path)[0]}_stereo.wav'
    # Pass the arguments as a list: building a command string and re-splitting
    # it breaks on paths that contain double quotes (common in song titles).
    command = [
        'ffmpeg', '-y', '-loglevel', 'error',
        '-i', audio_path,
        '-ac', '2', '-f', 'wav', stereo_path,
    ]
    subprocess.run(command, check=True)
    return stereo_path
204
+
205
+
206
def pitch_shift(audio_path, pitch_change):
    """
    Write a pitch-shifted copy of the audio and return its path.

    The result is stored next to the input as '<name>_p<pitch_change>.wav';
    if that file already exists it is reused without reprocessing.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_p{pitch_change}.wav'
    if os.path.exists(output_path):
        return output_path

    samples, sample_rate = sf.read(audio_path)
    transformer = sox.Transformer()
    transformer.pitch(pitch_change)
    shifted = transformer.build_array(input_array=samples, sample_rate_in=sample_rate)
    sf.write(output_path, shifted, sample_rate)
    return output_path
218
+
219
+
220
def get_hash(filepath):
    """
    Return the first 11 hex characters of the file's BLAKE2b digest.

    The file is read in 8192-byte chunks so it never has to fit in memory.
    """
    digest = hashlib.blake2b()
    with open(filepath, 'rb') as stream:
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)
    return digest.hexdigest()[:11]
229
+
230
+
231
def preprocess_song(song_input, song_id, is_webui, input_type, progress):
    """
    Preprocess the input song:
        - Download if YouTube URL ('yt'), or use the path as-is ('local').
        - Convert to stereo.
        - Separate vocals and instrumentals.

    Returns a tuple with six values matching the expected unpacking in the
    pipeline:
        (orig_song_path, vocals_no_reverb, instrumental_path,
         main_vocals_dereverb, backup_vocals, main_vocals_dereverb)
    Note that the lead-vocal path appears twice (positions 4 and 6).

    Raises:
        Exception / gr.Error (via raise_exception): for an unknown input_type.
    """
    if input_type == 'yt':
        display_progress('[~] Downloading song...', 0, is_webui, progress)
        # Drop everything after the first '&' (extra query parameters).
        song_link = song_input.split('&')[0]
        orig_song_path = yt_download(song_link)
    elif input_type == 'local':
        orig_song_path = song_input
    else:
        # Fail with a clear message instead of passing None on to the
        # stereo conversion.
        raise_exception(f'Unsupported input type: {input_type}', is_webui)

    song_output_dir = os.path.join(output_dir, song_id)
    os.makedirs(song_output_dir, exist_ok=True)

    orig_song_path = convert_to_stereo(orig_song_path)

    display_progress('[~] Separating Vocals from Instrumental...', 0.1, is_webui, progress)
    vocals_no_reverb, instrumental_path, main_vocals_dereverb, backup_vocals = separation_uvr(
        orig_song_path, song_output_dir
    )
    return (orig_song_path, vocals_no_reverb, instrumental_path,
            main_vocals_dereverb, backup_vocals, main_vocals_dereverb)
257
+
258
+
259
def voice_change(voice_model, vocals_path, output_path, pitch_change, f0_method,
                 index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui):
    """
    Run RVC inference on `vocals_path` with the given voice model and write
    the converted audio to `output_path`.

    The model and index files are looked up through get_rvc_model; inference
    is configured for the 'cuda:0' device.
    """
    model_file, index_file = get_rvc_model(voice_model, is_webui)

    device = 'cuda:0'
    config = Config(device, True)
    hubert_model = load_hubert(embedder_model="contentvec", embedder_model_custom=None)
    checkpoint, version, generator, target_sr, converter = get_vc(
        device, config.is_half, config, model_file
    )

    rvc_infer(
        index_file, index_rate, vocals_path, output_path, pitch_change, f0_method,
        checkpoint, version, generator, filter_radius, target_sr, rms_mix_rate, protect,
        crepe_hop_length, converter, hubert_model
    )

    # Drop the large model objects and collect immediately.
    del hubert_model, checkpoint
    gc.collect()
277
+
278
+
279
def add_audio_effects(audio_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping):
    """
    Run the audio through a highpass -> compressor -> reverb chain.

    The processed audio is written next to the input as '<name>_mixed.wav'
    and that path is returned.
    """
    stem = os.path.splitext(audio_path)[0]
    output_path = f'{stem}_mixed.wav'

    highpass = HighpassFilter()
    compressor = Compressor(ratio=4, threshold_db=-15)
    reverb = Reverb(
        room_size=reverb_rm_size,
        dry_level=reverb_dry,
        wet_level=reverb_wet,
        damping=reverb_damping,
    )
    board = Pedalboard([highpass, compressor, reverb])

    with AudioFile(audio_path) as source:
        with AudioFile(output_path, 'w', source.samplerate, source.num_channels) as sink:
            # Process one second of audio per iteration; reset=False keeps
            # the effect state between consecutive chunks.
            while source.tell() < source.frames:
                block = source.read(int(source.samplerate))
                sink.write(board(block, source.samplerate, reset=False))
    return output_path
297
+
298
+
299
def combine_audio(audio_paths, output_path, main_gain, backup_gain, inst_gain, output_format):
    """
    Mix three WAV files into one track and export it.

    `audio_paths` holds, in order, the main vocals, backup vocals and
    instrumental. Each layer is first lowered by a fixed amount (4, 6 and 7
    respectively), then the matching user gain is added, and the layers are
    overlaid in that order.
    """
    # (source path, fixed reduction, user-supplied gain)
    layers = (
        (audio_paths[0], 4, main_gain),
        (audio_paths[1], 6, backup_gain),
        (audio_paths[2], 7, inst_gain),
    )

    final_audio = None
    for path, reduction, gain in layers:
        segment = AudioSegment.from_wav(path) - reduction + gain
        final_audio = segment if final_audio is None else final_audio.overlay(segment)

    final_audio.export(output_path, format=output_format)
308
+
309
+
310
def song_cover_pipeline(song_input, voice_model, pitch_change, keep_files,
                        is_webui=0, main_gain=0, backup_gain=0, inst_gain=0, index_rate=0.5, filter_radius=3,
                        rms_mix_rate=0.25, f0_method='rmvpe', crepe_hop_length=128, protect=0.33, pitch_change_all=0,
                        reverb_rm_size=0.15, reverb_wet=0.2, reverb_dry=0.8, reverb_damping=0.7, output_format='mp3',
                        progress=gr.Progress()):
    """
    Main pipeline that orchestrates the AI cover song generation.

    Steps:
        1. Resolve the input (URL or local file) to a song id and directory.
        2. Reuse previously separated stems from that directory, or run
           preprocess_song to produce them.
        3. Convert lead and backing vocals with RVC (skipped per file when
           the converted file already exists).
        4. Apply audio effects, optionally pitch-shift the instrumental and
           original backup vocals, and mix two covers.
        5. Remove intermediate files unless `keep_files` is set.

    Returns:
        (ai_cover_path, ai_cover_backing_path): the cover mixed with the
        original backup vocals and the cover mixed with the AI backing vocals.

    Raises:
        Exception / gr.Error (via raise_exception): any failure inside the
        pipeline is re-raised with its message.
    """
    try:
        if not song_input or not voice_model:
            raise_exception('Ensure that the song input field and voice model field is filled.', is_webui)

        display_progress('[~] Starting AI Cover Generation Pipeline...', 0, is_webui, progress)

        # Anything with a web scheme is treated as a YouTube link; everything
        # else is treated as a local file path.
        if urlparse(song_input).scheme in ('http', 'https'):
            input_type = 'yt'
            song_id = get_youtube_video_id(song_input)
            if song_id is None:
                raise_exception('Invalid YouTube url.', is_webui)
        else:
            input_type = 'local'
            song_input = song_input.strip('\"')
            if os.path.exists(song_input):
                song_id = get_hash(song_input)
            else:
                raise_exception(f'{song_input} does not exist.', is_webui)

        song_dir = os.path.join(output_dir, song_id)

        # Decide whether the stems already on disk can be reused. They are
        # regenerated when the directory is new, when any expected stem is
        # missing, or when the caller asked to keep intermediate files.
        vocals_path = None
        if os.path.exists(song_dir):
            cached_paths = get_audio_paths(song_dir)
            reuse_cache = not keep_files and all(path is not None for path in cached_paths)
        else:
            os.makedirs(song_dir)
            cached_paths, reuse_cache = None, False

        if reuse_cache:
            orig_song_path, instrumentals_path, main_vocals_dereverb_path, backup_vocals_path = cached_paths
            main_vocals_path = main_vocals_dereverb_path
        else:
            (orig_song_path, vocals_path, instrumentals_path,
             main_vocals_path, backup_vocals_path, main_vocals_dereverb_path) = preprocess_song(
                song_input, song_id, is_webui, input_type, progress
            )

        # The AI vocals must follow the overall key change as well.
        pitch_change += pitch_change_all

        base_song_name = os.path.splitext(os.path.basename(orig_song_path))[0]
        algo_suffix = f"_{crepe_hop_length}" if f0_method == "mangio-crepe" else ""
        # Every conversion setting is encoded in the file name so that a
        # different setting never reuses a stale conversion.
        settings_tag = (
            f'{voice_model}_p{pitch_change}_i{index_rate}_fr{filter_radius}_'
            f'rms{rms_mix_rate}_pro{protect}_{f0_method}{algo_suffix}'
        )
        ai_vocals_path = os.path.join(song_dir, f'{base_song_name}_lead_{settings_tag}.wav')
        ai_backing_path = os.path.join(song_dir, f'{base_song_name}_backing_{settings_tag}.wav')
        ai_cover_path = os.path.join(song_dir, f'{base_song_name} ({voice_model} Ver).{output_format}')
        ai_cover_backing_path = os.path.join(song_dir, f'{base_song_name} ({voice_model} Ver With Backing).{output_format}')

        # Each conversion is guarded by its own output file, so a run that
        # produced only one of the two files is completed instead of failing
        # later on the missing one.
        if not os.path.exists(ai_vocals_path):
            display_progress('[~] Converting lead voice using RVC...', 0.5, is_webui, progress)
            voice_change(voice_model, main_vocals_dereverb_path, ai_vocals_path, pitch_change,
                         f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        if not os.path.exists(ai_backing_path):
            display_progress('[~] Converting backing voice using RVC...', 0.65, is_webui, progress)
            voice_change(voice_model, backup_vocals_path, ai_backing_path, pitch_change,
                         f0_method, index_rate, filter_radius, rms_mix_rate, protect, crepe_hop_length, is_webui)

        display_progress('[~] Applying audio effects to Vocals...', 0.8, is_webui, progress)
        ai_vocals_mixed_path = add_audio_effects(ai_vocals_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)
        ai_backing_mixed_path = add_audio_effects(ai_backing_path, reverb_rm_size, reverb_wet, reverb_dry, reverb_damping)

        if pitch_change_all != 0:
            display_progress('[~] Applying overall pitch change', 0.85, is_webui, progress)
            instrumentals_path = pitch_shift(instrumentals_path, pitch_change_all)
            backup_vocals_path = pitch_shift(backup_vocals_path, pitch_change_all)

        display_progress('[~] Combining AI Vocals and Instrumentals...', 0.9, is_webui, progress)
        combine_audio([ai_vocals_mixed_path, backup_vocals_path, instrumentals_path],
                      ai_cover_path, main_gain, backup_gain, inst_gain, output_format)
        combine_audio([ai_vocals_mixed_path, ai_backing_mixed_path, instrumentals_path],
                      ai_cover_backing_path, main_gain, backup_gain, inst_gain, output_format)

        if not keep_files:
            display_progress('[~] Removing intermediate audio files...', 0.95, is_webui, progress)
            # NOTE(review): main_vocals_path is the same file as
            # main_vocals_dereverb_path, which get_audio_paths needs in order
            # to reuse this directory on a later run - confirm that deleting
            # it here is intended.
            intermediate_files = [vocals_path, main_vocals_path, ai_vocals_mixed_path, ai_backing_mixed_path]
            if pitch_change_all != 0:
                # At this point these two names refer to the pitch-shifted copies.
                intermediate_files += [instrumentals_path, backup_vocals_path]
            for file in intermediate_files:
                if file and os.path.exists(file):
                    os.remove(file)

        return ai_cover_path, ai_cover_backing_path

    except Exception as e:
        raise_exception(str(e), is_webui)
411
+
412
+
413
# Command-line entry point: parse the options, verify the voice-model folder
# exists, run the pipeline, and report both generated covers.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='AICoverGen: Mod.',
        add_help=True
    )
    parser.add_argument('-i', '--song-input', type=str, required=True,
                        help='Link to a YouTube video or the filepath to a local mp3/wav file to create an AI cover of')
    parser.add_argument('-dir', '--rvc-dirname', type=str, required=True,
                        help='Name of the folder in the rvc_models directory containing the RVC model file and optional index file to use')
    parser.add_argument('-p', '--pitch-change', type=int, required=True,
                        help='Change the pitch of AI Vocals only. Generally, use 1 for male to female and -1 for vice-versa. (Octaves)')
    parser.add_argument('-k', '--keep-files', action=argparse.BooleanOptionalAction,
                        help='Whether to keep all intermediate audio files generated in the song_output/id directory, e.g. Isolated Vocals/Instrumentals')
    parser.add_argument('-ir', '--index-rate', type=float, default=0.5,
                        help='A decimal number e.g. 0.5, used to reduce/resolve the timbre leakage problem. If set to 1, more biased towards the timbre quality of the training dataset')
    parser.add_argument('-fr', '--filter-radius', type=int, default=3,
                        help='A number between 0 and 7. If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness.')
    parser.add_argument('-rms', '--rms-mix-rate', type=float, default=0.25,
                        help="A decimal number e.g. 0.25. Control how much to use the original vocal's loudness (0) or a fixed loudness (1).")
    parser.add_argument('-palgo', '--pitch-detection-algo', type=str, default='rmvpe',
                        help='Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).')
    parser.add_argument('-hop', '--crepe-hop-length', type=int, default=128,
                        help='If pitch detection algo is mangio-crepe, controls how often it checks for pitch changes in milliseconds. Recommended: 128.')
    parser.add_argument('-pro', '--protect', type=float, default=0.33,
                        help='A decimal number e.g. 0.33. Protect voiceless consonants and breath sounds to prevent artifacts such as tearing in electronic music.')
    parser.add_argument('-mv', '--main-vol', type=int, default=0,
                        help='Volume change for AI main vocals in decibels. Use -3 to decrease by 3 dB and 3 to increase by 3 dB')
    parser.add_argument('-bv', '--backup-vol', type=int, default=0,
                        help='Volume change for backup vocals in decibels')
    parser.add_argument('-iv', '--inst-vol', type=int, default=0,
                        help='Volume change for instrumentals in decibels')
    parser.add_argument('-pall', '--pitch-change-all', type=int, default=0,
                        help='Change the pitch/key of vocals and instrumentals. Changing this slightly reduces sound quality')
    parser.add_argument('-rsize', '--reverb-size', type=float, default=0.15,
                        help='Reverb room size between 0 and 1')
    parser.add_argument('-rwet', '--reverb-wetness', type=float, default=0.2,
                        help='Reverb wet level between 0 and 1')
    parser.add_argument('-rdry', '--reverb-dryness', type=float, default=0.8,
                        help='Reverb dry level between 0 and 1')
    parser.add_argument('-rdamp', '--reverb-damping', type=float, default=0.7,
                        help='Reverb damping between 0 and 1')
    parser.add_argument('-oformat', '--output-format', type=str, default='mp3',
                        help='Output format of audio file. mp3 for smaller file size, wav for best quality')
    args = parser.parse_args()

    # Fail early, before any download or separation work starts.
    rvc_dir = os.path.join(rvc_models_dir, args.rvc_dirname)
    if not os.path.exists(rvc_dir):
        raise Exception(f'The folder {rvc_dir} does not exist.')

    cover_path, cover_with_backing = song_cover_pipeline(
        args.song_input, args.rvc_dirname, args.pitch_change, args.keep_files,
        main_gain=args.main_vol, backup_gain=args.backup_vol, inst_gain=args.inst_vol,
        index_rate=args.index_rate, filter_radius=args.filter_radius,
        rms_mix_rate=args.rms_mix_rate, f0_method=args.pitch_detection_algo,
        crepe_hop_length=args.crepe_hop_length, protect=args.protect,
        pitch_change_all=args.pitch_change_all,
        reverb_rm_size=args.reverb_size, reverb_wet=args.reverb_wetness,
        reverb_dry=args.reverb_dryness, reverb_damping=args.reverb_damping,
        output_format=args.output_format
    )
    print(f'[+] Cover generated at {cover_path}')
    # The pipeline produces a second mix that uses the AI backing vocals.
    print(f'[+] Cover with AI backing vocals generated at {cover_with_backing}')