Files changed (3)
  1. EasierGUI.py +2101 -0
  2. infer-web.py +0 -0
  3. infer_uvr5.py +363 -0
EasierGUI.py ADDED
@@ -0,0 +1,2101 @@
+ import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np
+ from mega import Mega
+ os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1"
+ import threading
+ from time import sleep
+ from subprocess import Popen
+ import faiss
+ from random import shuffle
+ import json, datetime, requests
+ from gtts import gTTS
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ tmp = os.path.join(now_dir, "TEMP")
+ shutil.rmtree(tmp, ignore_errors=True)
+ shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True)
+ os.makedirs(tmp, exist_ok=True)
+ os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
+ os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True)
+ os.environ["TEMP"] = tmp
+ warnings.filterwarnings("ignore")
+ torch.manual_seed(114514)
+ from i18n import I18nAuto
+
+ import signal
+
+ import math
+
+ from my_utils import load_audio, CSVutil
+
+ global DoFormant, Quefrency, Timbre
+
+ if not os.path.isdir('csvdb/'):
+     os.makedirs('csvdb')
+     frmnt, stp = open("csvdb/formanting.csv", 'w'), open("csvdb/stop.csv", 'w')
+     frmnt.close()
+     stp.close()
+
+ try:
+     DoFormant, Quefrency, Timbre = CSVutil('csvdb/formanting.csv', 'r', 'formanting')
+     # The CSV stores DoFormant as text; map 'true'/'false' back to a bool
+     DoFormant = (
+         lambda DoFormant: True if DoFormant.lower() == 'true' else (False if DoFormant.lower() == 'false' else DoFormant)
+     )(DoFormant)
+ except (ValueError, TypeError, IndexError):
+     DoFormant, Quefrency, Timbre = False, 1.0, 1.0
+     CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, Quefrency, Timbre)
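+ # Note: formant state persists across runs via csvdb/formanting.csv as
+ # (DoFormant, Quefrency, Timbre); the try/except above seeds defaults of
+ # (False, 1.0, 1.0) on a first run or when the file is empty or unreadable.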
+
+ #from MDXNet import MDXNetDereverb
+
+ # Check if we're in a Google Colab environment
+ if os.path.exists('/content/'):
+     print("\n-------------------------------\nRVC v2 Easy GUI (Colab Edition)\n-------------------------------\n")
+
+     print("-------------------------------")
+     # Check if the file exists at the specified path
+     if os.path.exists('/content/Retrieval-based-Voice-Conversion-WebUI/hubert_base.pt'):
+         # If the file exists, print a statement saying so
+         print("File /content/Retrieval-based-Voice-Conversion-WebUI/hubert_base.pt already exists. No need to download.")
+     else:
+         # If the file doesn't exist, print a statement saying it's downloading
+         print("File /content/Retrieval-based-Voice-Conversion-WebUI/hubert_base.pt does not exist. Starting download.")
+
+         # Make a request to the URL
+         response = requests.get('https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt')
+
+         # Ensure the request was successful
+         if response.status_code == 200:
+             # If the response was a success, save the content to the specified file path
+             with open('/content/Retrieval-based-Voice-Conversion-WebUI/hubert_base.pt', 'wb') as f:
+                 f.write(response.content)
+             print("Download complete. File saved to /content/Retrieval-based-Voice-Conversion-WebUI/hubert_base.pt.")
+         else:
+             # If the response was a failure, print an error message
+             print("Failed to download file. Status code: " + str(response.status_code) + ".")
+ else:
+     print("\n-------------------------------\nRVC v2 Easy GUI (Local Edition)\n-------------------------------\n")
+     print("-------------------------------\nNot running on Google Colab, skipping download.")
+
+ def formant_apply(qfrency, tmbre):
+     Quefrency = qfrency
+     Timbre = tmbre
+     DoFormant = True
+     CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre)
+
+     return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"})
+
+ def get_fshift_presets():
+     fshift_presets_list = []
+     for dirpath, _, filenames in os.walk("./formantshiftcfg/"):
+         for filename in filenames:
+             if filename.endswith(".txt"):
+                 fshift_presets_list.append(os.path.join(dirpath, filename).replace('\\', '/'))
+
+     if len(fshift_presets_list) > 0:
+         return fshift_presets_list
+     else:
+         return ''
+
+
+
+ def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button):
+
+     if cbox:
+
+         DoFormant = True
+         CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre)
+         #print(f"is checked? - {cbox}\ngot {DoFormant}")
+
+         return (
+             {"value": True, "__type__": "update"},
+             {"visible": True, "__type__": "update"},
+             {"visible": True, "__type__": "update"},
+             {"visible": True, "__type__": "update"},
+             {"visible": True, "__type__": "update"},
+             {"visible": True, "__type__": "update"},
+         )
+
+     else:
+
+         DoFormant = False
+         CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre)
+
+         #print(f"is checked? - {cbox}\ngot {DoFormant}")
+         return (
+             {"value": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+             {"visible": False, "__type__": "update"},
+         )
+
+
+ def preset_apply(preset, qfer, tmbr):
+     if str(preset) != '':
+         with open(str(preset), 'r') as p:
+             content = p.readlines()
+             qfer, tmbr = content[0].split('\n')[0], content[1]
+
+             formant_apply(qfer, tmbr)
+     else:
+         pass
+     return ({"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"})
+
+ def update_fshift_presets(preset, qfrency, tmbre):
+
+     qfrency, tmbre = preset_apply(preset, qfrency, tmbre)
+
+     if str(preset) != '':
+         with open(str(preset), 'r') as p:
+             content = p.readlines()
+             qfrency, tmbre = content[0].split('\n')[0], content[1]
+
+             formant_apply(qfrency, tmbre)
+     else:
+         pass
+     return (
+         {"choices": get_fshift_presets(), "__type__": "update"},
+         {"value": qfrency, "__type__": "update"},
+         {"value": tmbre, "__type__": "update"},
+     )
+
+ i18n = I18nAuto()
+ #i18n.print()
+ # Check whether any NVIDIA GPU is available for training and accelerated inference
+ ngpu = torch.cuda.device_count()
+ gpu_infos = []
+ mem = []
+ if (not torch.cuda.is_available()) or ngpu == 0:
+     if_gpu_ok = False
+ else:
+     if_gpu_ok = False
+     for i in range(ngpu):
+         gpu_name = torch.cuda.get_device_name(i)
+         if (
+             "10" in gpu_name
+             or "16" in gpu_name
+             or "20" in gpu_name
+             or "30" in gpu_name
+             or "40" in gpu_name
+             or "A2" in gpu_name.upper()
+             or "A3" in gpu_name.upper()
+             or "A4" in gpu_name.upper()
+             or "P4" in gpu_name.upper()
+             or "A50" in gpu_name.upper()
+             or "A60" in gpu_name.upper()
+             or "70" in gpu_name
+             or "80" in gpu_name
+             or "90" in gpu_name
+             or "M4" in gpu_name.upper()
+             or "T4" in gpu_name.upper()
+             or "TITAN" in gpu_name.upper()
+         ):  # A10#A100#V100#A40#P40#M40#K80#A4500
+             if_gpu_ok = True  # at least one usable NVIDIA GPU found
+             gpu_infos.append("%s\t%s" % (i, gpu_name))
+             mem.append(
+                 int(
+                     torch.cuda.get_device_properties(i).total_memory
+                     / 1024
+                     / 1024
+                     / 1024
+                     + 0.4
+                 )
+             )
+ if if_gpu_ok and len(gpu_infos) > 0:
+     gpu_info = "\n".join(gpu_infos)
+     default_batch_size = min(mem) // 2
+ else:
+     gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练")
+     default_batch_size = 1
+ gpus = "-".join([i[0] for i in gpu_infos])
+ from lib.infer_pack.models import (
+     SynthesizerTrnMs256NSFsid,
+     SynthesizerTrnMs256NSFsid_nono,
+     SynthesizerTrnMs768NSFsid,
+     SynthesizerTrnMs768NSFsid_nono,
+ )
+ import soundfile as sf
+ from fairseq import checkpoint_utils
+ import gradio as gr
+ import logging
+ from vc_infer_pipeline import VC
+ from config import Config
+
+ config = Config()
+ # from trainset_preprocess_pipeline import PreProcess
+ logging.getLogger("numba").setLevel(logging.WARNING)
+
+ hubert_model = None
+
+ def load_hubert():
+     global hubert_model
+     models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
+         ["hubert_base.pt"],
+         suffix="",
+     )
+     hubert_model = models[0]
+     hubert_model = hubert_model.to(config.device)
+     if config.is_half:
+         hubert_model = hubert_model.half()
+     else:
+         hubert_model = hubert_model.float()
+     hubert_model.eval()
+
+
+ weight_root = "weights"
+ index_root = "logs"
+ names = []
+ for name in os.listdir(weight_root):
+     if name.endswith(".pth"):
+         names.append(name)
+ index_paths = []
+ for root, dirs, files in os.walk(index_root, topdown=False):
+     for name in files:
+         if name.endswith(".index") and "trained" not in name:
+             index_paths.append("%s/%s" % (root, name))
+
+
+ def vc_single(
+     sid,
+     input_audio_path,
+     f0_up_key,
+     f0_file,
+     f0_method,
+     file_index,
+     #file_index2,
+     # file_big_npy,
+     index_rate,
+     filter_radius,
+     resample_sr,
+     rms_mix_rate,
+     protect,
+     crepe_hop_length,
+ ):  # spk_item, input_audio0, vc_transform0, f0_file, f0method0
+     global tgt_sr, net_g, vc, hubert_model, version
+     if input_audio_path is None:
+         return "You need to upload an audio", None
+     f0_up_key = int(f0_up_key)
+     try:
+         audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre)
+         audio_max = np.abs(audio).max() / 0.95
+         if audio_max > 1:
+             audio /= audio_max
+         times = [0, 0, 0]
+         if hubert_model is None:
+             load_hubert()
+         if_f0 = cpt.get("f0", 1)
+         file_index = (
+             file_index.strip(" ")
+             .strip('"')
+             .strip("\n")
+             .strip('"')
+             .strip(" ")
+             .replace("trained", "added")
+         )  # guard against user mistakes: swap a "trained" index path for the "added" one automatically
+         # file_big_npy = (
+         #     file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         # )
+         audio_opt = vc.pipeline(
+             hubert_model,
+             net_g,
+             sid,
+             audio,
+             input_audio_path,
+             times,
+             f0_up_key,
+             f0_method,
+             file_index,
+             # file_big_npy,
+             index_rate,
+             if_f0,
+             filter_radius,
+             tgt_sr,
+             resample_sr,
+             rms_mix_rate,
+             version,
+             protect,
+             crepe_hop_length,
+             f0_file=f0_file,
+         )
+         if resample_sr >= 16000 and tgt_sr != resample_sr:
+             tgt_sr = resample_sr
+         index_info = (
+             "Using index:%s." % file_index
+             if os.path.exists(file_index)
+             else "Index not used."
+         )
+         return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
+             index_info,
+             times[0],
+             times[1],
+             times[2],
+         ), (tgt_sr, audio_opt)
+     except:
+         info = traceback.format_exc()
+         print(info)
+         return info, (None, None)
+
+
+ def vc_multi(
+     sid,
+     dir_path,
+     opt_root,
+     paths,
+     f0_up_key,
+     f0_method,
+     file_index,
+     file_index2,
+     # file_big_npy,
+     index_rate,
+     filter_radius,
+     resample_sr,
+     rms_mix_rate,
+     protect,
+     format1,
+     crepe_hop_length,
+ ):
+     try:
+         dir_path = (
+             dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         )  # strip the stray spaces, quotes, and newlines users often copy with paths
+         opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
+         os.makedirs(opt_root, exist_ok=True)
+         try:
+             if dir_path != "":
+                 paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)]
+             else:
+                 paths = [path.name for path in paths]
+         except:
+             traceback.print_exc()
+             paths = [path.name for path in paths]
+         infos = []
+         for path in paths:
+             info, opt = vc_single(
+                 sid,
+                 path,
+                 f0_up_key,
+                 None,
+                 f0_method,
+                 file_index,
+                 # file_big_npy,
+                 index_rate,
+                 filter_radius,
+                 resample_sr,
+                 rms_mix_rate,
+                 protect,
+                 crepe_hop_length
+             )
+             if "Success" in info:
+                 try:
+                     tgt_sr, audio_opt = opt
+                     if format1 in ["wav", "flac"]:
+                         sf.write(
+                             "%s/%s.%s" % (opt_root, os.path.basename(path), format1),
+                             audio_opt,
+                             tgt_sr,
+                         )
+                     else:
+                         path = "%s/%s.wav" % (opt_root, os.path.basename(path))
+                         sf.write(
+                             path,
+                             audio_opt,
+                             tgt_sr,
+                         )
+                         if os.path.exists(path):
+                             os.system(
+                                 "ffmpeg -i %s -vn %s -q:a 2 -y"
+                                 % (path, path[:-4] + ".%s" % format1)
+                             )
+                 except:
+                     info += traceback.format_exc()
+             infos.append("%s->%s" % (os.path.basename(path), info))
+             yield "\n".join(infos)
+         yield "\n".join(infos)
+     except:
+         yield traceback.format_exc()
+
+ # Only one voice model can be loaded per tab at a time
+ def get_vc(sid):
+     global n_spk, tgt_sr, net_g, vc, cpt, version
+     if sid == "" or sid == []:
+         global hubert_model
+         if hubert_model is not None:
+             # The dropdown is polled, so detect the switch from "a model" to "no model"
+             print("clean_empty_cache")
+             del net_g, n_spk, vc, hubert_model, tgt_sr  # , cpt
+             hubert_model = net_g = n_spk = vc = tgt_sr = None
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             ### The extra teardown below is required, or the cache is not fully released
+             if_f0 = cpt.get("f0", 1)
+             version = cpt.get("version", "v1")
+             if version == "v1":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs256NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+             elif version == "v2":
+                 if if_f0 == 1:
+                     net_g = SynthesizerTrnMs768NSFsid(
+                         *cpt["config"], is_half=config.is_half
+                     )
+                 else:
+                     net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+             del net_g, cpt
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+             cpt = None
+         return {"visible": False, "__type__": "update"}
+     person = "%s/%s" % (weight_root, sid)
+     print("loading %s" % person)
+     cpt = torch.load(person, map_location="cpu")
+     tgt_sr = cpt["config"][-1]
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     if_f0 = cpt.get("f0", 1)
+     version = cpt.get("version", "v1")
+     if version == "v1":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+     elif version == "v2":
+         if if_f0 == 1:
+             net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half)
+         else:
+             net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+     del net_g.enc_q
+     print(net_g.load_state_dict(cpt["weight"], strict=False))
+     net_g.eval().to(config.device)
+     if config.is_half:
+         net_g = net_g.half()
+     else:
+         net_g = net_g.float()
+     vc = VC(tgt_sr, config)
+     n_spk = cpt["config"][-3]
+     return {"visible": False, "maximum": n_spk, "__type__": "update"}
+
+
+ def change_choices():
+     names = []
+     for name in os.listdir(weight_root):
+         if name.endswith(".pth"):
+             names.append(name)
+     index_paths = []
+     for root, dirs, files in os.walk(index_root, topdown=False):
+         for name in files:
+             if name.endswith(".index") and "trained" not in name:
+                 index_paths.append("%s/%s" % (root, name))
+     return {"choices": sorted(names), "__type__": "update"}, {
+         "choices": sorted(index_paths),
+         "__type__": "update",
+     }
+
+
+ def clean():
+     return {"value": "", "__type__": "update"}
+
+
+ sr_dict = {
+     "32k": 32000,
+     "40k": 40000,
+     "48k": 48000,
+ }
+
+
+ def if_done(done, p):
+     while 1:
+         if p.poll() is None:
+             sleep(0.5)
+         else:
+             break
+     done[0] = True
+
+
+ def if_done_multi(done, ps):
+     while 1:
+         # poll() == None means a process has not finished yet;
+         # keep waiting as long as any process is still running
+         flag = 1
+         for p in ps:
+             if p.poll() is None:
+                 flag = 0
+                 sleep(0.5)
+                 break
+         if flag == 1:
+             break
+     done[0] = True
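+ # Both watchers poll Popen.poll() every 0.5 s and flip done[0] once every
+ # child process has exited; the training helpers below re-read a log file
+ # once per second until that flag is set, which is how progress is streamed
+ # into Gradio despite its buffered subprocess output.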
+
+
+ def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
+     sr = sr_dict[sr]
+     os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+     f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w")
+     f.close()
+     cmd = (
+         config.python_cmd
+         + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s "
+         % (trainset_dir, sr, n_p, now_dir, exp_dir)
+         + str(config.noparallel)
+     )
+     print(cmd)
+     p = Popen(cmd, shell=True)  # , stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+     ### Gradio only surfaces Popen output after the process finishes, so
+     ### stream progress by re-reading the log file on a timer instead
+     done = [False]
+     threading.Thread(
+         target=if_done,
+         args=(
+             done,
+             p,
+         ),
+     ).start()
+     while 1:
+         with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+             yield (f.read())
+         sleep(1)
+         if done[0]:
+             break
+     with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f:
+         log = f.read()
+     print(log)
+     yield log
+
+
+ # but2.click(extract_f0, [gpus6, np7, f0method8, if_f0_3, trainset_dir4], [info2])
+ def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl):
+     gpus = gpus.split("-")
+     os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
+     f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w")
+     f.close()
+     if if_f0:
+         cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s %s" % (
+             now_dir,
+             exp_dir,
+             n_p,
+             f0method,
+             echl,
+         )
+         print(cmd)
+         p = Popen(cmd, shell=True, cwd=now_dir)  # , stdin=PIPE, stdout=PIPE, stderr=PIPE
+         ### Same Gradio limitation as above: poll the log file to stream output
+         done = [False]
+         threading.Thread(
+             target=if_done,
+             args=(
+                 done,
+                 p,
+             ),
+         ).start()
+         while 1:
+             with open(
+                 "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r"
+             ) as f:
+                 yield (f.read())
+             sleep(1)
+             if done[0]:
+                 break
+         with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+             log = f.read()
+         print(log)
+         yield log
+     #### Launch a separate extraction process for each part/GPU
+     """
+     n_part=int(sys.argv[1])
+     i_part=int(sys.argv[2])
+     i_gpu=sys.argv[3]
+     exp_dir=sys.argv[4]
+     os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu)
+     """
+     leng = len(gpus)
+     ps = []
+     for idx, n_g in enumerate(gpus):
+         cmd = (
+             config.python_cmd
+             + " extract_feature_print.py %s %s %s %s %s/logs/%s %s"
+             % (
+                 config.device,
+                 leng,
+                 idx,
+                 n_g,
+                 now_dir,
+                 exp_dir,
+                 version19,
+             )
+         )
+         print(cmd)
+         p = Popen(
+             cmd, shell=True, cwd=now_dir
+         )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+         ps.append(p)
+     ### Same Gradio limitation: poll the log file to stream output
+     done = [False]
+     threading.Thread(
+         target=if_done_multi,
+         args=(
+             done,
+             ps,
+         ),
+     ).start()
+     while 1:
+         with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+             yield (f.read())
+         sleep(1)
+         if done[0]:
+             break
+     with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f:
+         log = f.read()
+     print(log)
+     yield log
+
+
+ def change_sr2(sr2, if_f0_3, version19):
+     path_str = "" if version19 == "v1" else "_v2"
+     f0_str = "f0" if if_f0_3 else ""
+     if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK)
+     if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK)
+     if not if_pretrained_generator_exist:
+         print("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), "does not exist, will not use pretrained model")
+     if not if_pretrained_discriminator_exist:
+         print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "does not exist, will not use pretrained model")
+     return (
+         ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "",
+         ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "",
+         {"visible": True, "__type__": "update"}
+     )
+
+ def change_version19(sr2, if_f0_3, version19):
+     path_str = "" if version19 == "v1" else "_v2"
+     f0_str = "f0" if if_f0_3 else ""
+     if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK)
+     if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK)
+     if not if_pretrained_generator_exist:
+         print("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), "does not exist, will not use pretrained model")
+     if not if_pretrained_discriminator_exist:
+         print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "does not exist, will not use pretrained model")
+     return (
+         ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "",
+         ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "",
+     )
+
+
+ def change_f0(if_f0_3, sr2, version19):  # f0method8, pretrained_G14, pretrained_D15
+     path_str = "" if version19 == "v1" else "_v2"
+     if_pretrained_generator_exist = os.access("pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK)
+     if_pretrained_discriminator_exist = os.access("pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK)
+     if not if_pretrained_generator_exist:
+         print("pretrained%s/f0G%s.pth" % (path_str, sr2), "does not exist, will not use pretrained model")
+     if not if_pretrained_discriminator_exist:
+         print("pretrained%s/f0D%s.pth" % (path_str, sr2), "does not exist, will not use pretrained model")
+     if if_f0_3:
+         return (
+             {"visible": True, "__type__": "update"},
+             "pretrained%s/f0G%s.pth" % (path_str, sr2) if if_pretrained_generator_exist else "",
+             "pretrained%s/f0D%s.pth" % (path_str, sr2) if if_pretrained_discriminator_exist else "",
+         )
+     return (
+         {"visible": False, "__type__": "update"},
+         ("pretrained%s/G%s.pth" % (path_str, sr2)) if if_pretrained_generator_exist else "",
+         ("pretrained%s/D%s.pth" % (path_str, sr2)) if if_pretrained_discriminator_exist else "",
+     )
+
+
+ global log_interval
+
+
+ def set_log_interval(exp_dir, batch_size12):
+     log_interval = 1
+
+     folder_path = os.path.join(exp_dir, "1_16k_wavs")
+
+     if os.path.exists(folder_path) and os.path.isdir(folder_path):
+         wav_files = [f for f in os.listdir(folder_path) if f.endswith(".wav")]
+         if wav_files:
+             sample_size = len(wav_files)
+             log_interval = math.ceil(sample_size / batch_size12)
+             if log_interval > 1:
+                 log_interval += 1
+     return log_interval
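+ # Worked example: 1000 wavs in 1_16k_wavs with batch_size12 = 8 gives
+ # ceil(1000 / 8) = 125, bumped to 126 because the interval exceeds 1, so
+ # logging lands just past each epoch boundary rather than exactly on it.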
+
+ # but3.click(click_train, [exp_dir1, sr2, if_f0_3, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16])
+ def click_train(
+     exp_dir1,
+     sr2,
+     if_f0_3,
+     spk_id5,
+     save_epoch10,
+     total_epoch11,
+     batch_size12,
+     if_save_latest13,
+     pretrained_G14,
+     pretrained_D15,
+     gpus16,
+     if_cache_gpu17,
+     if_save_every_weights18,
+     version19,
+ ):
+     CSVutil('csvdb/stop.csv', 'w+', 'formanting', False)
+     # Generate the training filelist
+     exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     os.makedirs(exp_dir, exist_ok=True)
+     gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir)
+     feature_dir = (
+         "%s/3_feature256" % (exp_dir)
+         if version19 == "v1"
+         else "%s/3_feature768" % (exp_dir)
+     )
+
+     log_interval = set_log_interval(exp_dir, batch_size12)
+
+     if if_f0_3:
+         f0_dir = "%s/2a_f0" % (exp_dir)
+         f0nsf_dir = "%s/2b-f0nsf" % (exp_dir)
+         names = (
+             set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+             & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+         )
+     else:
+         names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+             [name.split(".")[0] for name in os.listdir(feature_dir)]
+         )
+     opt = []
+     for name in names:
+         if if_f0_3:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     f0_dir.replace("\\", "\\\\"),
+                     name,
+                     f0nsf_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+         else:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+     fea_dim = 256 if version19 == "v1" else 768
+     if if_f0_3:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+             )
+     else:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+             )
+     shuffle(opt)
+     with open("%s/filelist.txt" % exp_dir, "w") as f:
+         f.write("\n".join(opt))
+     print("write filelist done")
+     # No config file needs to be generated
+     # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0"
+     print("use gpus:", gpus16)
+     if pretrained_G14 == "":
+         print("no pretrained Generator")
+     if pretrained_D15 == "":
+         print("no pretrained Discriminator")
+     if gpus16:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s -li %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 gpus16,
+                 total_epoch11,
+                 save_epoch10,
+                 ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
+                 ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 else 0,
+                 1 if if_cache_gpu17 else 0,
+                 1 if if_save_every_weights18 else 0,
+                 version19,
+                 log_interval,
+             )
+         )
+     else:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s -li %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 total_epoch11,
+                 save_epoch10,
+                 ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "\b",
+                 ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "\b",
+                 1 if if_save_latest13 else 0,
+                 1 if if_cache_gpu17 else 0,
+                 1 if if_save_every_weights18 else 0,
+                 version19,
+                 log_interval,
+             )
+         )
+     print(cmd)
+     p = Popen(cmd, shell=True, cwd=now_dir)
+     global PID
+     PID = p.pid
+     p.wait()
+     return ("Training finished. Check the console log or train.log in the experiment folder.", {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"})
+
+
+ # but4.click(train_index, [exp_dir1], info3)
+ def train_index(exp_dir1, version19):
+     exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     os.makedirs(exp_dir, exist_ok=True)
+     feature_dir = (
+         "%s/3_feature256" % (exp_dir)
+         if version19 == "v1"
+         else "%s/3_feature768" % (exp_dir)
+     )
+     if not os.path.exists(feature_dir):
+         return "Please run feature extraction first!"
+     listdir_res = list(os.listdir(feature_dir))
+     if len(listdir_res) == 0:
+         return "Please run feature extraction first!"
+     npys = []
+     for name in sorted(listdir_res):
+         phone = np.load("%s/%s" % (feature_dir, name))
+         npys.append(phone)
+     big_npy = np.concatenate(npys, 0)
+     big_npy_idx = np.arange(big_npy.shape[0])
+     np.random.shuffle(big_npy_idx)
+     big_npy = big_npy[big_npy_idx]
+     np.save("%s/total_fea.npy" % exp_dir, big_npy)
+     # n_ivf = big_npy.shape[0] // 39
+     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+     infos = []
+     infos.append("%s,%s" % (big_npy.shape, n_ivf))
+     yield "\n".join(infos)
+     index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+     # index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,PQ128x4fs,RFlat" % n_ivf)
+     infos.append("training")
+     yield "\n".join(infos)
+     index_ivf = faiss.extract_index_ivf(index)
+     index_ivf.nprobe = 1
+     index.train(big_npy)
+     faiss.write_index(
+         index,
+         "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index' % (exp_dir, n_ivf, version19))
+     infos.append("adding")
+     yield "\n".join(infos)
+     batch_size_add = 8192
+     for i in range(0, big_npy.shape[0], batch_size_add):
+         index.add(big_npy[i : i + batch_size_add])
+     faiss.write_index(
+         index,
+         "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     infos.append(
+         "Index built successfully: added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+     )
+     # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index' % (exp_dir, n_ivf, version19))
+     # infos.append("Index built successfully: added_IVF%s_Flat_FastScan_%s.index" % (n_ivf, version19))
+     yield "\n".join(infos)
+
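+ # Index sizing above follows the usual FAISS rule of thumb:
+ # n_ivf = min(16 * sqrt(N), N // 39) clusters for N feature frames, e.g.
+ # ~10k extracted frames produce an "IVF256,Flat" index searched with
+ # nprobe = 1.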
+
+ # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3)
+ def train1key(
+     exp_dir1,
+     sr2,
+     if_f0_3,
+     trainset_dir4,
+     spk_id5,
+     np7,
+     f0method8,
+     save_epoch10,
+     total_epoch11,
+     batch_size12,
+     if_save_latest13,
+     pretrained_G14,
+     pretrained_D15,
+     gpus16,
+     if_cache_gpu17,
+     if_save_every_weights18,
+     version19,
+     echl
+ ):
+     infos = []
+
+     def get_info_str(strr):
+         infos.append(strr)
+         return "\n".join(infos)
+
+     model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+     preprocess_log_path = "%s/preprocess.log" % model_log_dir
+     extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir
+     gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir
+     feature_dir = (
+         "%s/3_feature256" % model_log_dir
+         if version19 == "v1"
+         else "%s/3_feature768" % model_log_dir
+     )
+
+     os.makedirs(model_log_dir, exist_ok=True)
+     ######### Step 1: preprocess the dataset
+     open(preprocess_log_path, "w").close()
+     cmd = (
+         config.python_cmd
+         + " trainset_preprocess_pipeline_print.py %s %s %s %s "
+         % (trainset_dir4, sr_dict[sr2], np7, model_log_dir)
+         + str(config.noparallel)
+     )
+     yield get_info_str(i18n("step1:正在处理数据"))
+     yield get_info_str(cmd)
+     p = Popen(cmd, shell=True)
+     p.wait()
+     with open(preprocess_log_path, "r") as f:
+         print(f.read())
+     ######### Step 2a: extract pitch
+     open(extract_f0_feature_log_path, "w")
+     if if_f0_3:
+         yield get_info_str("step2a: extracting pitch")
+         cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % (
+             model_log_dir,
+             np7,
+             f0method8,
+             echl
+         )
+         yield get_info_str(cmd)
+         p = Popen(cmd, shell=True, cwd=now_dir)
+         p.wait()
+         with open(extract_f0_feature_log_path, "r") as f:
+             print(f.read())
+     else:
+         yield get_info_str(i18n("step2a:无需提取音高"))
+     ####### Step 2b: extract features
+     yield get_info_str(i18n("step2b:正在提取特征"))
+     gpus = gpus16.split("-")
+     leng = len(gpus)
+     ps = []
+     for idx, n_g in enumerate(gpus):
+         cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % (
+             config.device,
+             leng,
+             idx,
+             n_g,
+             model_log_dir,
+             version19,
+         )
+         yield get_info_str(cmd)
+         p = Popen(
+             cmd, shell=True, cwd=now_dir
+         )  # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir
+         ps.append(p)
+     for p in ps:
+         p.wait()
+     with open(extract_f0_feature_log_path, "r") as f:
+         print(f.read())
+     ####### Step 3a: train the model
+     yield get_info_str(i18n("step3a:正在训练模型"))
+     # Generate the training filelist
+     if if_f0_3:
+         f0_dir = "%s/2a_f0" % model_log_dir
+         f0nsf_dir = "%s/2b-f0nsf" % model_log_dir
+         names = (
+             set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)])
+             & set([name.split(".")[0] for name in os.listdir(feature_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0_dir)])
+             & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)])
+         )
+     else:
+         names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set(
+             [name.split(".")[0] for name in os.listdir(feature_dir)]
+         )
+     opt = []
+     for name in names:
+         if if_f0_3:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     f0_dir.replace("\\", "\\\\"),
+                     name,
+                     f0nsf_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+         else:
+             opt.append(
+                 "%s/%s.wav|%s/%s.npy|%s"
+                 % (
+                     gt_wavs_dir.replace("\\", "\\\\"),
+                     name,
+                     feature_dir.replace("\\", "\\\\"),
+                     name,
+                     spk_id5,
+                 )
+             )
+     fea_dim = 256 if version19 == "v1" else 768
+     if if_f0_3:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
+             )
+     else:
+         for _ in range(2):
+             opt.append(
+                 "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
+                 % (now_dir, sr2, now_dir, fea_dim, spk_id5)
+             )
+     shuffle(opt)
+     with open("%s/filelist.txt" % model_log_dir, "w") as f:
+         f.write("\n".join(opt))
+     yield get_info_str("write filelist done")
+     if gpus16:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 gpus16,
+                 total_epoch11,
+                 save_epoch10,
+                 ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
+                 ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 else 0,
+                 1 if if_cache_gpu17 else 0,
+                 1 if if_save_every_weights18 else 0,
+                 version19,
+             )
+         )
+     else:
+         cmd = (
+             config.python_cmd
+             + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s"
+             % (
+                 exp_dir1,
+                 sr2,
+                 1 if if_f0_3 else 0,
+                 batch_size12,
+                 total_epoch11,
+                 save_epoch10,
+                 ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "",
+                 ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "",
+                 1 if if_save_latest13 else 0,
+                 1 if if_cache_gpu17 else 0,
+                 1 if if_save_every_weights18 else 0,
+                 version19,
+             )
+         )
+     yield get_info_str(cmd)
+     p = Popen(cmd, shell=True, cwd=now_dir)
+     p.wait()
+     yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log"))
+     ####### Step 3b: train the index
+     npys = []
+     listdir_res = list(os.listdir(feature_dir))
+     for name in sorted(listdir_res):
+         phone = np.load("%s/%s" % (feature_dir, name))
+         npys.append(phone)
+     big_npy = np.concatenate(npys, 0)
+
+     big_npy_idx = np.arange(big_npy.shape[0])
+     np.random.shuffle(big_npy_idx)
+     big_npy = big_npy[big_npy_idx]
+     np.save("%s/total_fea.npy" % model_log_dir, big_npy)
+
+     # n_ivf = big_npy.shape[0] // 39
+     n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+     yield get_info_str("%s,%s" % (big_npy.shape, n_ivf))
+     index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+     yield get_info_str("training index")
+     index_ivf = faiss.extract_index_ivf(index)
+     index_ivf.nprobe = 1
+     index.train(big_npy)
+     faiss.write_index(
+         index,
+         "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     yield get_info_str("adding index")
+     batch_size_add = 8192
+     for i in range(0, big_npy.shape[0], batch_size_add):
+         index.add(big_npy[i : i + batch_size_add])
+     faiss.write_index(
+         index,
+         "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+     )
+     yield get_info_str(
+         "Index built successfully: added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+         % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+     )
+     yield get_info_str(i18n("全流程结束!"))
+
+
+ def whethercrepeornah(radio):
+     mango = radio in ('mangio-crepe', 'mangio-crepe-tiny')
+     return ({"visible": mango, "__type__": "update"})
+
+ # ckpt_path2.change(change_info_, [ckpt_path2], [sr__, if_f0__])
+ def change_info_(ckpt_path):
+     if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")):
+         return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+     try:
+         with open(
+             ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r"
+         ) as f:
+             info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1])
+             sr, f0 = info["sample_rate"], info["if_f0"]
+             version = "v2" if ("version" in info and info["version"] == "v2") else "v1"
+             return sr, str(f0), version
+     except:
+         traceback.print_exc()
+         return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"}
+
+
+ from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
+
+
+ def export_onnx(ModelPath, ExportedPath, MoeVS=True):
+     cpt = torch.load(ModelPath, map_location="cpu")
+     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+     # hidden_channels: 256 for v1, 768 for v2 (768-dim feature vectors)
+     hidden_channels = 256 if cpt.get("version", "v1") == "v1" else 768  # cpt["config"][-2]
+
+     test_phone = torch.rand(1, 200, hidden_channels)  # hidden units
+     test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (appears unused)
+     test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # fundamental frequency (Hz)
+     test_pitchf = torch.rand(1, 200)  # NSF fundamental frequency
+     test_ds = torch.LongTensor([0])  # speaker ID
+     test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
+
+     device = "cpu"  # device used for export (does not affect later model usage)
+
+     net_g = SynthesizerTrnMsNSFsidM(
+         *cpt["config"], is_half=False, version=cpt.get("version", "v1")
+     )  # fp32 export (fp16 in C++ would require manual memory re-layout, so fp16 is skipped for now)
+     net_g.load_state_dict(cpt["weight"], strict=False)
+     input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+     output_names = [
+         "audio",
+     ]
+     # net_g.construct_spkmixmap(n_speaker)  # export with a multi-speaker mix track
+     torch.onnx.export(
+         net_g,
+         (
+             test_phone.to(device),
+             test_phone_lengths.to(device),
+             test_pitch.to(device),
+             test_pitchf.to(device),
+             test_ds.to(device),
+             test_rnd.to(device),
+         ),
+         ExportedPath,
+         dynamic_axes={
+             "phone": [1],
+             "pitch": [1],
+             "pitchf": [1],
+             "rnd": [2],
+         },
+         do_constant_folding=False,
+         opset_version=16,
+         verbose=False,
+         input_names=input_names,
+         output_names=output_names,
+     )
+     return "Finished"
+
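+ # The dynamic_axes above mark the time dimension of phone/pitch/pitchf and
+ # the last axis of rnd as variable-length, so the exported ONNX graph
+ # accepts inputs of any duration even though tracing used length 200.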
+ #region RVC WebUI App
+
+ def get_presets():
+     data = None
+     with open('../inference-presets.json', 'r') as file:
+         data = json.load(file)
+     preset_names = []
+     for preset in data['presets']:
+         preset_names.append(preset['name'])
+     return preset_names
+
+ def change_choices2():
+     audio_files = []
+     for filename in os.listdir("./audios"):
+         if filename.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.mp4')):
+             audio_files.append(os.path.join('./audios', filename).replace('\\', '/'))
+     return {"choices": sorted(audio_files), "__type__": "update"}, {"__type__": "update"}
+
+ audio_files = []
+ for filename in os.listdir("./audios"):
+     if filename.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.mp4')):
+         audio_files.append(os.path.join('./audios', filename).replace('\\', '/'))
+
+ def get_index():
+     if check_for_name() != '':
+         chosen_model = sorted(names)[0].split(".")[0]
+         logs_path = "./logs/" + chosen_model
+         if os.path.exists(logs_path):
+             for file in os.listdir(logs_path):
+                 if file.endswith(".index"):
+                     return os.path.join(logs_path, file)
+             return ''
+         else:
+             return ''
+
+ def get_indexes():
+     indexes_list = []
+     for dirpath, dirnames, filenames in os.walk("./logs/"):
+         for filename in filenames:
+             if filename.endswith(".index"):
+                 indexes_list.append(os.path.join(dirpath, filename))
+     if len(indexes_list) > 0:
+         return indexes_list
+     else:
+         return ''
+
+ def get_name():
+     if len(audio_files) > 0:
+         return sorted(audio_files)[0]
+     else:
+         return ''
+
+ def save_to_wav(record_button):
+     if record_button is None:
+         pass
+     else:
+         path_to_file = record_button
+         new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.wav'
+         new_path = './audios/' + new_name
+         shutil.move(path_to_file, new_path)
+         return new_path
+
+ def save_to_wav2(dropbox):
+     file_path = dropbox.name
+     shutil.move(file_path, './audios')
+     return os.path.join('./audios', os.path.basename(file_path))
+
+ def match_index(sid0):
+     folder = sid0.split(".")[0]
+     parent_dir = "./logs/" + folder
+     if os.path.exists(parent_dir):
+         for filename in os.listdir(parent_dir):
+             if filename.endswith(".index"):
+                 index_path = os.path.join(parent_dir, filename)
+                 return index_path
+         return ''
+     else:
+         return ''
+
+ def check_for_name():
+     if len(names) > 0:
+         return sorted(names)[0]
+     else:
+         return ''
+
+ def download_from_url(url, model):
+     if url == '':
+         return "URL cannot be left empty."
+     if model == '':
+         return "You need to name your model. For example: My-Model"
+     url = url.strip()
+     zip_dirs = ["zips", "unzips"]
+     for directory in zip_dirs:
+         if os.path.exists(directory):
+             shutil.rmtree(directory)
+     os.makedirs("zips", exist_ok=True)
+     os.makedirs("unzips", exist_ok=True)
+     zipfile = model + '.zip'
+     zipfile_path = './zips/' + zipfile
+     try:
+         if "drive.google.com" in url:
+             subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path])
+         elif "mega.nz" in url:
+             m = Mega()
+             m.download_url(url, './zips')
+         else:
+             subprocess.run(["wget", url, "-O", zipfile_path])
+         for filename in os.listdir("./zips"):
+             if filename.endswith(".zip"):
+                 zipfile_path = os.path.join("./zips/", filename)
+                 shutil.unpack_archive(zipfile_path, "./unzips", 'zip')
+             else:
+                 return "No zipfile found."
+         for root, dirs, files in os.walk('./unzips'):
+             for file in files:
+                 file_path = os.path.join(root, file)
+                 if file.endswith(".index"):
+                     os.makedirs(f'./logs/{model}', exist_ok=True)
+                     shutil.copy2(file_path, f'./logs/{model}')
+                 elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
+                     shutil.copy(file_path, f'./weights/{model}.pth')
+         shutil.rmtree("zips")
+         shutil.rmtree("unzips")
+         return "Success."
+     except:
+         return "There's been an error."
+
+ def success_message(face):
+     return f'{face.name} has been uploaded.', 'None'
+
+ def mouth(size, face, voice, faces):
+     if size == 'Half':
+         size = 2
+     else:
+         size = 1
+     if faces == 'None':
+         character = face.name
+     else:
+         if faces == 'Ben Shapiro':
+             character = '/content/wav2lip-HD/inputs/ben-shapiro-10.mp4'
+         elif faces == 'Andrew Tate':
+             character = '/content/wav2lip-HD/inputs/tate-7.mp4'
+     command = "python inference.py " \
+               "--checkpoint_path checkpoints/wav2lip.pth " \
+               f"--face {character} " \
+               f"--audio {voice} " \
+               "--pads 0 20 0 0 " \
+               "--outfile /content/wav2lip-HD/outputs/result.mp4 " \
+               "--fps 24 " \
+               f"--resize_factor {size}"
+     process = subprocess.Popen(command, shell=True, cwd='/content/wav2lip-HD/Wav2Lip-master')
+     stdout, stderr = process.communicate()
+     return '/content/wav2lip-HD/outputs/result.mp4', 'Animation completed.'
+
+ eleven_voices = ['Adam', 'Antoni', 'Josh', 'Arnold', 'Sam', 'Bella', 'Rachel', 'Domi', 'Elli']
+ eleven_voices_ids = ['pNInz6obpgDQGcFmaJgB', 'ErXwobaYiN019PkySvjV', 'TxGEqnHWrfWFTfGW9XjX', 'VR6AewLTigWG4xSOukaG', 'yoZ06aMxZJJ28mfd3POQ', 'EXAVITQu4vr4xnSDxMaL', '21m00Tcm4TlvDq8ikWAM', 'AZnzlk1XvdvUeBnXmlld', 'MF3mGyEYCl7XYWbV9V6O']
+ chosen_voice = dict(zip(eleven_voices, eleven_voices_ids))
+
+ def stoptraining(mim):
+     if int(mim) == 1:
+         try:
+             CSVutil('csvdb/stop.csv', 'w+', 'stop', 'True')
+             os.kill(PID, signal.SIGTERM)
+         except Exception as e:
+             print(f"Couldn't click due to {e}")
+     return (
+         {"visible": False, "__type__": "update"},
+         {"visible": True, "__type__": "update"},
+     )
+
+
+ def elevenTTS(xiapi, text, id, lang):
+     if xiapi != '' and id != '':
+         choice = chosen_voice[id]
+         CHUNK_SIZE = 1024
+         url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
+         headers = {
+             "Accept": "audio/mpeg",
+             "Content-Type": "application/json",
+             "xi-api-key": xiapi
+         }
+         if lang == 'en':
+             data = {
+                 "text": text,
+                 "model_id": "eleven_monolingual_v1",
+                 "voice_settings": {
+                     "stability": 0.5,
+                     "similarity_boost": 0.5
+                 }
+             }
+         else:
+             data = {
+                 "text": text,
+                 "model_id": "eleven_multilingual_v1",
+                 "voice_settings": {
+                     "stability": 0.5,
+                     "similarity_boost": 0.5
+                 }
+             }
+
+         response = requests.post(url, json=data, headers=headers)
+         with open('./temp_eleven.mp3', 'wb') as f:
+             for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
+                 if chunk:
+                     f.write(chunk)
+         aud_path = save_to_wav('./temp_eleven.mp3')
+         return aud_path, aud_path
+     else:
+         tts = gTTS(text, lang=lang)
+         tts.save('./temp_gTTS.mp3')
+         aud_path = save_to_wav('./temp_gTTS.mp3')
+         return aud_path, aud_path
+
+ def upload_to_dataset(files, dir):
+     if dir == '':
+         dir = './dataset'
+     if not os.path.exists(dir):
+         os.makedirs(dir)
+     count = 0
+     for file in files:
+         path = file.name
+         shutil.copy2(path, dir)
+         count += 1
+     return f' {count} files uploaded to {dir}.'
+
+ def zip_downloader(model):
+     if not os.path.exists(f'./weights/{model}.pth'):
+         return {"__type__": "update"}, f'Make sure the Voice Name is correct. I could not find {model}.pth'
+     index_found = False
+     for file in os.listdir(f'./logs/{model}'):
+         if file.endswith('.index') and 'added' in file:
+             log_file = file
+             index_found = True
+     if index_found:
+         return [f'./weights/{model}.pth', f'./logs/{model}/{log_file}'], "Done"
+     else:
+         return f'./weights/{model}.pth', "Could not find Index file."
+
+ with gr.Blocks(theme=gr.themes.Base(), title='Mangio-RVC-Web 💻') as app:
+     with gr.Tabs():
+         with gr.TabItem("Inference"):
+             gr.HTML("<h1> Easy GUI v2 (rejekts) - adapted to Mangio-RVC-Fork 💻 [With extra features and fixes by kalomaze & alexlnkp]</h1>")
+
+             # Inference Preset Row
+             # with gr.Row():
+             #     mangio_preset = gr.Dropdown(label="Inference Preset", choices=sorted(get_presets()))
+             #     mangio_preset_name_save = gr.Textbox(
+             #         label="Your preset name"
+             #     )
+             #     mangio_preset_save_btn = gr.Button('Save Preset', variant="primary")
+
+             # Other RVC stuff
+             with gr.Row():
+                 sid0 = gr.Dropdown(label="1.Choose your Model.", choices=sorted(names), value=check_for_name())
+                 refresh_button = gr.Button("Refresh", variant="primary")
+                 if check_for_name() != '':
+                     get_vc(sorted(names)[0])
+                 vc_transform0 = gr.Number(label="Optional: You can change the pitch here or leave it at 0.", value=0)
+                 #clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary")
+                 spk_item = gr.Slider(
+                     minimum=0,
+                     maximum=2333,
+                     step=1,
+                     label=i18n("请选择说话人id"),
+                     value=0,
+                     visible=False,
+                     interactive=True,
+                 )
+                 #clean_button.click(fn=clean, inputs=[], outputs=[sid0])
+                 sid0.change(
+                     fn=get_vc,
+                     inputs=[sid0],
+                     outputs=[spk_item],
+                 )
+                 but0 = gr.Button("Convert", variant="primary")
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         dropbox = gr.File(label="Drop your audio here & hit the Reload button.")
+                     with gr.Row():
+                         record_button = gr.Audio(source="microphone", label="OR Record audio.", type="filepath")
+                     with gr.Row():
+                         input_audio0 = gr.Dropdown(
+                             label="2.Choose your audio.",
+                             value="./audios/someguy.mp3",
+                             choices=audio_files
+                         )
+                         dropbox.upload(fn=save_to_wav2, inputs=[dropbox], outputs=[input_audio0])
+                         dropbox.upload(fn=change_choices2, inputs=[], outputs=[input_audio0])
+                         refresh_button2 = gr.Button("Refresh", variant="primary", size='sm')
+                         record_button.change(fn=save_to_wav, inputs=[record_button], outputs=[input_audio0])
+                         record_button.change(fn=change_choices2, inputs=[], outputs=[input_audio0])
+                     with gr.Row():
+                         with gr.Accordion('Text To Speech', open=False):
+                             with gr.Column():
+                                 lang = gr.Radio(label='Chinese & Japanese do not work with ElevenLabs currently.', choices=['en', 'es', 'fr', 'pt', 'zh-CN', 'de', 'hi', 'ja'], value='en')
+                                 api_box = gr.Textbox(label="Enter your API Key for ElevenLabs, or leave empty to use GoogleTTS", value='')
+                                 elevenid = gr.Dropdown(label="Voice:", choices=eleven_voices)
+                             with gr.Column():
+                                 tfs = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.")
+                                 tts_button = gr.Button(value="Speak")
+                                 tts_button.click(fn=elevenTTS, inputs=[api_box, tfs, elevenid, lang], outputs=[record_button, input_audio0])
+                     with gr.Row():
+                         with gr.Accordion('Wav2Lip', open=False):
+                             with gr.Row():
+                                 size = gr.Radio(label='Resolution:', choices=['Half', 'Full'])
+                                 face = gr.UploadButton("Upload A Character", type='file')
+                                 faces = gr.Dropdown(label="OR Choose one:", choices=['None', 'Ben Shapiro', 'Andrew Tate'])
+                             with gr.Row():
+                                 preview = gr.Textbox(label="Status:", interactive=False)
+                                 face.upload(fn=success_message, inputs=[face], outputs=[preview, faces])
+                             with gr.Row():
+                                 animation = gr.Video(type='filepath')
+                                 refresh_button2.click(fn=change_choices2, inputs=[], outputs=[input_audio0, animation])
+                             with gr.Row():
+                                 animate_button = gr.Button('Animate')
+
1556
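+                 # Right-hand column: index settings, the converted-audio output, and
+                 # the advanced f0 / formant-shifting controls.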
+                 with gr.Column():
+                     with gr.Accordion("Index Settings", open=False):
+                         file_index1 = gr.Dropdown(
+                             label="3. Path to your added.index file (if it didn't automatically find it.)",
+                             choices=get_indexes(),
+                             value=get_index(),
+                             interactive=True,
+                         )
+                         sid0.change(fn=match_index, inputs=[sid0], outputs=[file_index1])
+                         refresh_button.click(
+                             fn=change_choices, inputs=[], outputs=[sid0, file_index1]
+                         )
+                         # file_big_npy1 = gr.Textbox(
+                         #     label=i18n("特征文件路径"),  # "Feature file path"
+                         #     value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
+                         #     interactive=True,
+                         # )
+                         index_rate1 = gr.Slider(
+                             minimum=0,
+                             maximum=1,
+                             label=i18n("检索特征占比"),  # "Feature retrieval ratio"
+                             value=0.66,
+                             interactive=True,
+                         )
+                     vc_output2 = gr.Audio(
+                         label="Output Audio (Click on the Three Dots in the Right Corner to Download)",
+                         type='filepath',
+                         interactive=False,
+                     )
+                     animate_button.click(fn=mouth, inputs=[size, face, vc_output2, faces], outputs=[animation, preview])
+                     with gr.Accordion("Advanced Settings", open=False):
+                         f0method0 = gr.Radio(
+                             label="Optional: Change the Pitch Extraction Algorithm.\nExtraction methods are sorted from 'worst quality' to 'best quality'.\nmangio-crepe may or may not be better than rmvpe in cases where 'smoothness' is more important, but rmvpe is the best overall.",
+                             choices=["pm", "dio", "crepe-tiny", "mangio-crepe-tiny", "crepe", "harvest", "mangio-crepe", "rmvpe"],  # Fork Feature: adds the crepe-tiny variants
+                             value="rmvpe",
+                             interactive=True,
+                         )
+
+                         crepe_hop_length = gr.Slider(
+                             minimum=1,
+                             maximum=512,
+                             step=1,
+                             label="Mangio-Crepe Hop Length. Higher values reduce the chance of extreme pitch jumps; lower values increase accuracy. 64-192 is a good range to experiment with.",
+                             value=120,
+                             interactive=True,
+                             visible=False,
+                         )
+                         f0method0.change(fn=whethercrepeornah, inputs=[f0method0], outputs=[crepe_hop_length])
+                         filter_radius0 = gr.Slider(
+                             minimum=0,
+                             maximum=7,
+                             label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),  # ">=3: median-filter the harvest pitch results with this radius; can soften breathy/mute artifacts"
+                             value=3,
+                             step=1,
+                             interactive=True,
+                         )
+                         resample_sr0 = gr.Slider(
+                             minimum=0,
+                             maximum=48000,
+                             label=i18n("后处理重采样至最终采样率,0为不进行重采样"),  # "Resample the output to this sample rate; 0 = no resampling"
+                             value=0,
+                             step=1,
+                             interactive=True,
+                             visible=False
+                         )
+                         rms_mix_rate0 = gr.Slider(
+                             minimum=0,
+                             maximum=1,
+                             label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),  # "Mix ratio between the input and output volume envelopes; closer to 1 uses more of the output envelope"
+                             value=0.21,
+                             interactive=True,
+                         )
+                         protect0 = gr.Slider(
+                             minimum=0,
+                             maximum=0.5,
+                             label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"),  # "Protect voiceless consonants and breaths from artifacts such as electronic tearing; 0.5 = off; lower = stronger protection but possibly weaker index effect"
+                             value=0.33,
+                             step=0.01,
+                             interactive=True,
+                         )
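+                         # Formant shifting (experimental): when enabled, the Quefrency
+                         # and Timbre sliders reshape the spectral envelope, which helps
+                         # male-to-female conversions and vice-versa; presets bundle
+                         # known-good value pairs.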
+                         formanting = gr.Checkbox(
+                             value=bool(DoFormant),
+                             label="[EXPERIMENTAL] Formant shift inference audio",
+                             info="Used for male to female and vice-versa conversions",
+                             interactive=True,
+                             visible=True,
+                         )
+
+                         formant_preset = gr.Dropdown(
+                             value='',
+                             choices=get_fshift_presets(),
+                             label="Browse presets for formanting",
+                             visible=bool(DoFormant),
+                         )
+                         formant_refresh_button = gr.Button(
+                             value='\U0001f504',
+                             visible=bool(DoFormant),
+                             variant='primary',
+                         )
+                         # formant_refresh_button = ToolButton(elem_id='1')
+                         # create_refresh_button(formant_preset, lambda: {"choices": formant_preset}, "refresh_list_shiftpresets")
+
+                         qfrency = gr.Slider(
+                             value=Quefrency,
+                             info="Default value is 1.0",
+                             label="Quefrency for formant shifting",
+                             minimum=0.0,
+                             maximum=16.0,
+                             step=0.1,
+                             visible=bool(DoFormant),
+                             interactive=True,
+                         )
+                         tmbre = gr.Slider(
+                             value=Timbre,
+                             info="Default value is 1.0",
+                             label="Timbre for formant shifting",
+                             minimum=0.0,
+                             maximum=16.0,
+                             step=0.1,
+                             visible=bool(DoFormant),
+                             interactive=True,
+                         )
+
+                         formant_preset.change(fn=preset_apply, inputs=[formant_preset, qfrency, tmbre], outputs=[qfrency, tmbre])
+                         frmntbut = gr.Button("Apply", variant="primary", visible=bool(DoFormant))
+                         formanting.change(
+                             fn=formant_enabled,
+                             inputs=[formanting, qfrency, tmbre, frmntbut, formant_preset, formant_refresh_button],
+                             outputs=[formanting, qfrency, tmbre, frmntbut, formant_preset, formant_refresh_button],
+                         )
+                         frmntbut.click(fn=formant_apply, inputs=[qfrency, tmbre], outputs=[qfrency, tmbre])
+                         formant_refresh_button.click(
+                             fn=update_fshift_presets,
+                             inputs=[formant_preset, qfrency, tmbre],
+                             outputs=[formant_preset, qfrency, tmbre],
+                         )
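+             # Conversion wiring: but0 passes every control above to vc_single in this
+             # order, writing status text to vc_output1 and audio to vc_output2.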
+             with gr.Row():
+                 vc_output1 = gr.Textbox("")
+                 f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调"), visible=False)  # "Optional F0 curve file: one pitch per line; replaces the default F0 and transpose"
+
+             but0.click(
+                 vc_single,
+                 [
+                     spk_item,
+                     input_audio0,
+                     vc_transform0,
+                     f0_file,
+                     f0method0,
+                     file_index1,
+                     # file_index2,
+                     # file_big_npy1,
+                     index_rate1,
+                     filter_radius0,
+                     resample_sr0,
+                     rms_mix_rate0,
+                     protect0,
+                     crepe_hop_length
+                 ],
+                 [vc_output1, vc_output2],
+             )
+
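+             # Batch conversion: run the selected model over a whole folder (or an
+             # uploaded set of files) with the same controls as single-file mode.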
+ with gr.Accordion("Batch Conversion",open=False):
1710
+ with gr.Row():
1711
+ with gr.Column():
1712
+ vc_transform1 = gr.Number(
1713
+ label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0
1714
+ )
1715
+ opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt")
1716
+ f0method1 = gr.Radio(
1717
+ label=i18n(
1718
+ "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
1719
+ ),
1720
+ choices=["pm", "harvest", "crepe", "rmvpe"],
1721
+ value="rmvpe",
1722
+ interactive=True,
1723
+ )
1724
+ filter_radius1 = gr.Slider(
1725
+ minimum=0,
1726
+ maximum=7,
1727
+ label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"),
1728
+ value=3,
1729
+ step=1,
1730
+ interactive=True,
1731
+ )
1732
+ with gr.Column():
1733
+ file_index3 = gr.Textbox(
1734
+ label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
1735
+ value="",
1736
+ interactive=True,
1737
+ )
1738
+ file_index4 = gr.Dropdown(
1739
+ label=i18n("自动检测index路径,下拉式选择(dropdown)"),
1740
+ choices=sorted(index_paths),
1741
+ interactive=True,
1742
+ )
1743
+ refresh_button.click(
1744
+ fn=lambda: change_choices()[1],
1745
+ inputs=[],
1746
+ outputs=file_index4,
1747
+ )
1748
+ # file_big_npy2 = gr.Textbox(
1749
+ # label=i18n("特征文件路径"),
1750
+ # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy",
1751
+ # interactive=True,
1752
+ # )
1753
+ index_rate2 = gr.Slider(
1754
+ minimum=0,
1755
+ maximum=1,
1756
+ label=i18n("检索特征占比"),
1757
+ value=1,
1758
+ interactive=True,
1759
+ )
1760
+ with gr.Column():
1761
+ resample_sr1 = gr.Slider(
1762
+ minimum=0,
1763
+ maximum=48000,
1764
+ label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
1765
+ value=0,
1766
+ step=1,
1767
+ interactive=True,
1768
+ )
1769
+ rms_mix_rate1 = gr.Slider(
1770
+ minimum=0,
1771
+ maximum=1,
1772
+ label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
1773
+ value=1,
1774
+ interactive=True,
1775
+ )
1776
+ protect1 = gr.Slider(
1777
+ minimum=0,
1778
+ maximum=0.5,
1779
+ label=i18n(
1780
+ "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
1781
+ ),
1782
+ value=0.33,
1783
+ step=0.01,
1784
+ interactive=True,
1785
+ )
1786
+ with gr.Column():
1787
+ dir_input = gr.Textbox(
1788
+ label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"),
1789
+ value="E:\codes\py39\\test-20230416b\\todo-songs",
1790
+ )
1791
+ inputs = gr.File(
1792
+ file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹")
1793
+ )
1794
+ with gr.Row():
1795
+ format1 = gr.Radio(
1796
+ label=i18n("导出文件格式"),
1797
+ choices=["wav", "flac", "mp3", "m4a"],
1798
+ value="flac",
1799
+ interactive=True,
1800
+ )
1801
+ but1 = gr.Button(i18n("转换"), variant="primary")
1802
+ vc_output3 = gr.Textbox(label=i18n("输出信息"))
1803
+ but1.click(
1804
+ vc_multi,
1805
+ [
1806
+ spk_item,
1807
+ dir_input,
1808
+ opt_input,
1809
+ inputs,
1810
+ vc_transform1,
1811
+ f0method1,
1812
+ file_index3,
1813
+ file_index4,
1814
+ # file_big_npy2,
1815
+ index_rate2,
1816
+ filter_radius1,
1817
+ resample_sr1,
1818
+ rms_mix_rate1,
1819
+ protect1,
1820
+ format1,
1821
+ crepe_hop_length,
1822
+ ],
1823
+ [vc_output3],
1824
+ )
1825
+ but1.click(fn=lambda: easy_uploader.clear())
1826
+ with gr.TabItem("Download Model"):
1827
+ with gr.Row():
1828
+ url=gr.Textbox(label="Enter the URL to the Model:")
1829
+ with gr.Row():
1830
+ model = gr.Textbox(label="Name your model:")
1831
+ download_button=gr.Button("Download")
1832
+ with gr.Row():
1833
+ status_bar=gr.Textbox(label="")
1834
+ download_button.click(fn=download_from_url, inputs=[url, model], outputs=[status_bar])
1835
+ with gr.Row():
1836
+ gr.Markdown(
1837
+ """
1838
+ Original RVC:https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI
1839
+ Mangio's RVC Fork:https://github.com/Mangio621/Mangio-RVC-Fork
1840
+ ❤️ If you like the EasyGUI, help me keep it.❤️
1841
+ https://paypal.me/lesantillan
1842
+ """
1843
+ )
1844
+
1845
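+         # The Train tab is only built when pretrained base models are present; the
+         # check below just counts files in ./pretrained/ and does not verify that
+         # the actual G/D checkpoints are among them.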
+         def has_two_files_in_pretrained_folder():
+             pretrained_folder = "./pretrained/"
+             if not os.path.exists(pretrained_folder):
+                 return False
+
+             files_in_folder = os.listdir(pretrained_folder)
+             num_files = len(files_in_folder)
+             return num_files >= 2
+
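+         # A stricter variant is sketched here for illustration only (it is not part
+         # of this commit); it assumes the v2 f0 base-model filenames used further
+         # down in this file:
+         # def has_pretrained_base_models():
+         #     return all(
+         #         os.path.exists(os.path.join("pretrained_v2", f))
+         #         for f in ("f0G40k.pth", "f0D40k.pth")
+         #     )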
+         if has_two_files_in_pretrained_folder():
+             print("Pretrained weights are downloaded. Training tab enabled!\n-------------------------------")
+             with gr.TabItem("Train", visible=False):
+                 with gr.Row():
+                     with gr.Column():
+                         exp_dir1 = gr.Textbox(label="Voice Name:", value="My-Voice")
+                         sr2 = gr.Radio(
+                             label=i18n("目标采样率"),  # "Target sample rate"
+                             choices=["40k", "48k"],
+                             value="40k",
+                             interactive=True,
+                             visible=False
+                         )
+                         if_f0_3 = gr.Radio(
+                             label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"),  # "Whether the model has pitch guidance (required for singing, optional for speech)"
+                             choices=[True, False],
+                             value=True,
+                             interactive=True,
+                             visible=False
+                         )
+                         version19 = gr.Radio(
+                             label="RVC version",
+                             choices=["v1", "v2"],
+                             value="v2",
+                             interactive=True,
+                             visible=False,
+                         )
+                         np7 = gr.Slider(
+                             minimum=0,
+                             maximum=config.n_cpu,
+                             step=1,
+                             label="# of CPUs for data processing (leave as is)",
+                             value=config.n_cpu,
+                             interactive=True,
+                             visible=True
+                         )
+                         trainset_dir4 = gr.Textbox(label="Path to your dataset (audios, not zip):", value="./dataset")
+                         easy_uploader = gr.Files(label='OR drop your audios here. They will be uploaded to the dataset path above.', file_types=['audio'])
+                         but1 = gr.Button("1. Process The Dataset", variant="primary")
+                         info1 = gr.Textbox(label="Status (wait until it says 'end preprocess'):", value="")
+                         easy_uploader.upload(fn=upload_to_dataset, inputs=[easy_uploader, trainset_dir4], outputs=[info1])
+                         but1.click(
+                             preprocess_dataset, [trainset_dir4, exp_dir1, sr2, np7], [info1]
+                         )
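+                     # Column 2: f0 (pitch) extraction over the preprocessed dataset;
+                     # the hop-length slider is revealed only for the crepe-based methods.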
+                     with gr.Column():
+                         spk_id5 = gr.Slider(
+                             minimum=0,
+                             maximum=4,
+                             step=1,
+                             label=i18n("请指定说话人id"),  # "Specify speaker id"
+                             value=0,
+                             interactive=True,
+                             visible=False
+                         )
+                         with gr.Accordion('GPU Settings', open=False, visible=False):
+                             gpus6 = gr.Textbox(
+                                 label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),  # "GPU ids separated by '-', e.g. 0-1-2 uses GPUs 0, 1 and 2"
+                                 value=gpus,
+                                 interactive=True,
+                                 visible=False
+                             )
+                             gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info)  # "GPU information"
+                         f0method8 = gr.Radio(
+                             label=i18n(
+                                 "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢"
+                             ),  # "Pitch extraction algorithm: pm is fast for singing; dio is fast for high-quality speech on weak CPUs; harvest is better but slower"
+                             choices=["harvest", "crepe", "mangio-crepe", "rmvpe"],  # Fork feature: crepe-based f0 extraction for training
+                             value="rmvpe",
+                             interactive=True,
+                         )
+
+                         extraction_crepe_hop_length = gr.Slider(
+                             minimum=1,
+                             maximum=512,
+                             step=1,
+                             label=i18n("crepe_hop_length"),
+                             value=128,
+                             interactive=True,
+                             visible=False,
+                         )
+                         f0method8.change(fn=whethercrepeornah, inputs=[f0method8], outputs=[extraction_crepe_hop_length])
+                         but2 = gr.Button("2. Pitch Extraction", variant="primary")
+                         info2 = gr.Textbox(label="Status (check the Colab Notebook's cell output):", value="", max_lines=8)
+                         but2.click(
+                             extract_f0_feature,
+                             [gpus6, np7, f0method8, if_f0_3, exp_dir1, version19, extraction_crepe_hop_length],
+                             [info2],
+                         )
+                 with gr.Row():
+                     with gr.Column():
+                         total_epoch11 = gr.Slider(
+                             minimum=1,
+                             maximum=5000,
+                             step=10,
+                             label="Total # of training epochs (if you choose a value that is too high, your model will sound horribly overtrained):",
+                             value=250,
+                             interactive=True,
+                         )
+                         butstop = gr.Button(
+                             "Stop Training",
+                             variant='primary',
+                             visible=False,
+                         )
+                         but3 = gr.Button("3. Train Model", variant="primary", visible=True)
+
+                         # but3 has two click handlers: this one flips the Train/Stop
+                         # button visibility via stoptraining (0 = starting, 1 = stopping);
+                         # click_train further below actually launches training.
+                         but3.click(fn=stoptraining, inputs=[gr.Number(value=0, visible=False)], outputs=[but3, butstop])
+                         butstop.click(fn=stoptraining, inputs=[gr.Number(value=1, visible=False)], outputs=[butstop, but3])
+
+                         but4 = gr.Button("4. Train Index", variant="primary")
+                         info3 = gr.Textbox(label="Status (check the Colab Notebook's cell output):", value="", max_lines=10)
+                         with gr.Accordion("Training Preferences (You can leave these as they are)", open=False):
+                             # gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引"))  # "step 3: fill in the training settings, then train the model and index"
+                             with gr.Column():
+                                 save_epoch10 = gr.Slider(
+                                     minimum=1,
+                                     maximum=200,
+                                     step=1,
+                                     label="Backup every X epochs:",
+                                     value=10,
+                                     interactive=True,
+                                 )
+                                 batch_size12 = gr.Slider(
+                                     minimum=1,
+                                     maximum=40,
+                                     step=1,
+                                     label="Batch Size (LEAVE IT unless you know what you're doing!):",
+                                     value=default_batch_size,
+                                     interactive=True,
+                                 )
+                                 if_save_latest13 = gr.Checkbox(
+                                     label="Save only the latest '.ckpt' file to save disk space.",
+                                     value=True,
+                                     interactive=True,
+                                 )
+                                 if_cache_gpu17 = gr.Checkbox(
+                                     label="Cache all training sets to GPU memory. Caching small datasets (less than 10 minutes) can speed up training, but caching large datasets will consume a lot of GPU memory and may not provide much speed improvement.",
+                                     value=False,
+                                     interactive=True,
+                                 )
+                                 if_save_every_weights18 = gr.Checkbox(
+                                     label="Save a small final model to the 'weights' folder at each save point.",
+                                     value=True,
+                                     interactive=True,
+                                 )
+                         zip_model = gr.Button('5. Download Model')
+                         zipped_model = gr.Files(label='Your Model and Index file can be downloaded here:')
+                         zip_model.click(fn=zip_downloader, inputs=[exp_dir1], outputs=[zipped_model, info3])
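+                 # Base model locations (hidden): the sr2 / if_f0_3 / version19 handlers
+                 # below rewrite the pretrained G/D checkpoint paths to match the
+                 # selected sample rate, pitch-guidance setting and RVC version.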
+                 with gr.Group():
+                     with gr.Accordion("Base Model Locations:", open=False, visible=False):
+                         pretrained_G14 = gr.Textbox(
+                             label=i18n("加载预训练底模G路径"),  # "Path to the pretrained base model G"
+                             value="pretrained_v2/f0G40k.pth",
+                             interactive=True,
+                         )
+                         pretrained_D15 = gr.Textbox(
+                             label=i18n("加载预训练底模D路径"),  # "Path to the pretrained base model D"
+                             value="pretrained_v2/f0D40k.pth",
+                             interactive=True,
+                         )
+                         gpus16 = gr.Textbox(
+                             label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"),  # "GPU ids separated by '-', e.g. 0-1-2 uses GPUs 0, 1 and 2"
+                             value=gpus,
+                             interactive=True,
+                         )
+                     sr2.change(
+                         change_sr2,
+                         [sr2, if_f0_3, version19],
+                         [pretrained_G14, pretrained_D15, version19],
+                     )
+                     version19.change(
+                         change_version19,
+                         [sr2, if_f0_3, version19],
+                         [pretrained_G14, pretrained_D15],
+                     )
+                     if_f0_3.change(
+                         change_f0,
+                         [if_f0_3, sr2, version19],
+                         [f0method8, pretrained_G14, pretrained_D15],
+                     )
+                     but5 = gr.Button(i18n("一键训练"), variant="primary", visible=False)  # "One-click training"
+                     but3.click(
+                         click_train,
+                         [
+                             exp_dir1,
+                             sr2,
+                             if_f0_3,
+                             spk_id5,
+                             save_epoch10,
+                             total_epoch11,
+                             batch_size12,
+                             if_save_latest13,
+                             pretrained_G14,
+                             pretrained_D15,
+                             gpus16,
+                             if_cache_gpu17,
+                             if_save_every_weights18,
+                             version19,
+                         ],
+                         [
+                             info3,
+                             butstop,
+                             but3,
+                         ],
+                     )
+                     but4.click(train_index, [exp_dir1, version19], info3)
+                     but5.click(
+                         train1key,
+                         [
+                             exp_dir1,
+                             sr2,
+                             if_f0_3,
+                             trainset_dir4,
+                             spk_id5,
+                             np7,
+                             f0method8,
+                             save_epoch10,
+                             total_epoch11,
+                             batch_size12,
+                             if_save_latest13,
+                             pretrained_G14,
+                             pretrained_D15,
+                             gpus16,
+                             if_cache_gpu17,
+                             if_save_every_weights18,
+                             version19,
+                             extraction_crepe_hop_length
+                         ],
+                         info3,
+                     )
+
+         else:
+             print(
+                 "Pretrained weights not downloaded. Disabling training tab.\n"
+                 "Wondering how to train a voice? Visit here for the RVC model training guide: https://t.ly/RVC_Training_Guide\n"
+                 "-------------------------------\n"
+             )
+
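+     # Launch: on Colab/Paperspace a public share link is created; otherwise the
+     # app binds to 0.0.0.0 on config.listen_port.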
+     if config.iscolab or config.paperspace:  # Share the gradio link on Colab and Paperspace (FORK FEATURE)
+         app.queue(concurrency_count=511, max_size=1022).launch(share=True, quiet=True)
+     else:
+         app.queue(concurrency_count=511, max_size=1022).launch(
+             server_name="0.0.0.0",
+             inbrowser=not config.noautoopen,
+             server_port=config.listen_port,
+             quiet=True,
+         )
+ #endregion
infer-web.py ADDED
The diff for this file is too large to render. See raw diff
 
infer_uvr5.py ADDED
@@ -0,0 +1,363 @@
+ import os, sys, torch, warnings, pdb
+
+ now_dir = os.getcwd()
+ sys.path.append(now_dir)
+ from json import load as ll
+
+ warnings.filterwarnings("ignore")
+ import librosa
+ import importlib
+ import numpy as np
+ import hashlib, math
+ from tqdm import tqdm
+ from lib.uvr5_pack.lib_v5 import spec_utils
+ from lib.uvr5_pack.utils import _get_name_params, inference
+ from lib.uvr5_pack.lib_v5.model_param_init import ModelParameters
+ import soundfile as sf
+ from lib.uvr5_pack.lib_v5.nets_new import CascadedNet
+ from lib.uvr5_pack.lib_v5 import nets_61968KB as nets
+
+
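+ # UVR5 vocal/instrumental separation. Each class below loads a VR-architecture
+ # network, splits the input into the frequency bands described by its
+ # model-parameter JSON, runs masked spectrogram inference, and writes the
+ # instrumental and vocal stems to disk.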
+ class _audio_pre_:
+     def __init__(self, agg, model_path, device, is_half):
+         self.model_path = model_path
+         self.device = device
+         self.data = {
+             # Processing Options
+             "postprocess": False,
+             "tta": False,
+             # Constants
+             "window_size": 512,
+             "agg": agg,
+             "high_end_process": "mirroring",
+         }
+         mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v2.json")
+         model = nets.CascadedASPPNet(mp.param["bins"] * 2)
+         cpk = torch.load(model_path, map_location="cpu")
+         model.load_state_dict(cpk)
+         model.eval()
+         if is_half:
+             model = model.half().to(device)
+         else:
+             model = model.to(device)
+
+         self.mp = mp
+         self.model = model
+
+     def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"):
+         if ins_root is None and vocal_root is None:
+             return "No save root."
+         name = os.path.basename(music_file)
+         if ins_root is not None:
+             os.makedirs(ins_root, exist_ok=True)
+         if vocal_root is not None:
+             os.makedirs(vocal_root, exist_ok=True)
+         X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
+         bands_n = len(self.mp.param["band"])
+         # print(bands_n)
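+         # Walk the bands from highest to lowest: the top band is loaded from disk,
+         # each lower band is produced by resampling the band above it, and every
+         # band is turned into a spectrogram before the bands are recombined.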
+         for d in range(bands_n, 0, -1):
+             bp = self.mp.param["band"][d]
+             if d == bands_n:  # high-end band
+                 (
+                     X_wave[d],
+                     _,
+                 ) = librosa.core.load(  # In theory librosa may mis-read some audio files; ffmpeg would be more robust, but switching was deemed not worth the hassle
+                     music_file,
+                     sr=bp["sr"],
+                     mono=False,
+                     dtype=np.float32,
+                     res_type=bp["res_type"],
+                 )
+                 if X_wave[d].ndim == 1:
+                     X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]])
+             else:  # lower bands
+                 X_wave[d] = librosa.core.resample(
+                     X_wave[d + 1],
+                     orig_sr=self.mp.param["band"][d + 1]["sr"],
+                     target_sr=bp["sr"],
+                     res_type=bp["res_type"],
+                 )
+             # STFT of the wave source
+             X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(
+                 X_wave[d],
+                 bp["hl"],
+                 bp["n_fft"],
+                 self.mp.param["mid_side"],
+                 self.mp.param["mid_side_b2"],
+                 self.mp.param["reverse"],
+             )
+             # pdb.set_trace()
+             if d == bands_n and self.data["high_end_process"] != "none":
+                 input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + (
+                     self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"]
+                 )
+                 input_high_end = X_spec_s[d][
+                     :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, :
+                 ]
+
+         X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp)
+         aggressive_set = float(self.data["agg"] / 100)
+         aggressiveness = {
+             "value": aggressive_set,
+             "split_bin": self.mp.param["band"][1]["crop_stop"],
+         }
+         with torch.no_grad():
+             pred, X_mag, X_phase = inference(
+                 X_spec_m, self.device, self.model, aggressiveness, self.data
+             )
+         # Postprocess
+         if self.data["postprocess"]:
+             pred_inv = np.clip(X_mag - pred, 0, np.inf)
+             pred = spec_utils.mask_silence(pred, pred_inv)
+         y_spec_m = pred * X_phase
+         v_spec_m = X_spec_m - y_spec_m
+
+         if ins_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], y_spec_m, input_high_end, self.mp
+                 )
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(
+                     y_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
+             print("%s instruments done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         ins_root,
+                         "instrument_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 # For mp3/m4a: write a wav first, then convert it with ffmpeg.
+                 path = os.path.join(
+                     ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         'ffmpeg -i "%s" -vn -q:a 2 -y "%s"'
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+         if vocal_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], v_spec_m, input_high_end, self.mp
+                 )
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(
+                     v_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
+             print("%s vocals done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         vocal_root,
+                         "vocal_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(
+                     vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         'ffmpeg -i "%s" -vn -q:a 2 -y "%s"'
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+
+
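+ # Variant for the newer DeEcho/DeReverb checkpoints: it uses the 4band_v3
+ # parameters and a CascadedNet whose width depends on the checkpoint name.
+ # Per the original comment on _path_audio_ below, these three VR models swap
+ # the vocal and instrumental outputs, so vocal_root comes before ins_root here.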
+ class _audio_pre_new:
+     def __init__(self, agg, model_path, device, is_half):
+         self.model_path = model_path
+         self.device = device
+         self.data = {
+             # Processing Options
+             "postprocess": False,
+             "tta": False,
+             # Constants
+             "window_size": 512,
+             "agg": agg,
+             "high_end_process": "mirroring",
+         }
+         mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v3.json")
+         nout = 64 if "DeReverb" in model_path else 48
+         model = CascadedNet(mp.param["bins"] * 2, nout)
+         cpk = torch.load(model_path, map_location="cpu")
+         model.load_state_dict(cpk)
+         model.eval()
+         if is_half:
+             model = model.half().to(device)
+         else:
+             model = model.to(device)
+
+         self.mp = mp
+         self.model = model
+
+     def _path_audio_(
+         self, music_file, vocal_root=None, ins_root=None, format="flac"
+     ):  # For the three VR models, the vocal and instrumental outputs are swapped
+         if ins_root is None and vocal_root is None:
+             return "No save root."
+         name = os.path.basename(music_file)
+         if ins_root is not None:
+             os.makedirs(ins_root, exist_ok=True)
+         if vocal_root is not None:
+             os.makedirs(vocal_root, exist_ok=True)
+         X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {}
+         bands_n = len(self.mp.param["band"])
+         # print(bands_n)
+         for d in range(bands_n, 0, -1):
+             bp = self.mp.param["band"][d]
+             if d == bands_n:  # high-end band
+                 (
+                     X_wave[d],
+                     _,
+                 ) = librosa.core.load(  # In theory librosa may mis-read some audio files; ffmpeg would be more robust, but switching was deemed not worth the hassle
+                     music_file,
+                     sr=bp["sr"],
+                     mono=False,
+                     dtype=np.float32,
+                     res_type=bp["res_type"],
+                 )
+                 if X_wave[d].ndim == 1:
+                     X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]])
+             else:  # lower bands
+                 X_wave[d] = librosa.core.resample(
+                     X_wave[d + 1],
+                     orig_sr=self.mp.param["band"][d + 1]["sr"],
+                     target_sr=bp["sr"],
+                     res_type=bp["res_type"],
+                 )
+             # STFT of the wave source
+             X_spec_s[d] = spec_utils.wave_to_spectrogram_mt(
+                 X_wave[d],
+                 bp["hl"],
+                 bp["n_fft"],
+                 self.mp.param["mid_side"],
+                 self.mp.param["mid_side_b2"],
+                 self.mp.param["reverse"],
+             )
+             # pdb.set_trace()
+             if d == bands_n and self.data["high_end_process"] != "none":
+                 input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + (
+                     self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"]
+                 )
+                 input_high_end = X_spec_s[d][
+                     :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, :
+                 ]
+
+         X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp)
+         aggressive_set = float(self.data["agg"] / 100)
+         aggressiveness = {
+             "value": aggressive_set,
+             "split_bin": self.mp.param["band"][1]["crop_stop"],
+         }
+         with torch.no_grad():
+             pred, X_mag, X_phase = inference(
+                 X_spec_m, self.device, self.model, aggressiveness, self.data
+             )
+         # Postprocess
+         if self.data["postprocess"]:
+             pred_inv = np.clip(X_mag - pred, 0, np.inf)
+             pred = spec_utils.mask_silence(pred, pred_inv)
+         y_spec_m = pred * X_phase
+         v_spec_m = X_spec_m - y_spec_m
+
+         if ins_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], y_spec_m, input_high_end, self.mp
+                 )
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(
+                     y_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp)
+             print("%s instruments done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         ins_root,
+                         "instrument_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 # For mp3/m4a: write a wav first, then convert it with ffmpeg.
+                 path = os.path.join(
+                     ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_instrument) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         'ffmpeg -i "%s" -vn -q:a 2 -y "%s"'
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+         if vocal_root is not None:
+             if self.data["high_end_process"].startswith("mirroring"):
+                 input_high_end_ = spec_utils.mirroring(
+                     self.data["high_end_process"], v_spec_m, input_high_end, self.mp
+                 )
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(
+                     v_spec_m, self.mp, input_high_end_h, input_high_end_
+                 )
+             else:
+                 wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp)
+             print("%s vocals done" % name)
+             if format in ["wav", "flac"]:
+                 sf.write(
+                     os.path.join(
+                         vocal_root,
+                         "vocal_{}_{}.{}".format(name, self.data["agg"], format),
+                     ),
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+             else:
+                 path = os.path.join(
+                     vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"])
+                 )
+                 sf.write(
+                     path,
+                     (np.array(wav_vocals) * 32768).astype("int16"),
+                     self.mp.param["sr"],
+                 )
+                 if os.path.exists(path):
+                     os.system(
+                         'ffmpeg -i "%s" -vn -q:a 2 -y "%s"'
+                         % (path, path[:-4] + ".%s" % format)
+                     )
+
+
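+ # Quick smoke test. Note the argument-order difference between the two classes:
+ # _audio_pre_._path_audio_(music_file, ins_root, vocal_root) versus
+ # _audio_pre_new._path_audio_(music_file, vocal_root, ins_root); passing
+ # keyword arguments, as below, avoids mixing the stems up.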
+ if __name__ == "__main__":
353
+ device = "cuda"
354
+ is_half = True
355
+ # model_path = "uvr5_weights/2_HP-UVR.pth"
356
+ # model_path = "uvr5_weights/VR-DeEchoDeReverb.pth"
357
+ # model_path = "uvr5_weights/VR-DeEchoNormal.pth"
358
+ model_path = "uvr5_weights/DeEchoNormal.pth"
359
+ # pre_fun = _audio_pre_(model_path=model_path, device=device, is_half=True,agg=10)
360
+ pre_fun = _audio_pre_new(model_path=model_path, device=device, is_half=True, agg=10)
361
+ audio_path = "雪雪伴奏对消HP5.wav"
362
+ save_path = "opt"
363
+ pre_fun._path_audio_(audio_path, save_path, save_path)