Update voice_processing.py
voice_processing.py  (+3 additions, -22 deletions)
@@ -118,14 +118,7 @@ def get_model_names():
     model_root = "weights"  # Assuming this is where your models are stored
     return [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]

-async def tts(
-    model_name,
-    tts_text,
-    tts_voice,
-    index_rate,
-    use_uploaded_voice,
-    uploaded_voice,
-):
+def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploaded_voice):
     # Default values for parameters used in EdgeTTS
     speed = 0  # Default speech speed
     f0_up_key = 0  # Default pitch adjustment
@@ -138,7 +131,6 @@ async def tts(

     edge_output_filename = get_unique_filename("mp3")

-
     try:
         if use_uploaded_voice:
             if uploaded_voice is None:
@@ -149,7 +141,6 @@ async def tts(
                 tmp_file.write(uploaded_voice)
                 uploaded_file_path = tmp_file.name

-            #uploaded_file_path = uploaded_voice.name
             audio, sr = librosa.load(uploaded_file_path, sr=16000, mono=True)
         else:
             # EdgeTTS processing
@@ -163,9 +154,7 @@ async def tts(
             # Invoke Edge TTS
             t0 = time.time()
             speed_str = f"+{speed}%" if speed >= 0 else f"{speed}%"
-            await edge_tts.Communicate(
-                tts_text, tts_voice, rate=speed_str
-            ).save(edge_output_filename)
+            edge_tts.Communicate(tts_text, tts_voice, rate=speed_str).save(edge_output_filename)
             t1 = time.time()
             edge_time = t1 - t0

@@ -224,9 +213,7 @@ async def tts(
         )

     except EOFError:
-        info = (
-            "output not valid. This may occur when input text and speaker do not match."
-        )
+        info = "Output not valid. This may occur when input text and speaker do not match."
         print(info)
         return info, None, None
     except Exception as e:
@@ -234,19 +221,13 @@ async def tts(
         print(traceback_info)
         return str(e), None, None

-
 voice_mapping = {
     "Mongolian Male": "mn-MN-BataaNeural",
     "Mongolian Female": "mn-MN-YesuiNeural"
 }

-
-
 hubert_model = load_hubert()
-
 rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)



-
-
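For quick reference, below is a minimal, hypothetical driver sketch for the simplified tts() signature introduced in this commit. It assumes voice_processing is importable as a module (which also loads the HuBERT and RMVPE models at import time), that the three-value return (info, EdgeTTS audio, converted audio) matches the error-path returns shown in the diff, and that at least one model folder exists under weights/. The sample text and index_rate value are illustrative only; if your copy of the function is still declared async, wrap the call in asyncio.run(...).

# Hypothetical usage sketch for the reworked tts() signature (not part of the commit).
from voice_processing import get_model_names, tts, voice_mapping

model = get_model_names()[0]                # first model folder under "weights/"
voice = voice_mapping["Mongolian Female"]   # resolves to "mn-MN-YesuiNeural"

info, edge_audio, converted_audio = tts(
    model_name=model,
    tts_text="Сайн байна уу?",              # Mongolian for "Hello?" (illustrative)
    tts_voice=voice,
    index_rate=0.75,                        # illustrative feature-index strength
    use_uploaded_voice=False,               # use EdgeTTS instead of an uploaded file
    uploaded_voice=None,
)
print(info)

Note that EdgeTTS rate strings carry an explicit sign, e.g. "+0%" or "-10%", which is exactly what the speed_str expression in the diff produces.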