Vijish committed on
Commit
42e91c3
1 Parent(s): dc3db46

Update voice_processing.py

Browse files
Files changed (1) hide show
  1. voice_processing.py +23 -4
voice_processing.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import datetime
2
  import logging
3
  import os
@@ -33,7 +34,7 @@ limitation = os.getenv("SYSTEM") == "spaces"
33
  config = Config()
34
 
35
  # Edge TTS
36
- tts_voice_list = edge_tts.list_voices()
37
  tts_voices = ["mn-MN-BataaNeural", "mn-MN-YesuiNeural"] # Specific voices
38
 
39
  # RVC models
@@ -117,7 +118,14 @@ def get_model_names():
117
  model_root = "weights" # Assuming this is where your models are stored
118
  return [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
119
 
120
- def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploaded_voice):
 
 
 
 
 
 
 
121
  # Default values for parameters used in EdgeTTS
122
  speed = 0 # Default speech speed
123
  f0_up_key = 0 # Default pitch adjustment
@@ -130,6 +138,7 @@ def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploade
130
 
131
  edge_output_filename = get_unique_filename("mp3")
132
 
 
133
  try:
134
  if use_uploaded_voice:
135
  if uploaded_voice is None:
@@ -140,6 +149,7 @@ def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploade
140
  tmp_file.write(uploaded_voice)
141
  uploaded_file_path = tmp_file.name
142
 
 
143
  audio, sr = librosa.load(uploaded_file_path, sr=16000, mono=True)
144
  else:
145
  # EdgeTTS processing
@@ -153,7 +163,9 @@ def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploade
153
  # Invoke Edge TTS
154
  t0 = time.time()
155
  speed_str = f"+{speed}%" if speed >= 0 else f"{speed}%"
156
- edge_tts.Communicate(tts_text, tts_voice, rate=speed_str).save(edge_output_filename)
 
 
157
  t1 = time.time()
158
  edge_time = t1 - t0
159
 
@@ -212,7 +224,9 @@ def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploade
212
  )
213
 
214
  except EOFError:
215
- info = "Output not valid. This may occur when input text and speaker do not match."
 
 
216
  print(info)
217
  return info, None, None
218
  except Exception as e:
@@ -220,10 +234,15 @@ def tts(model_name, tts_text, tts_voice, index_rate, use_uploaded_voice, uploade
220
  print(traceback_info)
221
  return str(e), None, None
222
 
 
223
  voice_mapping = {
224
  "Mongolian Male": "mn-MN-BataaNeural",
225
  "Mongolian Female": "mn-MN-YesuiNeural"
226
  }
227
 
 
 
228
  hubert_model = load_hubert()
 
229
  rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
 
 
1
+ import asyncio
2
  import datetime
3
  import logging
4
  import os
 
34
  config = Config()
35
 
36
  # Edge TTS
37
+ tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
38
  tts_voices = ["mn-MN-BataaNeural", "mn-MN-YesuiNeural"] # Specific voices
39
 
40
  # RVC models
 
118
  model_root = "weights" # Assuming this is where your models are stored
119
  return [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
120
 
121
+ async def tts(
122
+ model_name,
123
+ tts_text,
124
+ tts_voice,
125
+ index_rate,
126
+ use_uploaded_voice,
127
+ uploaded_voice,
128
+ ):
129
  # Default values for parameters used in EdgeTTS
130
  speed = 0 # Default speech speed
131
  f0_up_key = 0 # Default pitch adjustment
 
138
 
139
  edge_output_filename = get_unique_filename("mp3")
140
 
141
+
142
  try:
143
  if use_uploaded_voice:
144
  if uploaded_voice is None:
 
149
  tmp_file.write(uploaded_voice)
150
  uploaded_file_path = tmp_file.name
151
 
152
+ #uploaded_file_path = uploaded_voice.name
153
  audio, sr = librosa.load(uploaded_file_path, sr=16000, mono=True)
154
  else:
155
  # EdgeTTS processing
 
163
  # Invoke Edge TTS
164
  t0 = time.time()
165
  speed_str = f"+{speed}%" if speed >= 0 else f"{speed}%"
166
+ await edge_tts.Communicate(
167
+ tts_text, tts_voice, rate=speed_str
168
+ ).save(edge_output_filename)
169
  t1 = time.time()
170
  edge_time = t1 - t0
171
 
 
224
  )
225
 
226
  except EOFError:
227
+ info = (
228
+ "output not valid. This may occur when input text and speaker do not match."
229
+ )
230
  print(info)
231
  return info, None, None
232
  except Exception as e:
 
234
  print(traceback_info)
235
  return str(e), None, None
236
 
237
+
238
  voice_mapping = {
239
  "Mongolian Male": "mn-MN-BataaNeural",
240
  "Mongolian Female": "mn-MN-YesuiNeural"
241
  }
242
 
243
+
244
+
245
  hubert_model = load_hubert()
246
+
247
  rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
248
+