RUI-LONG committed on
Commit
c4b86ad
·
1 Parent(s): 387a00b
Files changed (2) hide show
  1. app.py +38 -4
  2. requirements.txt +3 -1
app.py CHANGED
@@ -8,6 +8,9 @@ import traceback
8
  import edge_tts
9
  import gradio as gr
10
  import librosa
 
 
 
11
 
12
  from src.rmvpe import RMVPE
13
  from model_loader import ModelLoader
@@ -39,9 +42,34 @@ rmvpe_model = RMVPE(
39
 
40
  model_loader.load("char2")
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
  def tts(
44
- rvc,
 
45
  speed,
46
  pitch,
47
  tts_text,
@@ -61,6 +89,8 @@ def tts(
61
  print(f"tts_voice: {tts_voice}")
62
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
63
 
 
 
64
  try:
65
  if limitation and len(tts_text) > 280:
66
  print("Error: Text too long")
@@ -86,9 +116,11 @@ def tts(
86
  )
87
  t1 = time.time()
88
  edge_time = t1 - t0
89
- # with open(edge_output_filename, "rb") as f:
90
- # audio_opt = f.read()
91
  if not rvc:
 
 
 
92
  info = f"Success. Time: edge-tts: {edge_time}s"
93
  print(info)
94
  return (
@@ -206,7 +238,8 @@ with app:
206
  label="Input Text",
207
  value="I'm Never Gonna Give You Up",
208
  )
209
- rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=True)
 
210
  with gr.Column():
211
  but0 = gr.Button("Convert", variant="primary")
212
  info_text = gr.Textbox(label="Output info")
@@ -216,6 +249,7 @@ with app:
216
  tts,
217
  [
218
  rvc,
 
219
  speed,
220
  pitch,
221
  tts_text,
 
8
  import edge_tts
9
  import gradio as gr
10
  import librosa
11
+ import numpy as np
12
+ from pydub import AudioSegment
13
+ from scipy.io import wavfile
14
 
15
  from src.rmvpe import RMVPE
16
  from model_loader import ModelLoader
 
42
 
43
  model_loader.load("char2")
44
 
45
def add_robotic_effect(mp3_path):
    """Add a single-tap echo ("robotic" effect) to an MP3 and save it as WAV.

    The file is decoded with pydub, down-mixed to mono when it has two
    channels, mixed with a copy of itself that is delayed by 50 ms and
    scaled by 0.55, clipped to the int16 range and written to
    ``processed.wav`` in the current working directory.

    Args:
        mp3_path: Path of the MP3 file to process.

    Returns:
        The path of the written WAV file (always ``"processed.wav"``).
    """
    delay = 0.05  # echo offset in seconds
    alpha = 0.55  # gain applied to the delayed copy
    output_path = "processed.wav"

    audio = AudioSegment.from_mp3(mp3_path)
    sample_rate = audio.frame_rate

    # Do the arithmetic in float64. Mixing directly in the decoded integer
    # dtype wraps around on overflow *before* the clip below can saturate it,
    # which turns loud passages into crackle instead of limiting them.
    # NOTE(review): the int16 clip assumes 16-bit decoded samples - confirm.
    samples = np.array(audio.get_array_of_samples(), dtype=np.float64)

    # If stereo, average the interleaved channels down to mono.
    if audio.channels == 2:
        samples = samples.reshape((-1, 2)).mean(axis=1)

    delay_samples = int(delay * sample_rate)

    mixed = samples.copy()
    # Guard the slice: a zero offset would make samples[:-0] empty, and an
    # offset beyond the clip length leaves nothing to overlay.
    if 0 < delay_samples < len(samples):
        mixed[delay_samples:] += samples[:-delay_samples] * alpha

    # Saturate to the int16 range, then convert once for the WAV writer.
    mixed = np.clip(mixed, -32768, 32767).astype(np.int16)
    wavfile.write(output_path, sample_rate, mixed)
    return output_path
69
 
70
  def tts(
71
+ rvc,
72
+ effect,
73
  speed,
74
  pitch,
75
  tts_text,
 
89
  print(f"tts_voice: {tts_voice}")
90
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
91
 
92
+ edge_output_filename = "edge_output.mp3"
93
+
94
  try:
95
  if limitation and len(tts_text) > 280:
96
  print("Error: Text too long")
 
116
  )
117
  t1 = time.time()
118
  edge_time = t1 - t0
119
+
 
120
  if not rvc:
121
+ if effect:
122
+ edge_output_filename = add_robotic_effect(edge_output_filename)
123
+
124
  info = f"Success. Time: edge-tts: {edge_time}s"
125
  print(info)
126
  return (
 
238
  label="Input Text",
239
  value="I'm Never Gonna Give You Up",
240
  )
241
+ rvc = gr.Checkbox(label="Transform Voice", info="Would you like to apply voice transformation? Check means yes", value=False)
242
+ effect = gr.Checkbox(label="Add Effect", info="Would you like to apply Effect?", value=True)
243
  with gr.Column():
244
  but0 = gr.Button("Convert", variant="primary")
245
  info_text = gr.Textbox(label="Output info")
 
249
  tts,
250
  [
251
  rvc,
252
+ effect,
253
  speed,
254
  pitch,
255
  tts_text,
requirements.txt CHANGED
@@ -8,4 +8,6 @@ pyworld==0.3.4
8
  torchcrepe==0.0.21
9
  scikit-learn==1.3.0
10
  gradio
11
- gradio_client
 
 
 
8
  torchcrepe==0.0.21
9
  scikit-learn==1.3.0
10
  gradio
11
+ gradio_client
12
+ pydub
13
+ scipy