guetLzy committed on
Commit
084bb16
·
verified ·
1 Parent(s): 04be048

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -12
app.py CHANGED
@@ -40,7 +40,6 @@ def download_models():
40
  os.makedirs("model", exist_ok=True)
41
  os.makedirs("hubert/chinese-hubert-large-fairseq-ckpt", exist_ok=True)
42
 
43
- # 下载FreeVC模型
44
  freevc_paths = {}
45
  for model_name, model_path in MODEL_OPTIONS.items():
46
  path = hf_hub_download(
@@ -51,7 +50,6 @@ def download_models():
51
  )
52
  freevc_paths[model_name] = path
53
 
54
- # 下载整个HuBERT仓库
55
  hubert_dir = "hubert/chinese-hubert-large-fairseq-ckpt"
56
  snapshot_download(
57
  repo_id=MODEL_CONFIG["hubert"]["repo_id"],
@@ -66,15 +64,17 @@ def download_models():
66
  "hubert": hubert_paths
67
  }
68
 
69
- def load_hubert(hubert_dir):
70
  """加载HuBERT模型(使用fairseq格式的检查点)"""
 
71
  logger.info("正在加载 HuBERT 模型...")
72
  model = HubertModel.from_pretrained(hubert_dir)
73
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(hubert_dir)
74
  return model.to(device).float().eval(), feature_extractor
75
 
76
- def load_freevc(model_path):
77
  """加载FreeVC模型(使用本地配置文件)"""
 
78
  logger.info(f"正在从 {model_path} 加载 FreeVC 模型...")
79
  hps = utils.get_hparams_from_file("configs/freevc.json")
80
 
@@ -91,28 +91,36 @@ def load_freevc(model_path):
91
  return net_g, smodel, hps
92
 
93
  # 预加载模型
 
94
  logger.info("正在下载模型...")
95
  model_paths = download_models()
 
96
  logger.info(f"模型路径: {model_paths}")
 
97
  logger.info("正在初始化 HuBERT...")
98
  hubert_dir = "hubert/chinese-hubert-large-fairseq-ckpt"
99
- hubert_model, hubert_feature_extractor = load_hubert(hubert_dir)
100
 
101
  def voice_conversion(src_audio, tgt_audio, output_name, model_selection):
102
  """执行语音转换"""
 
 
103
  try:
104
  # 加载选中的FreeVC模型
105
- freevc_model, speaker_model, hps = load_freevc(MODEL_OPTIONS[model_selection])
106
 
107
  with torch.no_grad():
108
  # 处理目标音频
 
109
  wav_tgt, _ = librosa.load(tgt_audio, sr=hps.data.sampling_rate)
110
  wav_tgt, _ = librosa.effects.trim(wav_tgt, top_db=20)
111
 
112
  if hps.model.use_spk:
 
113
  g_tgt = speaker_model.embed_utterance(wav_tgt)
114
  g_tgt = torch.from_numpy(g_tgt).unsqueeze(0).to(device)
115
  else:
 
116
  wav_tgt = torch.from_numpy(wav_tgt).unsqueeze(0).to(device)
117
  mel_tgt = mel_spectrogram_torch(
118
  wav_tgt,
@@ -125,29 +133,35 @@ def voice_conversion(src_audio, tgt_audio, output_name, model_selection):
125
  hps.data.mel_fmax
126
  )
127
 
128
- # 处理源音频(HuBERT需要16kHz)
129
- wav_src, _ = librosa.load(src_audio, sr=16000)
 
130
  inputs = hubert_feature_extractor(
131
  wav_src,
132
  return_tensors="pt",
133
  sampling_rate=16_000
134
  ).input_values.to(device)
135
 
 
136
  c = hubert_model(inputs.float()).last_hidden_state.transpose(1, 2)
137
 
138
  # 执行转换
 
139
  audio = freevc_model.infer(c, g=g_tgt) if hps.model.use_spk else freevc_model.infer(c, mel=mel_tgt)
140
 
141
  # 保存结果
 
142
  os.makedirs("output", exist_ok=True)
143
  output_path = f"output/{output_name}.wav"
144
  write(output_path, hps.data.sampling_rate, audio[0][0].data.cpu().float().numpy())
145
 
146
- return output_path, "转换完成"
 
147
 
148
  except Exception as e:
149
  logger.error(f"转换错误: {str(e)}")
150
- return None, f"转换失败: {str(e)}"
 
151
 
152
  # Gradio界面
153
  with gr.Blocks(title="Chinese-FreeVC 语音转换") as app:
@@ -157,13 +171,13 @@ with gr.Blocks(title="Chinese-FreeVC 语音转换") as app:
157
  with gr.Column():
158
  src_input = gr.Audio(label="源语音", type="filepath")
159
  tgt_input = gr.Audio(label="目标音色", type="filepath")
160
- with gr.Row(): # 将输出文件名和模型选择放在同一排
161
- output_name = gr.Textbox(label="输出文件名", value="converted")
162
  model_dropdown = gr.Dropdown(
163
  choices=list(MODEL_OPTIONS.keys()),
164
  label="选择模型",
165
  value="Model_17000"
166
  )
 
167
  convert_btn = gr.Button("开始转换", variant="primary")
168
 
169
  with gr.Column():
 
40
  os.makedirs("model", exist_ok=True)
41
  os.makedirs("hubert/chinese-hubert-large-fairseq-ckpt", exist_ok=True)
42
 
 
43
  freevc_paths = {}
44
  for model_name, model_path in MODEL_OPTIONS.items():
45
  path = hf_hub_download(
 
50
  )
51
  freevc_paths[model_name] = path
52
 
 
53
  hubert_dir = "hubert/chinese-hubert-large-fairseq-ckpt"
54
  snapshot_download(
55
  repo_id=MODEL_CONFIG["hubert"]["repo_id"],
 
64
  "hubert": hubert_paths
65
  }
66
 
67
+ def load_hubert(hubert_dir, status_list):
68
  """加载HuBERT模型(使用fairseq格式的检查点)"""
69
+ status_list.append("正在加载 HuBERT 模型...")
70
  logger.info("正在加载 HuBERT 模型...")
71
  model = HubertModel.from_pretrained(hubert_dir)
72
  feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(hubert_dir)
73
  return model.to(device).float().eval(), feature_extractor
74
 
75
+ def load_freevc(model_path, status_list):
76
  """加载FreeVC模型(使用本地配置文件)"""
77
+ status_list.append(f"正在从 {model_path} 加载 FreeVC 模型...")
78
  logger.info(f"正在从 {model_path} 加载 FreeVC 模型...")
79
  hps = utils.get_hparams_from_file("configs/freevc.json")
80
 
 
91
  return net_g, smodel, hps
92
 
93
  # 预加载模型
94
+ status_list = ["正在下载模型..."]
95
  logger.info("正在下载模型...")
96
  model_paths = download_models()
97
+ status_list.append(f"模型路径: {model_paths}")
98
  logger.info(f"模型路径: {model_paths}")
99
+ status_list.append("正在初始化 HuBERT...")
100
  logger.info("正在初始化 HuBERT...")
101
  hubert_dir = "hubert/chinese-hubert-large-fairseq-ckpt"
102
+ hubert_model, hubert_feature_extractor = load_hubert(hubert_dir, status_list)
103
 
104
  def voice_conversion(src_audio, tgt_audio, output_name, model_selection):
105
  """执行语音转换"""
106
+ status_list = ["开始语音转换..."]
107
+
108
  try:
109
  # 加载选中的FreeVC模型
110
+ freevc_model, speaker_model, hps = load_freevc(MODEL_OPTIONS[model_selection], status_list)
111
 
112
  with torch.no_grad():
113
  # 处理目标音频
114
+ status_list.append("处理目标音频...")
115
  wav_tgt, _ = librosa.load(tgt_audio, sr=hps.data.sampling_rate)
116
  wav_tgt, _ = librosa.effects.trim(wav_tgt, top_db=20)
117
 
118
  if hps.model.use_spk:
119
+ status_list.append("提取目标音色特征(使用说话人编码器)...")
120
  g_tgt = speaker_model.embed_utterance(wav_tgt)
121
  g_tgt = torch.from_numpy(g_tgt).unsqueeze(0).to(device)
122
  else:
123
+ status_list.append("生成目标音频 Mel 频谱图...")
124
  wav_tgt = torch.from_numpy(wav_tgt).unsqueeze(0).to(device)
125
  mel_tgt = mel_spectrogram_torch(
126
  wav_tgt,
 
133
  hps.data.mel_fmax
134
  )
135
 
136
+ # 处理源音频
137
+ status_list.append("处理源音频(转换为16kHz)...")
138
+ wav_src, _ = librosa.load(src_audio, sr=16_000)
139
  inputs = hubert_feature_extractor(
140
  wav_src,
141
  return_tensors="pt",
142
  sampling_rate=16_000
143
  ).input_values.to(device)
144
 
145
+ status_list.append("提取源音频特征...")
146
  c = hubert_model(inputs.float()).last_hidden_state.transpose(1, 2)
147
 
148
  # 执行转换
149
+ status_list.append("执行语音转换...")
150
  audio = freevc_model.infer(c, g=g_tgt) if hps.model.use_spk else freevc_model.infer(c, mel=mel_tgt)
151
 
152
  # 保存结果
153
+ status_list.append("保存转换结果...")
154
  os.makedirs("output", exist_ok=True)
155
  output_path = f"output/{output_name}.wav"
156
  write(output_path, hps.data.sampling_rate, audio[0][0].data.cpu().float().numpy())
157
 
158
+ status_list.append("转换完成")
159
+ return output_path, "\n".join(status_list)
160
 
161
  except Exception as e:
162
  logger.error(f"转换错误: {str(e)}")
163
+ status_list.append(f"转换失败: {str(e)}")
164
+ return None, "\n".join(status_list)
165
 
166
  # Gradio界面
167
  with gr.Blocks(title="Chinese-FreeVC 语音转换") as app:
 
171
  with gr.Column():
172
  src_input = gr.Audio(label="源语音", type="filepath")
173
  tgt_input = gr.Audio(label="目标音色", type="filepath")
174
+ with gr.Row(): # 输出文件名和模型选择在同一排
 
175
  model_dropdown = gr.Dropdown(
176
  choices=list(MODEL_OPTIONS.keys()),
177
  label="选择模型",
178
  value="Model_17000"
179
  )
180
+ output_name = gr.Textbox(label="输出文件名", value="converted")
181
  convert_btn = gr.Button("开始转换", variant="primary")
182
 
183
  with gr.Column():