import os
import json
import sys

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from classic_inference_core import get_tts_wav, get_streaming_tts_wav, change_sovits_weights, change_gpt_weights

print("You are using the classic inference mode, which does not support parallel inference.\nIf you do not want this mode, set the classic_inference option in config.json to false.")
def load_infer_config(character_path):
    """Load the character's inference configuration file."""
    config_path = os.path.join(character_path, "infer_config.json")
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    # Old-format configs keep ref_wav_path at the top level; migrate them to the new layout first
    if config.get("ref_wav_path") is not None:
        return update_config_version(character_path)
    return config
# Resolve the models folder path (defaults to "trained"; can be overridden via models_path in config.json)
models_path = "trained"
config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config.json")
if os.path.exists(config_path):
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
        models_path = config.get("models_path", "trained")
def remove_character_path(full_path, character_path):
    # Strip the character_path prefix from full_path
    relative_path = full_path.replace(character_path, '')
    # If relative_path starts with a path separator, remove it
    if relative_path.startswith(os.path.sep):
        relative_path = relative_path[len(os.path.sep):]
    return relative_path
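# Illustrative example (hypothetical paths): remove_character_path("trained/Alice/Alice_e8.pth", "trained/Alice")
# returns "Alice_e8.pth"; the relative path is later re-joined with the character folder when loading weights.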
def update_config_version(character_path):
    config_path = os.path.join(character_path, "infer_config.json")
    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
            print("Updating the config file")
            # Migrate an old-format config: move the top-level reference fields into emotion_list
            if config.get("ref_wav_path") is not None:
                config["emotion_list"] = {
                    "default": {
                        "ref_wav_path": remove_character_path(config["ref_wav_path"], character_path),
                        "prompt_text": config["prompt_text"],
                        "prompt_language": config["prompt_language"]
                    }
                }
                config.pop("ref_wav_path", None)
                config.pop("prompt_text", None)
                config.pop("prompt_language", None)
                config["sovits_path"] = remove_character_path(config["sovits_path"], character_path)
                config["gpt_path"] = remove_character_path(config["gpt_path"], character_path)
                with open(config_path, 'w', encoding='utf-8') as f:
                    json.dump(config, f, ensure_ascii=False, indent=4)
            return config
    except Exception:
        raise Exception("Update failed! Please delete infer_config.json manually and let the system regenerate it.")
def auto_generate_infer_config(character_path):
## TODO: Auto-generate wav-list and prompt-list from character_path
##
# Initialize variables for file detection
print(f"正在自动生成配置文件: {character_path}")
ckpt_file_found = None
pth_file_found = None
wav_file_found = None
# Iterate through files in character_path to find matching file types
for dirpath, dirnames, filenames in os.walk(character_path):
for file in filenames:
            # Build the file's full path
            full_path = os.path.join(dirpath, file)
            # Strip the character_path prefix from full_path
            relative_path = remove_character_path(full_path, character_path)
            # Keep the first file found of each relevant type, based on its extension
            if file.lower().endswith(".ckpt") and ckpt_file_found is None:
                ckpt_file_found = relative_path
            elif file.lower().endswith(".pth") and pth_file_found is None:
                pth_file_found = relative_path
            elif file.lower().endswith(".wav") and wav_file_found is None:
                wav_file_found = relative_path
            elif file.lower().endswith(".mp3"):
                # Convert mp3 to wav (pydub is imported lazily because it is only needed here)
                import pydub
                wav_file_path = os.path.join(dirpath, os.path.splitext(file)[0] + ".wav")
                pydub.AudioSegment.from_mp3(full_path).export(wav_file_path, format="wav")
                if wav_file_found is None:
                    wav_file_found = remove_character_path(wav_file_path, character_path)
# Initialize infer_config with gpt_path and sovits_path regardless of wav_file_found
infer_config = {
"gpt_path": ckpt_file_found,
"sovits_path": pth_file_found,
"software_version": "1.1",
r"简介": r"这是一个配置文件适用于https://github.com/X-T-E-R/TTS-for-GPT-soVITS,是一个简单好用的前后端项目"
}
# If wav file is also found, update infer_config to include ref_wav_path, prompt_text, and prompt_language
if wav_file_found:
wav_file_name = os.path.splitext(os.path.basename(wav_file_found))[0] # Extract the filename without extension
infer_config["emotion_list"] = {
"default": {
"ref_wav_path": wav_file_found,
"prompt_text": wav_file_name,
"prompt_language": "多语种混合"
}
}
    else:
        raise Exception("No reference wav file found! Please place a valid wav file in the model folder, otherwise the results may sound very strange.")
# Check if the essential model files were found
if ckpt_file_found and pth_file_found:
infer_config_path = os.path.join(character_path, "infer_config.json")
try:
with open(infer_config_path , 'w', encoding='utf-8') as f:
json.dump(infer_config, f, ensure_ascii=False, indent=4)
except IOError as e:
print(f"无法写入文件: {infer_config_path}. 错误: {e}")
return infer_config_path
else:
return "Required model files (.ckpt or .pth) not found in character_path directory."
def load_character(cha_name):
    global character_name
    character_path = os.path.join(models_path, cha_name)
    try:
        # Load the configuration
        config = load_infer_config(character_path)
        # Resolve gpt_path relative to the character folder
        gpt_path = os.path.join(character_path, config.get("gpt_path"))
        # Resolve sovits_path relative to the character folder
        sovits_path = os.path.join(character_path, config.get("sovits_path"))
    except Exception:
        try:
            # Try to auto-generate infer_config.json, then reload the character
            auto_generate_infer_config(character_path)
            load_character(cha_name)
            return
        except Exception:
            # Report the failure
            raise Exception("Model files not found! Please place a valid model in the model folder and make sure it contains at least the three file types: .pth, .ckpt and .wav.")
    # Switch the loaded weights
    character_name = cha_name
    change_sovits_weights(sovits_path)
    change_gpt_weights(gpt_path)
    print(f"Character loaded successfully: {cha_name}")
def get_deflaut_character_name():
    # Note: the "deflaut_character" key spelling is kept as-is for compatibility with existing config files
    character_info_path = os.path.join(models_path, "character_info.json")
    default_character = None
    if os.path.exists(character_info_path):
        with open(character_info_path, "r", encoding='utf-8') as f:
            try:
                character_info = json.load(f)
                default_character = character_info.get("deflaut_character")
            except Exception:
                pass
    if default_character is None or not os.path.exists(os.path.join(models_path, default_character)):
        # List all items in models_path
        all_items = os.listdir(models_path)
        # Filter out only directories (folders) from all_items
        trained_folders = [item for item in all_items if os.path.isdir(os.path.join(models_path, item))]
        # If there are any directories found, set the first one as the default character
        if trained_folders:
            default_character = trained_folders[0]
    return default_character
character_name = get_deflaut_character_name()
load_character(character_name)
def match_character_emotion(character_path):
    if not os.path.exists(os.path.join(character_path, "reference_audio")):
        # Without a reference_audio folder there is nothing to match against
        return None, None, None
    # Automatic emotion matching is not implemented here; always return a 3-tuple so the caller can unpack it
    return None, None, None
def get_wav_from_text_api(text, text_language, top_k=12, top_p=0.6, temperature=0.6, character_emotion="default", cut_method="auto_cut", stream=False):
    # Load the character's inference configuration
    config = load_infer_config(os.path.join(models_path, character_name))

    # Try to extract the reference parameters from the config; fall back to None if missing
    ref_wav_path = None
    prompt_text = None
    prompt_language = None
    if character_emotion == "auto":
        # In auto mode, try to determine the emotion automatically
        ref_wav_path, prompt_text, prompt_language = match_character_emotion(os.path.join(models_path, character_name))

    if ref_wav_path is None:
        # Auto matching found nothing (or was not requested), so use the configured emotion list
        emotion_list = config.get('emotion_list', None)  # New-format infer_config; if this errors, delete infer_config.json and let the system regenerate it
        now_emotion = "default"
        for emotion, details in emotion_list.items():
            print(emotion)
            if emotion == character_emotion:
                now_emotion = character_emotion
                break
        for emotion, details in emotion_list.items():
            if emotion == now_emotion:
                ref_wav_path = os.path.join(models_path, character_name, details['ref_wav_path'])
                prompt_text = details['prompt_text']
                prompt_language = details['prompt_language']
                break
        if ref_wav_path is None:
            print("ref_wav_path not found! Please delete infer_config.json and let the system regenerate it.")

    print(prompt_text)
    # Decide ref_free based on whether ref_wav_path, prompt_text and prompt_language were all found
    if ref_wav_path is not None and prompt_text is not None and prompt_language is not None:
        ref_free = False
    else:
        ref_free = True
        top_k = 3
        top_p = 0.3
        temperature = 0.3

    # Call the underlying TTS function
    # Note: get_tts_wav and its dependencies are assumed to be defined and available
    if not stream:
        return get_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, top_k=top_k, top_p=top_p, temperature=temperature, ref_free=ref_free, stream=stream)
    else:
        return get_streaming_tts_wav(ref_wav_path, prompt_text, prompt_language, text, text_language, top_k=top_k, top_p=top_p, temperature=temperature, ref_free=ref_free, byte_stream=True)
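# Usage sketch, based on the commented-out test at the bottom of this file
# (assumes a character has already been loaded by the module-level load_character call):
#   sr, audio = get_wav_from_text_api("这是一段音频测试", "多语种混合")
#   from scipy.io.wavfile import write
#   write("example_audio.wav", sr, audio)
# With stream=True the call returns the byte stream produced by get_streaming_tts_wav instead.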
def update_character_info():
    with open(os.path.join(models_path, "character_info.json"), "r", encoding='utf-8') as f:
        default_character = json.load(f).get("deflaut_character", None)
    characters_and_emotions = {}
    for character_subdir in [f for f in os.listdir(models_path) if os.path.isdir(os.path.join(models_path, f))]:
        if os.path.exists(os.path.join(models_path, character_subdir, "infer_config.json")):
            try:
                with open(os.path.join(models_path, character_subdir, "infer_config.json"), "r", encoding='utf-8') as f:
                    config = json.load(f)
                    # Collect the emotion names from the config; fall back to ["default"] if none are listed
                    emotion_list = list(config.get('emotion_list', {}))
                    if emotion_list:
                        characters_and_emotions[character_subdir] = emotion_list
                    else:
                        characters_and_emotions[character_subdir] = ["default"]
            except Exception:
                characters_and_emotions[character_subdir] = ["default"]
        else:
            characters_and_emotions[character_subdir] = ["default"]
    with open(os.path.join(models_path, "character_info.json"), "w", encoding='utf-8') as f:
        json.dump({"deflaut_character": default_character, "characters_and_emotions": characters_and_emotions}, f, ensure_ascii=False, indent=4)
    return {"deflaut_character": default_character, "characters_and_emotions": characters_and_emotions}
# def test_audio_save():
# fs, audio_to_save=get_wav_from_text_api("""这是一段音频测试""",'多语种混合')
# file_path = "example_audio.wav"
# from scipy.io.wavfile import write
# write(file_path, fs, audio_to_save)
# test_audio_save()
update_character_info()