Spaces:
Running
Running
global character_name | |
import os, json, sys | |
now_dir = os.getcwd() | |
sys.path.append(now_dir) | |
sys.path.append(os.path.join(now_dir, "GPT_SoVITS")) | |
from inference_core import inference, tts_pipline, get_streaming_tts_wav | |
def load_infer_config(character_path): | |
config_path = os.path.join(character_path, "infer_config.json") | |
"""加载环境配置文件""" | |
with open(config_path, 'r', encoding='utf-8') as f: | |
config = json.load(f) | |
if config.get("ref_wav_path") is not None: | |
return update_config_version(character_path) | |
return config | |
import os | |
import json | |
# 取得模型文件夹路径 | |
global models_path | |
models_path = "trained" | |
config_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "config.json") | |
if os.path.exists(config_path): | |
with open(config_path, 'r', encoding='utf-8') as f: | |
config = json.load(f) | |
models_path = config.get("models_path", "trained") | |
def remove_character_path(full_path,character_path): | |
# 从full_path中移除character_path部分 | |
relative_path = full_path.replace(character_path, '') | |
# 如果relative_path以路径分隔符开头,去除它 | |
if relative_path.startswith(os.path.sep): | |
relative_path = relative_path[len(os.path.sep):] | |
return relative_path | |
def update_config_version(character_path): | |
config_path = os.path.join(character_path, "infer_config.json") | |
try: | |
with open(config_path, 'r', encoding='utf-8') as f: | |
config = json.load(f) | |
print("正在更新文件") | |
if config.get("ref_wav_path") is not None: | |
config["emotion_list"] = { | |
"default": { | |
"ref_wav_path": remove_character_path(config["ref_wav_path"],character_path), | |
"prompt_text": config["prompt_text"], | |
"prompt_language": config["prompt_language"] | |
} | |
} | |
config.pop("ref_wav_path", None) | |
config.pop("prompt_text", None) | |
config.pop("prompt_language", None) | |
config["sovits_path"] = remove_character_path(config["sovits_path"],character_path) | |
config["gpt_path"] = remove_character_path(config["gpt_path"],character_path) | |
with open(config_path, 'w', encoding='utf-8') as f: | |
json.dump(config, f, ensure_ascii=False, indent=4) | |
return config | |
except: | |
raise Exception("更新失败!请手动删除infer_config.json文件,让系统自动生成") | |
def auto_generate_infer_config(character_path): | |
## TODO: Auto-generate wav-list and prompt-list from character_path | |
## | |
# Initialize variables for file detection | |
print(f"正在自动生成配置文件: {character_path}") | |
ckpt_file_found = None | |
pth_file_found = None | |
wav_file_found = None | |
# Iterate through files in character_path to find matching file types | |
for dirpath, dirnames, filenames in os.walk(character_path): | |
for file in filenames: | |
# 构建文件的完整路径 | |
full_path = os.path.join(dirpath, file) | |
# 从full_path中移除character_path部分 | |
relative_path = remove_character_path(full_path,character_path) | |
# 根据文件扩展名和变量是否已赋值来更新变量 | |
if file.lower().endswith(".ckpt") and ckpt_file_found is None: | |
ckpt_file_found = relative_path | |
elif file.lower().endswith(".pth") and pth_file_found is None: | |
pth_file_found = relative_path | |
elif file.lower().endswith(".wav") and wav_file_found is None: | |
wav_file_found = relative_path | |
elif file.lower().endswith(".mp3"): | |
import pydub | |
# Convert mp3 to wav | |
wav_file_path = os.path.join(dirpath,os.path.splitext(file)[0] + ".wav") | |
pydub.AudioSegment.from_mp3(full_path).export(wav_file_path, format="wav") | |
if wav_file_found is None: | |
wav_file_found = remove_character_path(os.path.join(dirpath,os.path.splitext(file)[0] + ".wav"),character_path) | |
# Initialize infer_config with gpt_path and sovits_path regardless of wav_file_found | |
infer_config = { | |
"gpt_path": ckpt_file_found, | |
"sovits_path": pth_file_found, | |
"software_version": "1.1", | |
r"简介": r"这是一个配置文件适用于https://github.com/X-T-E-R/TTS-for-GPT-soVITS,是一个简单好用的前后端项目" | |
} | |
# If wav file is also found, update infer_config to include ref_wav_path, prompt_text, and prompt_language | |
if wav_file_found: | |
wav_file_name = os.path.splitext(os.path.basename(wav_file_found))[0] # Extract the filename without extension | |
infer_config["emotion_list"] = { | |
"default": { | |
"ref_wav_path": wav_file_found, | |
"prompt_text": wav_file_name, | |
"prompt_language": "多语种混合" | |
} | |
} | |
else: | |
raise Exception("找不到wav参考文件!请把有效wav文件放置在模型文件夹下。否则效果可能会非常怪") | |
pass | |
# Check if the essential model files were found | |
if ckpt_file_found and pth_file_found: | |
infer_config_path = os.path.join(character_path, "infer_config.json") | |
try: | |
with open(infer_config_path , 'w', encoding='utf-8') as f: | |
json.dump(infer_config, f, ensure_ascii=False, indent=4) | |
except IOError as e: | |
print(f"无法写入文件: {infer_config_path}. 错误: {e}") | |
return infer_config_path | |
else: | |
return "Required model files (.ckpt or .pth) not found in character_path directory." | |
def load_character(cha_name): | |
global character_name | |
character_path=os.path.join(models_path,cha_name) | |
try: | |
# 加载配置 | |
config = load_infer_config(character_path) | |
# 尝试从环境变量获取gpt_path,如果未设置,则从配置文件读取 | |
gpt_path = os.path.join(character_path,config.get("gpt_path")) | |
# 尝试从环境变量获取sovits_path,如果未设置,则从配置文件读取 | |
sovits_path = os.path.join(character_path,config.get("sovits_path")) | |
except: | |
try: | |
# 尝试调用auto_get_infer_config | |
auto_generate_infer_config(character_path) | |
load_character(cha_name) | |
return | |
except: | |
# 报错 | |
raise Exception("找不到模型文件!请把有效模型放置在模型文件夹下,确保其中至少有pth、ckpt和wav三种文件。") | |
# 修改权重 | |
character_name = cha_name | |
tts_pipline.init_t2s_weights(gpt_path) | |
tts_pipline.init_vits_weights(sovits_path) | |
print(f"加载角色成功: {cha_name}") | |
def get_deflaut_character_name(): | |
import os | |
import json | |
character_info_path = os.path.join(models_path, "character_info.json") | |
default_character = None | |
if os.path.exists(character_info_path): | |
with open(character_info_path, "r", encoding='utf-8') as f: | |
try: | |
character_info = json.load(f) | |
default_character = character_info.get("deflaut_character") | |
except: | |
pass | |
if default_character in ["", None, "default"]: | |
default_character=None | |
if default_character is None or not os.path.exists(os.path.join(models_path, default_character)): | |
# List all items in models_path | |
all_items = os.listdir(models_path) | |
# Filter out only directories (folders) from all_items | |
trained_folders = [item for item in all_items if os.path.isdir(os.path.join(models_path, item))] | |
# If there are any directories found, set the first one as the default character | |
if trained_folders: | |
default_character = trained_folders[0] | |
return default_character | |
character_name = get_deflaut_character_name() | |
load_character(character_name) | |
def match_character_emotion(character_path): | |
if not os.path.exists(os.path.join(character_path, "reference_audio")): | |
# 如果没有reference_audio文件夹,就返回None | |
return None, None, None | |
def get_wav_from_text_api( | |
text, | |
text_language, | |
batch_size=1, | |
speed_factor=1.0, | |
top_k=12, | |
top_p=0.6, | |
temperature=0.6, | |
character_emotion="default", | |
cut_method="auto_cut", | |
seed=-1, | |
stream=False, | |
): | |
text = text.replace("\r", "\n").replace("<br>", "\n").replace("\t", " ") | |
# 加载环境配置 | |
config = load_infer_config(os.path.join(models_path, character_name)) | |
# 尝试从配置中提取参数,如果找不到则设置为None | |
ref_wav_path = None | |
prompt_text = None | |
prompt_language = None | |
if character_emotion == "auto": | |
# 如果是auto模式,那么就自动决定情感 | |
ref_wav_path, prompt_text, prompt_language = match_character_emotion(os.path.join(models_path, character_name)) | |
if ref_wav_path is None: | |
# 未能通过auto匹配到情感,就尝试使用指定的情绪列表 | |
emotion_list=config.get('emotion_list', None)# 这是新版的infer_config文件,如果出现错误请删除infer_config.json文件,让系统自动生成 | |
now_emotion="default" | |
for emotion, details in emotion_list.items(): | |
print(emotion) | |
if emotion==character_emotion: | |
now_emotion=character_emotion | |
break | |
for emotion, details in emotion_list.items(): | |
if emotion==now_emotion: | |
ref_wav_path = os.path.join(os.path.join(models_path,character_name), details['ref_wav_path']) | |
prompt_text = details['prompt_text'] | |
prompt_language = details['prompt_language'] | |
break | |
if ref_wav_path is None: | |
print("找不到ref_wav_path!请删除infer_config.json文件,让系统自动生成") | |
print(prompt_text) | |
# 根据是否找到ref_wav_path和prompt_text、prompt_language来决定ref_free的值 | |
if ref_wav_path is not None and prompt_text is not None and prompt_language is not None: | |
ref_free = False | |
else: | |
ref_free = True | |
top_k = 3 | |
top_p = 0.3 | |
temperature = 0.3 | |
params = { | |
"text": text, | |
"text_lang": text_language, | |
"ref_audio_path": ref_wav_path, | |
"prompt_text": prompt_text, | |
"prompt_lang": prompt_language, | |
"top_k": top_k, | |
"top_p": top_p, | |
"temperature": temperature, | |
"text_split_method": cut_method, | |
"batch_size": batch_size, | |
"speed_factor": speed_factor, | |
"ref_text_free": ref_free, | |
"split_bucket":True, | |
"return_fragment":stream, | |
"seed": seed, | |
} | |
# 调用原始的get_tts_wav函数 | |
# 注意:这里假设get_tts_wav函数及其所需的其它依赖已经定义并可用 | |
if stream == False: | |
return inference(**params) | |
else: | |
return get_streaming_tts_wav(params) | |
def update_character_info(): | |
try: | |
with open(os.path.join(models_path, "character_info.json"), "r", encoding='utf-8') as f: | |
default_character = json.load(f).get("deflaut_character", None) | |
except: | |
default_character = "" | |
characters_and_emotions = {} | |
for character_subdir in [f for f in os.listdir(models_path) if os.path.isdir(os.path.join(models_path, f))]: | |
if os.path.exists(os.path.join(models_path, character_subdir, "infer_config.json")): | |
try: | |
with open(os.path.join(models_path, character_subdir, "infer_config.json"), "r", encoding='utf-8') as f: | |
config = json.load(f) | |
emotion_list=[emotion for emotion in config.get('emotion_list', None)] | |
if emotion_list is not None: | |
characters_and_emotions[character_subdir] = emotion_list | |
else: | |
characters_and_emotions[character_subdir] = ["default"] | |
except: | |
characters_and_emotions[character_subdir] = ["default"] | |
else: | |
characters_and_emotions[character_subdir] = ["default"] | |
with open(os.path.join(models_path, "character_info.json"), "w", encoding='utf-8') as f: | |
json.dump({"deflaut_character": default_character, "characters_and_emotions": characters_and_emotions}, f, ensure_ascii=False, indent=4) | |
return {"deflaut_character": default_character, "characters_and_emotions": characters_and_emotions} | |
# def test_audio_save(): | |
# fs, audio_to_save=get_wav_from_text_api("""这是一段音频测试""",'多语种混合') | |
# file_path = "example_audio.wav" | |
# from scipy.io.wavfile import write | |
# write(file_path, fs, audio_to_save) | |
# test_audio_save() | |
update_character_info() | |