"""Gradio web UI for voice cloning with the XTTS v2 model from the ``TTS`` package.

The user supplies a reference voice recording and some text; the app
normalizes the text, synthesizes speech in the reference voice, and returns
the path of the generated WAV file.
"""
import os
import random
import string

import gradio as gr
import torch
from cn_tx import TextNorm
from TTS.utils.manage import ModelManager


@staticmethod
def new_ask_tos(model_full_path):
    """Replacement for ``ModelManager.ask_tos`` that always answers True.

    Wrapped in ``staticmethod`` so that, once assigned on the class, it is
    called without an implicit ``self`` argument.
    """
    return True


# Patch applied before importing TTS.api, matching the original ordering.
ModelManager.ask_tos = new_ask_tos

from TTS.api import TTS  # noqa: E402  (must follow the patch above)

# Directory (relative to the working directory) where generated audio is saved.
OUTPUT_DIR = "output"

# Language code passed to the synthesizer.
# NOTE(review): the UI labels are Chinese and `cn_tx` appears to be a Chinese
# text normalizer, yet the language is "pt" -- confirm whether "zh-cn" was
# intended. Value left unchanged to preserve current behavior.
LANGUAGE = "pt"

# Number of random characters used for each output filename.
FILENAME_LENGTH = 16

normalizer = TextNorm()

# Loaded in the ``__main__`` guard below; stays None when the module is only
# imported, so callers get a clear error instead of a NameError.
tts = None


def preprocess(text: str) -> str:
    """Return *text* after running it through the ``cn_tx`` normalizer."""
    # Disabled manual substitution kept for reference:
    # text = text.replace("掌柜", "涨柜")
    return normalizer(text)


def tts_function(voice_file, text):
    """Synthesize *text* in the voice of *voice_file* and return the WAV path.

    Args:
        voice_file: Filesystem path of the reference recording, as provided
            by the ``gr.Audio(type="filepath")`` input (None when nothing
            was uploaded or recorded).
        text: Text to convert to speech.

    Returns:
        Path of the generated ``.wav`` file inside ``OUTPUT_DIR``.

    Raises:
        gr.Error: If the model is not loaded, no reference audio was given,
            or the text is empty/blank.
    """
    if tts is None:
        raise gr.Error("语音模型尚未加载")
    if not voice_file:
        raise gr.Error("请先上传或录制参考声音")
    if not text or not text.strip():
        raise gr.Error("请输入要转换的文字")

    normalized_text = preprocess(text)

    # Random alphanumeric filename so successive requests do not overwrite
    # each other's output.
    random_filename = "".join(
        random.choices(string.ascii_letters + string.digits, k=FILENAME_LENGTH)
    )
    file_path = f"{OUTPUT_DIR}/{random_filename}.wav"

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Run TTS
    tts.tts_to_file(
        text=normalized_text,
        speaker_wav=voice_file,
        language=LANGUAGE,
        file_path=file_path,
    )
    return file_path


# Create the Gradio interface with Chinese labels and placeholders
iface = gr.Interface(
    fn=tts_function,
    inputs=[
        gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",
            label="上传或录制声音",
        ),
        gr.Textbox(
            lines=2,
            placeholder="请输入要转换的文字...",
            label="文本输入",
        ),
    ],
    outputs=gr.Audio(type="filepath", label="生成的语音"),
    title="数字栩生声音克隆",
    allow_flagging='never',
)


if __name__ == '__main__':
    # Initialize TTS on the GPU when one is available, otherwise on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
    iface.launch(inbrowser=True)