import io
import json
import os
import sys
import threading
import wave
from datetime import datetime
from typing import Union, Generator, Tuple, Any, Optional, Dict, Literal
from urllib import parse

import numpy as np
import requests
import soundfile as sf

from Synthesizers.base import Base_TTS_Synthesizer, load_config
from .remote_task import (
    Remote_TTS_Task as TTS_Task,
    set_based_synthesizer,
    get_ui_config,
)


class Remote_Synthesizer(Base_TTS_Synthesizer):
    """TTS synthesizer that forwards every request to a remote HTTP server.

    The server is addressed as ``url + tts_endpoint`` for synthesis and
    ``url + character_endpoint`` for the character listing.
    """

    # Base address of the remote server and the paths appended to it.
    url: str = "http://127.0.0.1:5000"
    tts_endpoint: str = "/tts"
    character_endpoint: str = "/character_list"
    # Name handed to set_based_synthesizer / get_ui_config / TTS_Task.
    based_synthesizer: str = "gsv_fast"

    # NOTE(review): looks like a pydantic-style model config inherited
    # behaviour from the base class -- confirm against Base_TTS_Synthesizer.
    class Config:
        extra = "ignore"

    def __init__(self, config_path: Optional[str] = None, **kwargs):
        """Load settings from a JSON config file, then apply ``kwargs`` on top.

        Args:
            config_path: Path of the config file. When ``None``, the file
                ``configs/config.json`` next to this module is used.
            **kwargs: Overrides; they are passed to the base initializer and
                also take precedence over values read from the config file.
        """
        super().__init__(**kwargs)

        if config_path is None:
            config_path = os.path.join(
                os.path.dirname(__file__), "configs", "config.json"
            )
        config_dict = load_config(config_path)
        config_dict.update(kwargs)

        # Only settings that correspond to an existing attribute are applied;
        # unknown keys are silently skipped.
        for key, value in config_dict.items():
            if hasattr(self, key):
                setattr(self, key, value)

        set_based_synthesizer(self.based_synthesizer)
        self.ui_config = get_ui_config(self.based_synthesizer)

    def get_characters(self) -> dict:
        """Fetch the character listing from the remote server.

        Returns:
            The JSON-decoded body of a GET request to
            ``url + character_endpoint``.
        """
        url = self.url + self.character_endpoint
        res = requests.get(url)
        return json.loads(res.text)

    @staticmethod
    def stream_audio(
        url,
        data: Dict[str, Any],
        sample_rate: int = 32000,
        chunk_size: int = 1024,
    ) -> Generator[Tuple[int, np.ndarray], None, None]:
        """POST ``data`` as JSON to ``url`` and yield the audio as it arrives.

        Args:
            url: Full URL of the synthesis endpoint.
            data: JSON-serialisable request payload.
            sample_rate: Sample rate reported with every yielded chunk.
            chunk_size: Number of bytes requested per read from the response.

        Yields:
            ``(sample_rate, samples)`` tuples, where ``samples`` is an
            ``int16`` array built from the received bytes.

        Raises:
            Exception: If the server answers with a status other than 200.
        """
        headers = {"Content-Type": "application/json"}
        # The context manager releases the connection even when the consumer
        # stops iterating early or an error is raised mid-stream.
        with requests.post(
            url, data=json.dumps(data), headers=headers, stream=True
        ) as response:
            # Make sure the request succeeded before reading the body.
            if response.status_code != 200:
                raise Exception(
                    f"Failed to get audio stream, status code: {response.status_code}"
                )

            # The stream is assumed to carry raw 16-bit integer samples.
            # Network chunks are not guaranteed to end on a sample boundary,
            # so a dangling byte is carried over to the next chunk instead of
            # letting np.frombuffer fail on an odd-sized buffer.
            pending = b""
            for chunk in response.iter_content(chunk_size):
                buffer = pending + chunk
                usable = len(buffer) - (len(buffer) % 2)
                pending = buffer[usable:]
                if usable:
                    yield sample_rate, np.frombuffer(
                        buffer[:usable], dtype=np.int16
                    )
            # A single trailing byte cannot form a sample and is dropped.

    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """Run a synthesis task on the remote server.

        Args:
            task: Task whose ``data`` is sent as the JSON request body and
                whose ``stream`` flag selects streaming for numpy output.
            return_type: ``"filepath"`` to get the path of the saved audio
                file, ``"numpy"`` to get a generator of
                ``(sample_rate, samples)`` tuples.
            save_path: Where to store the audio in the non-streaming case.
                Defaults to ``tmp_audio/<timestamp>.wav``.

        Returns:
            The saved file path, a generator of ``(sample_rate, samples)``
            tuples, or ``None`` when ``return_type`` is neither
            ``"filepath"`` nor ``"numpy"``.

        Raises:
            Exception: If the server answers with a status other than 200.
        """
        url = self.url + self.tts_endpoint
        data = task.data

        if self.debug_mode:
            print(return_type)
            print(f"generate task: \n{data}")

        if return_type == "numpy" and task.stream:
            return self.stream_audio(url, data)

        if return_type not in ("filepath", "numpy"):
            # Unsupported return types fall through without a result.
            return None

        headers = {"Content-Type": "application/json"}
        if save_path is None:
            save_path = f"tmp_audio/{datetime.now().strftime('%Y%m%d%H%M%S')}.wav"

        res = requests.post(url, data=json.dumps(data), headers=headers)
        if res.status_code != 200:
            raise Exception(f"remote synthesizer error: {res.text}")

        # The target directory (notably the default tmp_audio/) may not exist
        # yet; create it so that open() below does not fail.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        with open(save_path, "wb") as f:
            f.write(res.content)

        if return_type == "filepath":
            return save_path

        audiodata, sr = sf.read(save_path)
        # Wrapped in a one-shot generator so that the non-streaming result has
        # the same shape as the streaming one.
        return ((sr, audiodata) for _ in range(1))

    def params_parser(self, data) -> TTS_Task:
        """Build a task from ``data``, tagged with this synthesizer's backend."""
        return TTS_Task(based_synthesizer=self.based_synthesizer, **data)

    def ms_like_parser(self, data) -> TTS_Task:
        """Build a task from ``data``, tagged with this synthesizer's backend."""
        return TTS_Task(based_synthesizer=self.based_synthesizer, **data)