gpt-sovits / Synthesizers /base /Base_TTS_Synthesizer.py
nekoaoxiang
添加代码至 df4c937
558c90a
raw
history blame contribute delete
3.67 kB
from abc import ABC, abstractmethod
from .Base_TTS_Task import Base_TTS_Task as TTS_Task
import json
from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple
from pydantic import BaseModel, Field, model_validator
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, List, Union, Generator, Tuple
from typing_extensions import Literal
import numpy as np
import wave,io
class Base_TTS_Synthesizer(ABC):
    """
    Abstract base class for a Text-To-Speech (TTS) synthesizer.

    Concrete synthesizers must implement ``generate``, ``get_characters``,
    ``params_parser`` and ``ms_like_parser``; until they do, the subclass
    cannot be instantiated.

    Attributes:
        ui_config (Dict[str, List]): A dictionary containing UI configuration settings.
        debug_mode (bool): Debug flag stored on the instance for subclasses to consult.
    """

    # Class-level defaults. __init__ always assigns per-instance values, so the
    # shared dict below is never handed out to instances by this class.
    ui_config: Dict[str, List] = {}
    debug_mode: bool = False

    def __init__(self, **kwargs):
        """
        Store the optional UI configuration and debug flag on the instance.

        Any other keyword arguments are accepted and ignored.

        Args:
            ui_config (Dict[str, List], optional): Configuration for user interface
                settings. Defaults to a new empty dict.
            debug_mode (bool, optional): Enables or disables debug mode.
                Defaults to False.
        """
        # A fresh ``{}`` is created on every call, so instances never share
        # the default mapping.
        self.ui_config = kwargs.get("ui_config", {})
        self.debug_mode = kwargs.get("debug_mode", False)

    @abstractmethod
    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """
        Generate speech from a given TTS task.

        Args:
            task (TTS_Task): The task containing data and parameters for speech synthesis.
            return_type (Literal["filepath", "numpy"], optional): Selects the kind of
                result, either a file path or audio data. Defaults to "numpy".
            save_path (str, optional): The path to save the audio file.

        Returns:
            Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]: Depending
            on ``return_type``, a file path, a generator of ``(int, ndarray)`` audio
            tuples (presumably sample rate and samples -- confirm with the
            implementation), or another implementation-defined value.
        """

    @abstractmethod
    def get_characters(self) -> Dict[str, List[str]]:
        """
        Retrieve the available characters and their emotions for the TTS.

        Returns:
            Dict[str, List[str]]: A dictionary mapping character names to lists
            of their emotions.
        """

    @abstractmethod
    def params_parser(self, data: Any) -> TTS_Task:
        """
        Parse input data into a TTS_Task.

        Args:
            data (Any): The raw input data to be parsed.

        Returns:
            TTS_Task: A TTS task object created from the input data.
        """

    @abstractmethod
    def ms_like_parser(self, data: Any) -> TTS_Task:
        """
        Parse input data in a Microsoft-like format into a TTS_Task.

        Args:
            data (Any): The raw input data to be parsed.

        Returns:
            TTS_Task: A TTS task object created from the Microsoft-like
            formatted input data.
        """
def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2) -> bytes:
    """
    Generate a wave header with no data.

    The header is produced by opening a ``wave`` writer on an in-memory
    buffer, setting the format parameters, and closing it without writing
    any frames.

    Args:
        sample_rate (int): The sample rate of the audio.
        channels (int, optional): The number of audio channels. Defaults to 1.
        sample_width (int, optional): The sample width in bytes. Defaults to 2.

    Returns:
        bytes: The wave header as bytes.

    Raises:
        wave.Error: If the ``wave`` module rejects one of the parameters.
    """
    wav_buf = io.BytesIO()
    with wave.open(wav_buf, "wb") as vfout:
        vfout.setnchannels(channels)
        vfout.setsampwidth(sample_width)
        vfout.setframerate(sample_rate)
    # The writer emits the header only when it is closed, so the buffer must be
    # read after the ``with`` block. The BytesIO itself stays open because the
    # wave module did not open it.
    return wav_buf.getvalue()