# NOTE: page-status text apparently captured along with this listing ("Spaces: Sleeping"); it is not part of the module.
from abc import ABC, abstractmethod | |
from .Base_TTS_Task import Base_TTS_Task as TTS_Task | |
import json | |
from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple | |
from pydantic import BaseModel, Field, model_validator | |
import numpy as np | |
from abc import ABC, abstractmethod | |
from typing import Dict, List, Union, Generator, Tuple | |
from typing_extensions import Literal | |
import numpy as np | |
import wave,io | |
class Base_TTS_Synthesizer(ABC):
    """
    Base class describing the interface of a Text-To-Speech (TTS) synthesizer.

    The interface methods (``generate``, ``get_characters``, ``params_parser``
    and ``ms_like_parser``) are placeholders in this class: each one does
    nothing and returns ``None``. Concrete synthesizers are expected to
    override them.

    NOTE(review): none of these methods is decorated with ``@abstractmethod``,
    so overriding is not enforced and this class can be instantiated directly.
    Confirm whether that is intended.

    Attributes:
        ui_config (Dict[str, List]): A dictionary containing UI configuration settings.
        debug_mode (bool): Flag to toggle debug mode for additional logging and debugging information.
    """

    # Class-level fallbacks. __init__ rebinds both names on every instance, so
    # the shared dict below is only seen by code that skips __init__.
    ui_config: Dict[str, List] = {}
    debug_mode: bool = False

    def __init__(self, **kwargs):
        """
        Initializes the TTS synthesizer with optional UI configuration and debug mode.

        Only the two keys listed below are read; any other keyword argument is ignored.

        Args:
            ui_config (Dict[str, List], optional): Configuration for user interface settings.
                Defaults to a new empty dict.
            debug_mode (bool, optional): Enables or disables debug mode. Defaults to False.
        """
        self.ui_config = kwargs.get("ui_config", {})
        self.debug_mode = kwargs.get("debug_mode", False)

    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """
        Generates speech from a given TTS task.

        Placeholder: this base implementation does nothing and returns None.

        Args:
            task (TTS_Task): The task containing data and parameters for speech synthesis.
            return_type (Literal["filepath", "numpy"], optional): The type of return value,
                either a file path or audio data. Defaults to "numpy".
            save_path (str, optional): The path to save the audio file.

        Returns:
            Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]: Depending on
            return_type, an overriding implementation returns a file path, a generator of
            audio data, or another type.
        """
        pass

    def get_characters(self) -> Dict[str, List[str]]:
        """
        Retrieves the available characters and their emotions for the TTS.

        Placeholder: this base implementation does nothing and returns None.

        Returns:
            Dict[str, List[str]]: A dictionary mapping character names to lists of their emotions.
        """
        pass

    def params_parser(self, data: Any) -> TTS_Task:
        """
        Parses input data into a TTS_Task.

        Placeholder: this base implementation does nothing and returns None.

        Args:
            data (Any): The raw input data to be parsed.

        Returns:
            TTS_Task: A TTS task object created from the input data.
        """
        pass

    def ms_like_parser(self, data: Any) -> TTS_Task:
        """
        Parses input data in a Microsoft-like format into a TTS_Task.

        Placeholder: this base implementation does nothing and returns None.

        Args:
            data (Any): The raw input data to be parsed.

        Returns:
            TTS_Task: A TTS task object created from the Microsoft-like formatted input data.
        """
        pass
def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2) -> bytes:
    """
    Generate a wave header with no data.

    The header is produced by writing an empty WAV stream into an in-memory
    buffer, so its data-length field is 0.

    Args:
        sample_rate (int): The sample rate of the audio.
        channels (int, optional): The number of audio channels. Defaults to 1.
        sample_width (int, optional): The sample width in bytes. Defaults to 2.

    Returns:
        bytes: The wave header as bytes.

    Raises:
        wave.Error: If the ``wave`` module rejects one of the parameter values.
    """
    wav_buf = io.BytesIO()
    with wave.open(wav_buf, "wb") as vfout:
        vfout.setnchannels(channels)
        vfout.setsampwidth(sample_width)
        vfout.setframerate(sample_rate)
    # The header is written when the writer is closed at the end of the
    # with-block; the caller-supplied BytesIO stays open, so its full contents
    # can be taken directly without rewinding.
    return wav_buf.getvalue()