Spaces:

aoxiang1221
/

gpt-sovits

Sleeping

File size: 3,672 Bytes

558c90a

from abc import ABC, abstractmethod

from .Base_TTS_Task import Base_TTS_Task as TTS_Task
import json
from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple
from pydantic import BaseModel, Field, model_validator
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, List, Union, Generator, Tuple
from typing_extensions import Literal
import numpy as np
import wave,io

class Base_TTS_Synthesizer(ABC):
    """
    Common interface for Text-To-Speech (TTS) synthesizer backends.

    A concrete synthesizer must implement ``generate``, ``get_characters``,
    ``params_parser`` and ``ms_like_parser``; the class cannot be
    instantiated until all four are overridden.

    Attributes:
        ui_config (Dict[str, List]): UI configuration settings. Empty by default.
        debug_mode (bool): Debug-mode switch. Off by default.

    """

    # Class-level fallbacks; ``__init__`` rebinds both names on every instance.
    ui_config: Dict[str, List] = {}
    debug_mode: bool = False

    def __init__(self, **kwargs):
        """
        Store the optional settings supplied as keyword arguments.

        Only ``ui_config`` and ``debug_mode`` are read here; any other
        keyword is accepted and ignored by this base initializer.

        Args:
            ui_config (Dict[str, List], optional): UI configuration settings.
                A fresh empty dict is used when omitted.
            debug_mode (bool, optional): Debug-mode switch. ``False`` when omitted.

        """
        # Defaults first, caller-supplied values last so they win on conflict.
        settings = {"ui_config": {}, "debug_mode": False, **kwargs}
        self.ui_config = settings["ui_config"]
        self.debug_mode = settings["debug_mode"]

    @abstractmethod
    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """
        Synthesize speech for one TTS task.

        Args:
            task (TTS_Task): Data and parameters describing what to synthesize.
            return_type (Literal["filepath", "numpy"], optional): Selects the form
                of the result, a file path or audio data. Defaults to "numpy".
            save_path (str, optional): Where to save the audio file.

        Returns:
            Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]: A file
            path, a generator of audio data, or another type, depending on
            ``return_type``.

        """

    @abstractmethod
    def get_characters(self):
        """
        List the characters this TTS offers together with their emotions.

        Returns:
            Dict[str, List[str]]: Character name mapped to the list of that
            character's emotions.
        """

    @abstractmethod
    def params_parser(self, data):
        """
        Turn raw input data into a TTS_Task.

        Args:
            data (Any): Raw input to parse.

        Returns:
            TTS_Task: The task object built from ``data``.
        """

    @abstractmethod
    def ms_like_parser(self, data):
        """
        Turn raw input data in a Microsoft-like format into a TTS_Task.

        Args:
            data (Any): Raw input to parse.

        Returns:
            TTS_Task: The task object built from the Microsoft-like formatted ``data``.
        """


def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2):
    """
    Build the bytes of a WAV header that is followed by no audio frames.

    Args:
        sample_rate (int): Sample rate of the audio.
        channels (int, optional): Number of audio channels. Defaults to 1.
        sample_width (int, optional): Sample width in bytes. Defaults to 2.

    Returns:
        bytes: The header bytes produced by the ``wave`` writer.
    """
    header_buffer = io.BytesIO()
    with wave.open(header_buffer, "wb") as writer:
        # setparams applies channels, sample width and frame rate in that
        # order; the remaining fields are the writer's own defaults
        # (zero frames, uncompressed PCM).
        writer.setparams((channels, sample_width, sample_rate, 0, "NONE", "not compressed"))

    # Closing the writer emits the header; the in-memory buffer stays open.
    return header_buffer.getvalue()