Spaces:
Sleeping
Sleeping
File size: 3,672 Bytes
558c90a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
from abc import ABC, abstractmethod
from .Base_TTS_Task import Base_TTS_Task as TTS_Task
import json
from typing import List, Dict, Literal, Optional, Any, Union, Generator, Tuple
from pydantic import BaseModel, Field, model_validator
import numpy as np
from abc import ABC, abstractmethod
from typing import Dict, List, Union, Generator, Tuple
from typing_extensions import Literal
import numpy as np
import wave,io
class Base_TTS_Synthesizer(ABC):
    """
    Common interface implemented by every Text-To-Speech (TTS) synthesizer.

    Subclasses have to override ``generate``, ``get_characters``,
    ``params_parser`` and ``ms_like_parser``; a class that leaves any of
    them abstract cannot be instantiated.

    Attributes:
        ui_config (Dict[str, List]): UI configuration settings.
        debug_mode (bool): Debug flag. This base class only stores the
            value; how it is used is up to the subclass.
    """

    # Class-level fallbacks; ``__init__`` rebinds both names on the instance.
    ui_config: Dict[str, List] = {}
    debug_mode: bool = False

    def __init__(self, **kwargs):
        """
        Store the optional settings passed as keyword arguments.

        Args:
            ui_config (Dict[str, List], optional): UI configuration settings.
                A new empty dict is used when the keyword is absent.
            debug_mode (bool, optional): Debug flag. ``False`` when the
                keyword is absent.

        Any other keyword arguments are accepted and ignored here.
        """
        # A fresh dict per instance, so the class-level dict is never shared.
        self.ui_config = kwargs["ui_config"] if "ui_config" in kwargs else {}
        self.debug_mode = kwargs["debug_mode"] if "debug_mode" in kwargs else False

    @abstractmethod
    def generate(
        self,
        task: TTS_Task,
        return_type: Literal["filepath", "numpy"] = "numpy",
        save_path: Optional[str] = None,
    ) -> Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
        """
        Synthesize speech for a TTS task.

        Args:
            task (TTS_Task): The task carrying the data and parameters for
                the synthesis.
            return_type (Literal["filepath", "numpy"], optional): Selects
                whether a file path or audio data is returned.
                Defaults to "numpy".
            save_path (str, optional): Where to save the audio file.

        Returns:
            Union[str, Generator[Tuple[int, np.ndarray], None, None], Any]:
                A file path, a generator of audio data, or another type,
                depending on ``return_type`` and the implementation.
        """
        ...

    @abstractmethod
    def get_characters(self):
        """
        Report the characters available to the synthesizer and their emotions.

        Returns:
            Dict[str, List[str]]: Character names mapped to the list of
                emotions each one supports.
        """
        ...

    @abstractmethod
    def params_parser(self, data):
        """
        Turn raw input data into a TTS task.

        Args:
            data (Any): The raw input data to parse.

        Returns:
            TTS_Task: The task built from ``data``.
        """
        ...

    @abstractmethod
    def ms_like_parser(self, data):
        """
        Turn input data in a Microsoft-like format into a TTS task.

        Args:
            data (Any): The raw, Microsoft-like formatted input to parse.

        Returns:
            TTS_Task: The task built from ``data``.
        """
        ...
def get_wave_header_chunk(sample_rate: int, channels: int = 1, sample_width: int = 2):
    """
    Build the header of a WAV stream that contains no audio frames.

    The header is produced by opening an in-memory WAV writer, configuring
    it, and closing it again without writing any frames.

    Args:
        sample_rate (int): The sample rate of the audio.
        channels (int, optional): The number of audio channels. Defaults to 1.
        sample_width (int, optional): The sample width in bytes. Defaults to 2.

    Returns:
        bytes: The wave header as bytes.
    """
    header_buffer = io.BytesIO()
    # Closing the writer (on leaving the ``with``) emits the header. The
    # buffer was passed in as an object, so it stays open afterwards.
    with wave.open(header_buffer, "wb") as wav_writer:
        wav_writer.setnchannels(channels)
        wav_writer.setsampwidth(sample_width)
        wav_writer.setframerate(sample_rate)
    return header_buffer.getvalue()
|