import os import io from typing_extensions import Literal import gradio as gr import tempfile import numpy as np from dotenv import load_dotenv from elevenlabs.client import ElevenLabs from elevenlabs import play, stream, save from elevenlabs import Voice, VoiceSettings from pydub import AudioSegment from pydub.playback import play import imageio_ffmpeg as ffmpeg import requests from audiostretchy.stretch import AudioStretch load_dotenv() def verify_auth(username, password): if username == USER and password == PASSWORD: return True else: return False ELE_API_KEY = os.getenv("ELE_API_KEY") USER = os.getenv("USER") PASSWORD = os.getenv("PASSWORD") MODEL = "eleven_multilingual_v2" ele_client = ElevenLabs(api_key=ELE_API_KEY) VOICE = [ "승현", "우승" ] KEY_MAPPING = { "승현": "0RBbbgk6KUJxHmWzPiHz", # 승현+제시카(2:1) "우승": "ASwOiisDbuaP2R1jUQU6", # 우승+TTS_KKC(1:1) } AudioSegment.converter = ffmpeg.get_ffmpeg_exe() def change_pitch(audio_segment, pitch_shift): new_sample_rate = int(audio_segment.frame_rate * (2.0 ** pitch_shift)) pitched_audio = audio_segment._spawn(audio_segment.raw_data, overrides={'frame_rate': new_sample_rate}) return pitched_audio.set_frame_rate(audio_segment.frame_rate) def predict( text: str, voice: str, output_file_format: Literal["mp3"] = "", speed: float = 1.0, pitch_shift: float = 0.0, stability: float = 0.5, similarity: float = 0.7, style_exaggeration: float = 0., speaker_boost: bool = True ): try: voice_setup=Voice( voice_id=KEY_MAPPING[voice], settings=VoiceSettings(stability=stability, similarity_boost=similarity, style=style_exaggeration, use_speaker_boost=speaker_boost) ) audio = ele_client.generate( text = text, voice = voice_setup, model=MODEL ) audio_data = b''.join(audio) except Exception as e: raise requests.exceptions.RequestException(f"An error occurred while generating speech. Please check your API key and come back try again. {str(e)}") print(f"[Text] {text}") audio_stretch = AudioStretch() audio_stretch.open_mp3(io.BytesIO(audio_data)) audio_stretch.stretch(ratio=1/speed) # speed 0.5 -> 2.0\ # Export the final audio to a temporary file with tempfile.NamedTemporaryFile(suffix=f".{output_file_format}", delete=False) as temp_file: audio_stretch.save(path=temp_file.name) audio = AudioSegment.from_file(temp_file.name) # Adjust pitch if needed if pitch_shift != 0.0: audio = change_pitch(audio, pitch_shift) audio.export(temp_file.name, format=output_file_format) temp_file_path = temp_file.name return temp_file_path with gr.Blocks() as demo: gr.Markdown("#