Spaces:
Running
Running
File size: 5,122 Bytes
b72ab63 09274b3 b72ab63 02cd175 72ff919 160e4b0 72ff919 b72ab63 160e4b0 f7d35cb 160e4b0 02cd175 b72ab63 160e4b0 72ff919 4c58375 b72ab63 09274b3 b72ab63 160e4b0 09274b3 160e4b0 f7d35cb 821e73d b72ab63 396e7de b72ab63 396e7de 72ff919 821e73d 72ff919 160e4b0 72ff919 a65bab4 09274b3 821e73d 09274b3 821e73d bec1b9a 09274b3 bec1b9a 821e73d 09274b3 396e7de b72ab63 02cd175 9c0d38a b72ab63 160e4b0 b72ab63 4c5de34 b72ab63 821e73d b72ab63 821e73d b72ab63 4c5de34 02cd175 4c5de34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import os
import torch
import argparse
import gradio as gr
import requests
from openvoice import se_extractor
from openvoice.api import BaseSpeakerTTS, ToneColorConverter
from dotenv import load_dotenv
from openai import OpenAI
from elevenlabs.client import ElevenLabs
from elevenlabs import play, save
from flask import Flask
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
# Pull settings from a .env file into the process environment.
load_dotenv()

# Flask application plus a limiter attached to it; the limiter keys on the
# remote address and defaults to five requests per minute.
app = Flask(__name__)
limiter = Limiter(
    get_remote_address,
    app=app,
    default_limits=["5 per minute"],
)

# Command-line options.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--share",
    action='store_true',
    default=False,
    help="make link public",
)
args = parser.parse_args()

# ElevenLabs credentials: read the key once and share it between the SDK
# client and the raw REST helpers defined further down.
api_key = os.environ.get("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=api_key)

# Runtime constants.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
output_dir = 'outputs'
os.makedirs(output_dir, exist_ok=True)
supported_languages = ['zh', 'en']
# Function to get all voices
def get_voices(api_key, timeout=30):
    """Fetch the voice listing from the ElevenLabs REST API.

    Args:
        api_key: Key sent in the ``xi-api-key`` request header.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = "https://api.elevenlabs.io/v1/voices"
    headers = {"xi-api-key": api_key}
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.json()
# Function to delete a voice by ID
def delete_voice(api_key, voice_id, timeout=30):
    """Delete one voice through the ElevenLabs REST API.

    Args:
        api_key: Key sent in the ``xi-api-key`` request header.
        voice_id: Identifier of the voice to delete; it is interpolated into
            the request URL.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(status_code, text)`` tuple taken from the HTTP response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    url = f"https://api.elevenlabs.io/v1/voices/{voice_id}"
    headers = {"xi-api-key": api_key}
    # requests has no default timeout; without one a stalled connection
    # would block the caller indefinitely.
    response = requests.delete(url, headers=headers, timeout=timeout)
    return response.status_code, response.text
# Predict function. The IP-based rate-limit decorator below is currently commented out, so no limit is applied here.
#@limiter.limit("100 per minute")
def predict(prompt, style, audio_file_pth, voice_name):
    """Clone a voice from a reference recording and speak ``prompt`` with it.

    Args:
        prompt: Text to synthesise; must be 2 to 200 characters long.
        style: Style chosen in the UI. Not used by this function at present.
        audio_file_pth: Path of the reference audio file used for cloning.
        voice_name: Name given to the cloned voice.

    Returns:
        A ``(text_hint, save_path, audio_file_pth)`` tuple. When validation
        fails, ``text_hint`` carries an ``[ERROR]`` message and the other two
        items are ``None``. On success ``text_hint`` is an empty string.
    """
    # Validate every input first so that no API call is made for bad input.
    if prompt is None or len(prompt) < 2:
        return "[ERROR] Please provide a longer prompt text.\n", None, None
    if len(prompt) > 200:
        return (
            "[ERROR] Text length limited to 200 characters. Please try shorter text.\n",
            None,
            None,
        )
    if not audio_file_pth:
        return "[ERROR] Please upload a reference audio file.\n", None, None
    if not voice_name or not voice_name.strip():
        return "[ERROR] Please provide a name for the cloned voice.\n", None, None

    print(audio_file_pth)
    voice = client.clone(
        name=voice_name,
        description="A trial voice model for testing",
        files=[audio_file_pth],
    )

    # Generate audio from text and write it to the shared output location.
    audio = client.generate(text=prompt, voice=voice)
    save_path = f'{output_dir}/output.wav'
    save(audio, save_path)

    # TODO: delete the cloned voice once synthesis is done (e.g. with
    # delete_voice); every call creates a new voice and nothing removes it.
    return '', save_path, audio_file_pth
# Gradio interface setup
# Hide Gradio footer and record button. The stylesheet is handed to gr.Blocks
# at construction time; code placed after demo.launch() runs too late to
# affect the page that is served.
css = """
footer {visibility: hidden}
audio .btn-container {display: none}
"""

with gr.Blocks(theme=gr.themes.Glass(), css=css) as demo:
    with gr.Row():
        # Left column: inputs and the submit button.
        with gr.Column():
            input_text_gr = gr.Textbox(
                label="Text Prompt",
                info="One or two sentences at a time is better. Up to 200 text characters.",
                value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
            )
            style_gr = gr.Dropdown(
                label="Style",
                choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
                info="Please upload a reference audio file that is at least 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
                max_choices=1,
                value="default",
            )
            ref_gr = gr.Audio(
                label="Reference Audio",
                type="filepath",
                value="resources/demo_speaker2.mp3",
                sources=["upload"],  # Allow only upload
            )
            voice_name_gr = gr.Textbox(
                label="Voice Name",
                info="Name for the cloned voice model.",
                value="TrialVoice",
            )
            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
        # Right column: status text and audio players for the results.
        with gr.Column():
            out_text_gr = gr.Text(label="Info")
            audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
            ref_audio_gr = gr.Audio(label="Reference Audio Used")
    # Wire the button to predict(); the three outputs match the tuple it returns.
    tts_button.click(
        predict,
        [input_text_gr, style_gr, ref_gr, voice_name_gr],
        outputs=[out_text_gr, audio_gr, ref_audio_gr],
    )

demo.queue()
demo.launch(debug=True, show_api=False, share=args.share)
|