openvoice2

Running

App Files Files Community

openvoice2 / openvoice_app.py

poemsforaphrodite

Update openvoice_app.py

821e73d verified 5 months ago

raw

history blame

5.12 kB

	import os
	import torch
	import argparse
	import gradio as gr
	import requests
	from openvoice import se_extractor
	from openvoice.api import BaseSpeakerTTS, ToneColorConverter
	from dotenv import load_dotenv
	from openai import OpenAI
	from elevenlabs.client import ElevenLabs
	from elevenlabs import play, save
	from flask import Flask
	from flask_limiter import Limiter
	from flask_limiter.util import get_remote_address

	# Pull settings from a local .env file into the process environment.
	load_dotenv()

	# Flask application object; in the code shown it only hosts the limiter.
	app = Flask(__name__)

	# Rate limiter keyed on the remote address, defaulting to five requests
	# per minute.
	# NOTE(review): nothing visible here ever runs `app`, so confirm that these
	# limits actually reach requests served by the Gradio UI.
	limiter = Limiter(
	    get_remote_address,
	    app=app,
	    default_limits=["5 per minute"],
	)

	# Command-line options.
	parser = argparse.ArgumentParser()
	parser.add_argument(
	    "--share",
	    action="store_true",
	    default=False,
	    help="make link public",
	)
	args = parser.parse_args()

	# ElevenLabs credentials (read from the environment) and the API client.
	api_key = os.environ.get("ELEVENLABS_API_KEY")
	client = ElevenLabs(api_key=api_key)

	# Runtime settings: compute device, output folder, language list.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	output_dir = "outputs"
	os.makedirs(output_dir, exist_ok=True)
	supported_languages = ["zh", "en"]

	# Function to get all voices
	def get_voices(api_key, timeout=30):
	    """Fetch the voice listing from the ElevenLabs ``/v1/voices`` endpoint.

	    Args:
	        api_key: Value sent in the ``xi-api-key`` request header.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout a stalled connection would block the caller forever.

	    Returns:
	        The decoded JSON body of the response.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status, so
	            an error payload is never mistaken for a voice listing.
	        requests.Timeout: If the server does not answer within ``timeout``.
	    """
	    url = "https://api.elevenlabs.io/v1/voices"
	    headers = {"xi-api-key": api_key}
	    response = requests.get(url, headers=headers, timeout=timeout)
	    response.raise_for_status()
	    return response.json()

	# Function to delete a voice by ID
	def delete_voice(api_key, voice_id, timeout=30):
	    """Send a DELETE request for one voice to the ElevenLabs API.

	    Args:
	        api_key: Value sent in the ``xi-api-key`` request header.
	        voice_id: Identifier interpolated into the ``/v1/voices/{voice_id}``
	            URL.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout a stalled connection would block the caller forever.

	    Returns:
	        A ``(status_code, response_text)`` tuple. HTTP error statuses are
	        reported through the tuple rather than raised, so the caller can
	        log the outcome either way.

	    Raises:
	        requests.Timeout: If the server does not answer within ``timeout``.
	    """
	    url = f"https://api.elevenlabs.io/v1/voices/{voice_id}"
	    headers = {"xi-api-key": api_key}
	    response = requests.delete(url, headers=headers, timeout=timeout)
	    return response.status_code, response.text

	# Predict function. Rate limiting is NOT active: the decorator below is
	# commented out.
	#@limiter.limit("100 per minute")
	def predict(prompt, style, audio_file_pth, voice_name):
	    """Clone a voice from the reference audio and synthesise ``prompt``.

	    Args:
	        prompt: Text to synthesise; must be between 2 and 200 characters.
	        style: Style chosen in the UI. Not used by this function; kept so
	            the signature matches the inputs wired to the click handler.
	        audio_file_pth: Path of the reference audio file to clone from.
	        voice_name: Name given to the cloned voice.

	    Returns:
	        A ``(text_hint, save_path, audio_file_pth)`` tuple. When an input
	        is rejected, ``text_hint`` holds an ``[ERROR] ...`` message and the
	        other two items are ``None``. On success ``text_hint`` is empty,
	        ``save_path`` is the synthesised audio file and ``audio_file_pth``
	        is echoed back.
	    """
	    # Validate everything before any API call is made.
	    prompt = prompt or ''
	    if len(prompt) < 2:
	        return "[ERROR] Please provide a longer prompt text.\n", None, None
	    if len(prompt) > 200:
	        return (
	            "[ERROR] Text length limited to 200 characters. "
	            "Please try shorter text.\n",
	            None,
	            None,
	        )
	    if not audio_file_pth:
	        return "[ERROR] Please upload a reference audio file.\n", None, None
	    if not voice_name or not voice_name.strip():
	        return (
	            "[ERROR] Please provide a name for the cloned voice.\n",
	            None,
	            None,
	        )

	    print(audio_file_pth)
	    # NOTE: the cloned voice is not deleted afterwards, so each successful
	    # call leaves another voice on the ElevenLabs account.
	    voice = client.clone(
	        name=voice_name,
	        description="A trial voice model for testing",
	        files=[audio_file_pth],
	    )

	    # Generate audio from text and write it to disk.
	    # NOTE(review): every call writes to the same file; confirm requests
	    # are never processed concurrently.
	    audio = client.generate(text=prompt, voice=voice)
	    save_path = f'{output_dir}/output.wav'
	    save(audio, save_path)

	    return '', save_path, audio_file_pth

	# Gradio interface setup

	# Hide Gradio footer and record button. The stylesheet is defined before the
	# UI is built and handed to gr.Blocks, because demo.launch(debug=True)
	# blocks and anything placed after it would not run while the app is up.
	css = """
	footer {visibility: hidden}
	audio .btn-container {display: none}
	"""

	with gr.Blocks(theme=gr.themes.Glass(), css=css) as demo:
	    with gr.Row():
	        # Left column: inputs.
	        with gr.Column():
	            input_text_gr = gr.Textbox(
	                label="Text Prompt",
	                info="One or two sentences at a time is better. Up to 200 text characters.",
	                value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
	            )
	            # NOTE(review): the info text below describes the reference
	            # audio rather than the style choice; confirm where it belongs.
	            style_gr = gr.Dropdown(
	                label="Style",
	                choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
	                info="Please upload a reference audio file that is at least 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
	                max_choices=1,
	                value="default",
	            )
	            ref_gr = gr.Audio(
	                label="Reference Audio",
	                type="filepath",
	                value="resources/demo_speaker2.mp3",
	                sources=["upload"],  # Allow only upload
	            )
	            voice_name_gr = gr.Textbox(
	                label="Voice Name",
	                info="Name for the cloned voice model.",
	                value="TrialVoice",
	            )
	            tts_button = gr.Button("Send", elem_id="send-btn", visible=True)

	        # Right column: outputs, in the order predict() returns them.
	        with gr.Column():
	            out_text_gr = gr.Text(label="Info")
	            audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
	            ref_audio_gr = gr.Audio(label="Reference Audio Used")

	    # Register the click handler while still inside the Blocks context.
	    tts_button.click(
	        predict,
	        [input_text_gr, style_gr, ref_gr, voice_name_gr],
	        outputs=[out_text_gr, audio_gr, ref_audio_gr],
	    )

	demo.queue()
	# Must stay last: with debug=True this call does not return while serving.
	demo.launch(debug=True, show_api=False, share=args.share)