# OpenSpeech-TTS — api_env.py
# OpenAI-compatible text-to-speech API backed by edge-tts.
import asyncio
import hmac
import io
import os
import tempfile
from functools import wraps

import edge_tts
from art import tprint
from flask import Flask, request, send_file, jsonify
from gevent.pywsgi import WSGIServer
# Flask application object; all routes below are registered on it.
app = Flask(__name__)
# Use environment variables for API_KEY and PORT
API_KEY = os.environ.get('API_KEY')  # Bearer token clients must present; None when unset.
PORT = int(os.environ.get('PORT', 5000))  # Listening port; defaults to 5000.
# Startup banner — printed once at import time, before the server starts.
tprint("OPEN SPEECH")
print(f"OpenSource TTS API Compatible with OpenAI API")
print(f" ")
print(f" ---------------------------------------------------------------- ")
print(f" * Serving OpenVoice API")
print(f" * Server running on http://localhost:{PORT}")
print(f" * Voice Endpoint Generated: http://localhost:{PORT}/v1/audio/speech")
print(f" ")
print("Press CTRL+C to quit")
def require_api_key(f):
    """Decorator rejecting requests that lack a valid ``Authorization: Bearer <key>``.

    Responds 401 when the header is missing, not Bearer-shaped, or the token
    does not match the configured API_KEY. The key comparison is constant-time
    to avoid leaking key prefixes through response timing.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith('Bearer '):
            return jsonify({"error": "Missing or invalid API key"}), 401
        # Slice off the prefix rather than split('Bearer ')[1]: split corrupts
        # any token that itself contains the substring "Bearer ".
        token = auth_header[len('Bearer '):]
        # When API_KEY is unset, reject everything explicitly; compare_digest
        # requires two str arguments and defeats timing side-channels.
        if API_KEY is None or not hmac.compare_digest(token, API_KEY):
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function
@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    """Synthesize speech from the request body (OpenAI /v1/audio/speech compatible).

    Expects JSON {"input": <text>, "voice": <name>, "model": <ignored>} and
    returns the synthesized MP3 as an attachment. Responds 400 when 'input'
    is absent.
    """
    data = request.json
    if not data or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400
    text = data['input']
    model = data.get('model', 'tts-1')  # Accepted for API compatibility; edge-tts has no model choice.
    voice = data.get('voice', 'en-US-AriaNeural')
    # Map OpenAI voice names onto edge-tts neural voices; unknown names pass
    # through unchanged so callers may request any edge-tts voice directly.
    voice_mapping = {
        'alloy': 'en-US-AriaNeural',
        'echo': 'en-US-GuyNeural',
        'fable': 'en-GB-SoniaNeural',
        'onyx': 'en-US-ChristopherNeural',
        'nova': 'en-AU-NatashaNeural',
        'shimmer': 'en-US-JennyNeural'
    }
    edge_tts_voice = voice_mapping.get(voice, voice)

    async def generate_speech(path):
        # edge-tts is async-only; save() streams the synthesized MP3 to disk.
        communicate = edge_tts.Communicate(text, edge_tts_voice)
        await communicate.save(path)

    # The original used NamedTemporaryFile(delete=False) and never removed the
    # file, leaking one temp file per request (and holding an open handle on
    # the path edge-tts writes to, which fails on Windows). Generate to a
    # closed temp path, read the bytes, and always delete the file.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # edge-tts opens the path itself; keep no dangling handle.
    try:
        asyncio.run(generate_speech(path))
        with open(path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        try:
            os.remove(path)
        except OSError:
            pass  # Best-effort cleanup; never mask the real error or response.
    return send_file(io.BytesIO(audio_bytes), mimetype="audio/mpeg",
                     as_attachment=True, download_name="speech.mp3")
@app.route('/v1/models', methods=['GET'])
@require_api_key
def list_models():
    """Return the static catalogue of TTS "models" in OpenAI list format."""
    # Both ids map to the same edge-tts backend; they exist purely so that
    # OpenAI client libraries can enumerate "models" without erroring.
    return jsonify({
        "data": [
            {"id": "tts-1", "name": "Text-to-speech v1"},
            {"id": "tts-1-hd", "name": "Text-to-speech v1 HD"},
        ]
    })
@app.route('/v1/voices', methods=['GET'])
@require_api_key
def list_voices():
    """Return every edge-tts voice as {"voices": [{"name", "language"}, ...]}.

    Bug fix: edge_tts.list_voices() is a coroutine. The original called it
    without awaiting, so iterating the returned coroutine object raised
    TypeError on every request.
    """
    # Drive the async voice query to completion inside this sync Flask view.
    voices = asyncio.run(edge_tts.list_voices())
    # Reduce edge-tts voice records to the two fields clients need.
    formatted_voices = [{"name": v['ShortName'], "language": v['Locale']} for v in voices]
    return jsonify({"voices": formatted_voices})
if __name__ == '__main__':
    # With no key configured, the auth decorator rejects every request —
    # warn the operator but still start the server.
    if not API_KEY:
        print("Warning: API_KEY environment variable is not set.")
    # gevent's WSGI server handles concurrent requests, unlike Flask's
    # single-threaded development server.
    listener = ('0.0.0.0', PORT)
    WSGIServer(listener, app).serve_forever()