# OpenSpeech-TTS — api_env.py
# OpenAI-compatible text-to-speech API backed by edge-tts.
import asyncio
import hmac
import io
import os
import tempfile
from functools import wraps

import edge_tts
from art import tprint
from flask import Flask, request, send_file, jsonify
from gevent.pywsgi import WSGIServer
# Flask application object; all routes below are registered on it.
app = Flask(__name__)
# Use environment variables for API_KEY and PORT
API_KEY = os.environ.get('API_KEY')  # Bearer token clients must present; None when unset.
PORT = int(os.environ.get('PORT', 5000))  # Listening port; defaults to 5000.
# Startup banner — printed once at import time, before the server starts.
tprint("OPEN SPEECH")
print(f"OpenSource TTS API Compatible with OpenAI API")
print(f" ")
print(f" ---------------------------------------------------------------- ")
print(f" * Serving OpenVoice API")
print(f" * Server running on http://localhost:{PORT}")
print(f" * Voice Endpoint Generated: http://localhost:{PORT}/v1/audio/speech")
print(f" ")
print("Press CTRL+C to quit")
def require_api_key(f):
    """Decorator rejecting requests that lack a valid ``Authorization: Bearer <key>``.

    Responds 401 when the header is missing, not Bearer-shaped, or the token
    does not match the configured API_KEY. The key comparison is constant-time
    to avoid leaking key prefixes through response timing.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith('Bearer '):
            return jsonify({"error": "Missing or invalid API key"}), 401
        # Slice off the prefix rather than split('Bearer ')[1]: split corrupts
        # any token that itself contains the substring "Bearer ".
        token = auth_header[len('Bearer '):]
        # When API_KEY is unset, reject everything explicitly; compare_digest
        # requires two str arguments and defeats timing side-channels.
        if API_KEY is None or not hmac.compare_digest(token, API_KEY):
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function
@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    """Synthesize speech from the request body (OpenAI /v1/audio/speech compatible).

    Expects JSON {"input": <text>, "voice": <name>, "model": <ignored>} and
    returns the synthesized MP3 as an attachment. Responds 400 when 'input'
    is absent.
    """
    data = request.json
    if not data or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400
    text = data['input']
    model = data.get('model', 'tts-1')  # Accepted for API compatibility; edge-tts has no model choice.
    voice = data.get('voice', 'en-US-AriaNeural')
    # Map OpenAI voice names onto edge-tts neural voices; unknown names pass
    # through unchanged so callers may request any edge-tts voice directly.
    voice_mapping = {
        'alloy': 'en-US-AriaNeural',
        'echo': 'en-US-GuyNeural',
        'fable': 'en-GB-SoniaNeural',
        'onyx': 'en-US-ChristopherNeural',
        'nova': 'en-AU-NatashaNeural',
        'shimmer': 'en-US-JennyNeural'
    }
    edge_tts_voice = voice_mapping.get(voice, voice)

    async def generate_speech(path):
        # edge-tts is async-only; save() streams the synthesized MP3 to disk.
        communicate = edge_tts.Communicate(text, edge_tts_voice)
        await communicate.save(path)

    # The original used NamedTemporaryFile(delete=False) and never removed the
    # file, leaking one temp file per request (and holding an open handle on
    # the path edge-tts writes to, which fails on Windows). Generate to a
    # closed temp path, read the bytes, and always delete the file.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # edge-tts opens the path itself; keep no dangling handle.
    try:
        asyncio.run(generate_speech(path))
        with open(path, "rb") as audio_file:
            audio_bytes = audio_file.read()
    finally:
        try:
            os.remove(path)
        except OSError:
            pass  # Best-effort cleanup; never mask the real error or response.
    return send_file(io.BytesIO(audio_bytes), mimetype="audio/mpeg",
                     as_attachment=True, download_name="speech.mp3")
@app.route('/v1/models', methods=['GET'])
@require_api_key
def list_models():
    """Return the static catalogue of TTS "models" in OpenAI list format."""
    # Both ids map to the same edge-tts backend; they exist purely so that
    # OpenAI client libraries can enumerate "models" without erroring.
    return jsonify({
        "data": [
            {"id": "tts-1", "name": "Text-to-speech v1"},
            {"id": "tts-1-hd", "name": "Text-to-speech v1 HD"},
        ]
    })
@app.route('/v1/voices', methods=['GET'])
@require_api_key
def list_voices():
    """Return every edge-tts voice as {"voices": [{"name", "language"}, ...]}.

    Bug fix: edge_tts.list_voices() is a coroutine. The original called it
    without awaiting, so iterating the returned coroutine object raised
    TypeError on every request.
    """
    # Drive the async voice query to completion inside this sync Flask view.
    voices = asyncio.run(edge_tts.list_voices())
    # Reduce edge-tts voice records to the two fields clients need.
    formatted_voices = [{"name": v['ShortName'], "language": v['Locale']} for v in voices]
    return jsonify({"voices": formatted_voices})
if __name__ == '__main__':
    # With no key configured, the auth decorator rejects every request —
    # warn the operator but still start the server.
    if not API_KEY:
        print("Warning: API_KEY environment variable is not set.")
    # gevent's WSGI server handles concurrent requests, unlike Flask's
    # single-threaded development server.
    listener = ('0.0.0.0', PORT)
    WSGIServer(listener, app).serve_forever()