"""OpenAI-API-compatible text-to-speech HTTP server backed by edge-tts.

Endpoints (all require an ``Authorization: Bearer <API_KEY>`` header):

* ``POST /v1/audio/speech`` -- synthesize the ``input`` text and return MP3.
* ``GET  /v1/models``       -- fixed list of placeholder model ids.
* ``GET  /v1/voices``       -- voices reported by edge-tts.

Configuration is read from the ``API_KEY`` and ``PORT`` environment variables.
"""
import asyncio
import hmac
import io
import os
import tempfile
from functools import wraps

import edge_tts
from art import tprint
from flask import Flask, request, send_file, jsonify
from gevent.pywsgi import WSGIServer

app = Flask(__name__)

# Use environment variables for API_KEY and PORT
API_KEY = os.environ.get('API_KEY')
PORT = int(os.environ.get('PORT', 5000))

# Map OpenAI voices to edge-tts voices (this is a simple mapping, you might
# want to expand it). Names not listed here are passed to edge-tts unchanged.
VOICE_MAPPING = {
    'alloy': 'en-US-AriaNeural',
    'echo': 'en-US-GuyNeural',
    'fable': 'en-GB-SoniaNeural',
    'onyx': 'en-US-ChristopherNeural',
    'nova': 'en-AU-NatashaNeural',
    'shimmer': 'en-US-JennyNeural',
}

# Startup banner (printed at import time, as before).
tprint("OPEN SPEECH")
print("OpenSource TTS API Compatible with OpenAI API")
print(" ")
print(" ---------------------------------------------------------------- ")
print(" * Serving OpenVoice API")
print(f" * Server running on http://localhost:{PORT}")
print(f" * Voice Endpoint Generated: http://localhost:{PORT}/v1/audio/speech")
print(" ")
print("Press CTRL+C to quit")


def _is_valid_token(token):
    """Return True if *token* equals the configured API key.

    When no API key is configured (unset or empty) every token is rejected,
    so an empty bearer token can never match an empty key. The comparison is
    done on bytes with ``hmac.compare_digest`` so it runs in constant time
    and also accepts non-ASCII tokens without raising.
    """
    if not API_KEY:
        return False
    return hmac.compare_digest(token.encode('utf-8'), API_KEY.encode('utf-8'))


def require_api_key(f):
    """Decorate a view so it only runs for requests carrying the API key.

    The request must have an ``Authorization`` header of the form
    ``Bearer <token>``. A missing or malformed header, or a token that does
    not match ``API_KEY``, yields a JSON error body with HTTP status 401.
    """
    @wraps(f)
    def decorated_function(*args, **kwargs):
        prefix = 'Bearer '
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith(prefix):
            return jsonify({"error": "Missing or invalid API key"}), 401
        # Slice off the prefix rather than splitting on it, so a token that
        # happens to contain "Bearer " is compared in full, not truncated.
        token = auth_header[len(prefix):]
        if not _is_valid_token(token):
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function


@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    """Synthesize speech for the JSON request body and return it as MP3.

    Request body (JSON object):
        input: text to speak (required, non-empty string).
        voice: an OpenAI voice name from ``VOICE_MAPPING`` or an edge-tts
            voice name; defaults to ``en-US-AriaNeural``.
        model: accepted for OpenAI compatibility but ignored.

    Returns:
        The audio as an ``audio/mpeg`` attachment named ``speech.mp3``, or a
        JSON error with status 400 when the body is invalid.
    """
    # silent=True turns an unparsable / non-JSON body into None so the client
    # gets the JSON 400 below instead of a framework-generated error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data['input']
    if not isinstance(text, str) or not text:
        return jsonify({"error": "'input' must be a non-empty string"}), 400

    voice = data.get('voice', 'en-US-AriaNeural')
    if not isinstance(voice, str):
        return jsonify({"error": "'voice' must be a string"}), 400
    edge_tts_voice = VOICE_MAPPING.get(voice, voice)

    # edge-tts writes to a path, so reserve a temp file name and close our
    # own handle right away; the file is removed once its bytes are read.
    output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    output_file.close()
    try:
        communicate = edge_tts.Communicate(text, edge_tts_voice)
        asyncio.run(communicate.save(output_file.name))
        with open(output_file.name, 'rb') as audio_file:
            audio = io.BytesIO(audio_file.read())
    finally:
        # Always clean up, including when synthesis raises.
        os.unlink(output_file.name)

    return send_file(
        audio,
        mimetype="audio/mpeg",
        as_attachment=True,
        download_name="speech.mp3",
    )


@app.route('/v1/models', methods=['GET'])
@require_api_key
def list_models():
    """Return a fixed list of placeholder "models" as JSON."""
    # For simplicity, we're returning a fixed list of "models"
    models = [
        {"id": "tts-1", "name": "Text-to-speech v1"},
        {"id": "tts-1-hd", "name": "Text-to-speech v1 HD"},
    ]
    return jsonify({"data": models})


@app.route('/v1/voices', methods=['GET'])
@require_api_key
def list_voices():
    """Return the voices reported by edge-tts as ``{"voices": [...]}``.

    Each entry carries the voice's ``ShortName`` as ``name`` and its
    ``Locale`` as ``language``.
    """
    # edge_tts.list_voices is a coroutine function; it must be run to
    # completion to obtain the actual list of voice dicts.
    voices = asyncio.run(edge_tts.list_voices())
    formatted_voices = [
        {"name": v['ShortName'], "language": v['Locale']} for v in voices
    ]
    return jsonify({"voices": formatted_voices})


if __name__ == '__main__':
    if not API_KEY:
        # Without a key, every authenticated endpoint answers 401.
        print("Warning: API_KEY environment variable is not set.")
    http_server = WSGIServer(('0.0.0.0', PORT), app)
    http_server.serve_forever()