from flask import Flask, request, send_file, jsonify
from gevent.pywsgi import WSGIServer
from functools import wraps
from art import tprint
import edge_tts
import asyncio
import tempfile
import os

app = Flask(__name__)

# Use environment variables for API_KEY and PORT
API_KEY = os.environ.get('API_KEY')
PORT = int(os.environ.get('PORT', 5000))
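
# Example of how the server might be started (illustrative only; the key value and
# the module name "app.py" are assumptions, not part of this file):
#
#   export API_KEY=my-secret-key
#   export PORT=5000
#   python app.py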

tprint("OPEN SPEECH")
print(f"OpenSource TTS API Compatible with OpenAI API")
print(f" ")
print(f"   ---------------------------------------------------------------- ")
print(f" * Serving OpenVoice API")
print(f" * Server running on http://localhost:{PORT}")
print(f" * Voice Endpoint Generated: http://localhost:{PORT}/v1/audio/speech")
print(f" ")
print("Press CTRL+C to quit")

def require_api_key(f):
    """Reject requests that lack a valid 'Authorization: Bearer <API_KEY>' header."""
    @wraps(f)
    def decorated_function(*args, **kwargs):
        auth_header = request.headers.get('Authorization')
        if not auth_header or not auth_header.startswith('Bearer '):
            return jsonify({"error": "Missing or invalid API key"}), 401
        token = auth_header[len('Bearer '):]
        if token != API_KEY:
            return jsonify({"error": "Invalid API key"}), 401
        return f(*args, **kwargs)
    return decorated_function

@app.route('/v1/audio/speech', methods=['POST'])
@require_api_key
def text_to_speech():
    data = request.get_json(silent=True)  # Returns None instead of raising on a missing/invalid JSON body
    if not data or 'input' not in data:
        return jsonify({"error": "Missing 'input' in request body"}), 400

    text = data['input']
    model = data.get('model', 'tts-1')  # Accepted for OpenAI compatibility but not used by edge-tts
    voice = data.get('voice', 'en-US-AriaNeural')

    # Map OpenAI voices to edge-tts voices (this is a simple mapping, you might want to expand it)
    voice_mapping = {
        'alloy': 'en-US-AriaNeural',
        'echo': 'en-US-GuyNeural',
        'fable': 'en-GB-SoniaNeural',
        'onyx': 'en-US-ChristopherNeural',
        'nova': 'en-AU-NatashaNeural',
        'shimmer': 'en-US-JennyNeural'
    }

    edge_tts_voice = voice_mapping.get(voice, voice)

    # Create the temp file and close the handle immediately so edge-tts can write to
    # it by path (an open handle would block the write on Windows); delete=False keeps
    # the file on disk for send_file below.
    output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    output_file.close()

    async def generate_speech():
        communicate = edge_tts.Communicate(text, edge_tts_voice)
        await communicate.save(output_file.name)

    asyncio.run(generate_speech())

    return send_file(output_file.name, mimetype="audio/mpeg", as_attachment=True, download_name="speech.mp3")
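
# A minimal client sketch for the endpoint above, assuming the server is reachable at
# localhost:5000 and the `requests` package is installed; the API key and input text
# below are placeholders:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:5000/v1/audio/speech",
#       headers={"Authorization": "Bearer my-secret-key"},
#       json={"input": "Hello from Open Speech!", "voice": "alloy"},
#   )
#   resp.raise_for_status()
#   with open("speech.mp3", "wb") as f:
#       f.write(resp.content)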

@app.route('/v1/models', methods=['GET'])
@require_api_key
def list_models():
    # For simplicity, we're returning a fixed list of "models"
    models = [
        {"id": "tts-1", "name": "Text-to-speech v1"},
        {"id": "tts-1-hd", "name": "Text-to-speech v1 HD"}
    ]
    return jsonify({"data": models})

@app.route('/v1/voices', methods=['GET'])
@require_api_key
def list_voices():
    voices = asyncio.run(edge_tts.list_voices())  # list_voices() is a coroutine and must be awaited
    # Transform the voice data to match OpenAI's format
    formatted_voices = [{"name": v['ShortName'], "language": v['Locale']} for v in voices]
    return jsonify({"voices": formatted_voices})

if __name__ == '__main__':
    if not API_KEY:
        print("Warning: API_KEY environment variable is not set.")
    http_server = WSGIServer(('0.0.0.0', PORT), app)
    http_server.serve_forever()