# File size: 5,407 Bytes
#
# Revision: cc62aa8

from fastapi import FastAPI, Form, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from typing import Optional
import uvicorn
from voice_processing import tts, get_model_names, voice_mapping, get_unique_filename
import os
import base64
import numpy as np
import librosa
#import soundfile as sf
from scipy.io import wavfile



# Single-page frontend (HTML + JavaScript), kept inline as a string and served
# by the "/" route.
#
# What the page does:
#   * On DOMContentLoaded it fetches "/models" and "/voices" and fills the
#     "model" and "voice" <select> elements with the returned values.
#   * On submit it POSTs the form as multipart FormData to "/convert". The
#     form field names (model_name, tts_text, selected_voice, slang_rate,
#     use_uploaded_voice, voice_upload) match the parameters of the
#     "/convert" handler.
#   * On success it shows the "info" field of the JSON reply and uses
#     "audio_data_uri" as the source of the <audio> element; on failure it
#     shows the response body in an alert.
html = """
<!DOCTYPE html>
<html>
<head>
<title>TTS Converter</title>
<script>
    async function submitForm(event) {
        event.preventDefault();
        const formData = new FormData(document.getElementById('ttsForm'));
        const response = await fetch('/convert', {
            method: 'POST',
            body: formData
        });
        if(response.ok) {
            const data = await response.json();
            document.getElementById('resultInfo').innerText = data.info;
            const audioUrl = data.audio_data_uri;
            document.getElementById('resultAudio').src = audioUrl;
        } else {
            alert('Error: ' + await response.text());
        }
    }

    document.addEventListener('DOMContentLoaded', function() {
        // Populate model and voice dropdowns
        fetch('/models').then(response => response.json()).then(data => {
            const modelSelect = document.getElementById('model');
            data.forEach(model => modelSelect.add(new Option(model, model)));
        });
        fetch('/voices').then(response => response.json()).then(data => {
            const voiceSelect = document.getElementById('voice');
            data.forEach(voice => voiceSelect.add(new Option(voice, voice)));
        });
    });
</script>
</head>
<body>
<h2>Text-to-Speech Conversion</h2>
<form id="ttsForm" onsubmit="submitForm(event)">
    <label for="model">Model:</label>
    <select id="model" name="model_name"></select><br><br>
    <label for="text">Text:</label>
    <input type="text" id="text" name="tts_text"><br><br>
    <label for="voice">Voice:</label>
    <select id="voice" name="selected_voice"></select><br><br>
    <label for="rate">Slang Rate:</label>
    <input type="range" id="rate" name="slang_rate" min="0" max="1" step="0.01"><br><br>
    <input type="checkbox" id="uploaded_voice" name="use_uploaded_voice">
    <label for="uploaded_voice">Use Uploaded Voice</label><br><br>
    <input type="file" id="voice_file" name="voice_upload"><br><br>
    <input type="submit" value="Convert">
</form>
<p id="resultInfo"></p>
<audio id="resultAudio" controls></audio>
</body>
</html>
"""

# Application instance; the route decorators below register their handlers on it.
app = FastAPI()

@app.get("/")
def read_root():
    """Serve the inline frontend page stored in the module-level ``html`` string."""
    page = HTMLResponse(status_code=200, content=html)
    return page

@app.get("/models")
def get_models():
    """Return whatever ``get_model_names()`` reports, for the model dropdown."""
    available_models = get_model_names()
    return available_models

@app.get("/voices")
def get_voices():
    """Return the keys of ``voice_mapping`` as a list, for the voice dropdown."""
    voice_labels = [*voice_mapping.keys()]
    return voice_labels


def save_audio_data_to_file(audio_data, sample_rate=40000):
    """Write *audio_data* to a newly named WAV file and return that file's path.

    The destination comes from ``get_unique_filename('wav')`` and the samples
    are written with ``scipy.io.wavfile.write`` at *sample_rate* Hz.
    """
    destination = get_unique_filename('wav')
    wavfile.write(filename=destination, rate=sample_rate, data=audio_data)
    return destination

@app.post("/convert")
async def convert_tts(model_name: str = Form(...),
                      tts_text: str = Form("Текстыг оруулна уу."),
                      selected_voice: str = Form(...),
                      slang_rate: float = Form(...),
                      use_uploaded_voice: bool = Form(False),
                      voice_upload: Optional[UploadFile] = File(None)):
    """Run a conversion through ``tts`` and return the audio as a base64 data URI.

    Form fields:
        model_name: Model identifier, passed to ``tts`` unchanged.
        tts_text: Text to synthesise.
        selected_voice: Key into ``voice_mapping``; the mapped value is what
            ``tts`` receives.
        slang_rate: Numeric rate, passed to ``tts`` unchanged.
        use_uploaded_voice: When true, the bytes of ``voice_upload`` are
            passed to ``tts`` as well.
        voice_upload: Optional uploaded file; required when
            ``use_uploaded_voice`` is true.

    Returns:
        JSONResponse with ``info`` (the message returned by ``tts``) and
        ``audio_data_uri`` (``data:audio/wav;base64,...``).

    Raises:
        HTTPException: 400 for an unknown voice or a missing/empty upload;
            500 when ``tts`` yields no audio or the audio file cannot be read.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        raise HTTPException(status_code=400, detail=f"Invalid voice '{selected_voice}'.")

    voice_upload_file = None
    if use_uploaded_voice:
        if voice_upload is None:
            raise HTTPException(status_code=400, detail="No voice file uploaded")
        voice_upload_file = await voice_upload.read()
        # A file part with no content cannot be converted; reject it here
        # rather than handing empty bytes to the pipeline.
        if not voice_upload_file:
            raise HTTPException(status_code=400, detail="Uploaded voice file is empty")

    # Process the text input or uploaded voice. The second returned value is
    # not needed by this handler.
    info, _, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # The intermediate file reported by tts is no longer needed once it returns.
    if edge_output_file and os.path.exists(edge_output_file):
        os.remove(edge_output_file)

    # Without an output tuple there is nothing to unpack; surface the
    # pipeline's own message instead of failing with a bare TypeError.
    if tts_output_data is None:
        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

    # NOTE(review): the first element is discarded and the WAV is written at
    # save_audio_data_to_file's default rate. If that element is the output
    # sample rate, it should probably be forwarded - confirm against tts().
    _, audio_output = tts_output_data

    # An ndarray is written to a fresh WAV file; anything else is treated as
    # the path of a file that already exists.
    if isinstance(audio_output, np.ndarray):
        audio_file_path = save_audio_data_to_file(audio_output)
    else:
        audio_file_path = audio_output

    # Validate before the try/finally below: os.path.exists() raises TypeError
    # on a non-path value such as None, which would mask the real error.
    if not isinstance(audio_file_path, (str, bytes, os.PathLike)):
        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

    try:
        with open(audio_file_path, 'rb') as file:
            audio_bytes = file.read()
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Failed to read audio file: {e}") from e
    finally:
        # Cleanup the temporary audio file
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)

    # Encode the audio bytes as a data URI the frontend can use directly.
    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"

    return JSONResponse(content={"info": info, "audio_data_uri": audio_data_uri})

def convert_to_audio_bytes(audio_file_path):
    """Return the raw bytes of the file at *audio_file_path*, or None on failure.

    Any exception raised while opening or reading the file is reported on
    stdout and swallowed; the caller then receives None.
    """
    payload = None
    try:
        with open(audio_file_path, mode='rb') as stream:
            payload = stream.read()
    except Exception as err:
        print(f"Error reading audio file: {err}")
    return payload




if __name__ == "__main__":
    # Start uvicorn on the loopback interface with auto-reload when this file
    # is executed directly. The app is given as the import string "main:app".
    server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
    uvicorn.run("main:app", **server_options)


# More routes can be added as needed.

# To run the server from the command line instead, use:
#   uvicorn main:app --reload