|
from fastapi import FastAPI, Form, UploadFile, File, HTTPException |
|
from fastapi.responses import HTMLResponse, JSONResponse |
|
from typing import Optional |
|
import uvicorn |
|
from voice_processing import tts, get_model_names, voice_mapping, get_unique_filename |
|
import os |
|
import base64 |
|
import numpy as np |
|
import librosa |
|
|
|
from scipy.io import wavfile |
|
|
|
|
|
|
|
|
|
# Single-page UI served at "/". Its inline script:
#   * fetches /models and /voices on DOMContentLoaded to fill the two dropdowns;
#   * intercepts the form submit and POSTs the form fields (as FormData) to
#     /convert, then shows the returned `info` text and plays the returned
#     `audio_data_uri` in the <audio> element;
#   * shows the raw response body in an alert when the request is not OK.
# The form field names (model_name, tts_text, selected_voice, slang_rate,
# use_uploaded_voice, voice_upload) match the parameters of `convert_tts`.
html = """
<!DOCTYPE html>
<html>
<head>
    <title>TTS Converter</title>
    <script>
        async function submitForm(event) {
            event.preventDefault();
            const formData = new FormData(document.getElementById('ttsForm'));
            const response = await fetch('/convert', {
                method: 'POST',
                body: formData
            });
            if(response.ok) {
                const data = await response.json();
                document.getElementById('resultInfo').innerText = data.info;
                const audioUrl = data.audio_data_uri;
                document.getElementById('resultAudio').src = audioUrl;
            } else {
                alert('Error: ' + await response.text());
            }
        }

        document.addEventListener('DOMContentLoaded', function() {
            // Populate model and voice dropdowns
            fetch('/models').then(response => response.json()).then(data => {
                const modelSelect = document.getElementById('model');
                data.forEach(model => modelSelect.add(new Option(model, model)));
            });
            fetch('/voices').then(response => response.json()).then(data => {
                const voiceSelect = document.getElementById('voice');
                data.forEach(voice => voiceSelect.add(new Option(voice, voice)));
            });
        });
    </script>
</head>
<body>
    <h2>Text-to-Speech Conversion</h2>
    <form id="ttsForm" onsubmit="submitForm(event)">
        <label for="model">Model:</label>
        <select id="model" name="model_name"></select><br><br>
        <label for="text">Text:</label>
        <input type="text" id="text" name="tts_text"><br><br>
        <label for="voice">Voice:</label>
        <select id="voice" name="selected_voice"></select><br><br>
        <label for="rate">Slang Rate:</label>
        <input type="range" id="rate" name="slang_rate" min="0" max="1" step="0.01"><br><br>
        <input type="checkbox" id="uploaded_voice" name="use_uploaded_voice">
        <label for="uploaded_voice">Use Uploaded Voice</label><br><br>
        <input type="file" id="voice_file" name="voice_upload"><br><br>
        <input type="submit" value="Convert">
    </form>
    <p id="resultInfo"></p>
    <audio id="resultAudio" controls></audio>
</body>
</html>
"""

# Application instance; the route decorators in this file register their
# handlers on it, and the __main__ guard refers to it as "main:app".
app = FastAPI()
|
|
|
@app.get("/")
def read_root():
    """Serve the embedded single-page UI (the module-level ``html`` string)."""
    landing_page = HTMLResponse(content=html, status_code=200)
    return landing_page
|
|
|
@app.get("/models")
def get_models():
    """Return whatever ``get_model_names()`` reports.

    The page's script fetches this endpoint to fill the model dropdown.
    """
    available_models = get_model_names()
    return available_models
|
|
|
@app.get("/voices")
def get_voices():
    """Return the keys of ``voice_mapping`` as a list.

    The page's script fetches this endpoint to fill the voice dropdown; the
    same keys are what ``convert_tts`` accepts as ``selected_voice``.
    """
    voice_names = [*voice_mapping.keys()]
    return voice_names
|
|
|
|
|
def save_audio_data_to_file(audio_data, sample_rate=40000):
    """Write ``audio_data`` to a WAV file and return the file's path.

    Args:
        audio_data: Sample data handed straight to ``scipy.io.wavfile.write``.
        sample_rate: Sample rate written into the WAV file (default 40000).

    Returns:
        The path obtained from ``get_unique_filename('wav')``.
    """
    wav_path = get_unique_filename('wav')
    wavfile.write(wav_path, rate=sample_rate, data=audio_data)
    return wav_path
|
|
|
@app.post("/convert")
async def convert_tts(model_name: str = Form(...),
                      tts_text: str = Form("Текстыг оруулна уу."),
                      selected_voice: str = Form(...),
                      slang_rate: float = Form(...),
                      use_uploaded_voice: bool = Form(False),
                      voice_upload: Optional[UploadFile] = File(None)):
    """Run the TTS pipeline for one form submission and return the audio inline.

    Args:
        model_name: Model identifier forwarded to ``tts``.
        tts_text: Text to synthesize.
        selected_voice: Key into ``voice_mapping``; the mapped value is what
            gets forwarded to ``tts``.
        slang_rate: Rate value forwarded to ``tts`` unchanged.
        use_uploaded_voice: When true, the bytes of ``voice_upload`` are
            forwarded to ``tts``.
        voice_upload: Optional uploaded file; required (and non-empty) when
            ``use_uploaded_voice`` is true.

    Returns:
        A ``JSONResponse`` with ``info`` (as returned by ``tts``) and
        ``audio_data_uri`` (a base64 ``data:audio/wav`` URI).

    Raises:
        HTTPException: 400 for an unknown voice or a missing/empty upload;
            500 when ``tts`` yields no audio or the audio file cannot be read.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        raise HTTPException(status_code=400, detail=f"Invalid voice '{selected_voice}'.")

    voice_upload_file = None
    if use_uploaded_voice:
        if voice_upload is None:
            raise HTTPException(status_code=400, detail="No voice file uploaded")
        voice_upload_file = await voice_upload.read()
        # A file input submitted without a chosen file can arrive as an empty
        # upload; reject it here instead of handing empty bytes to tts.
        if not voice_upload_file:
            raise HTTPException(status_code=400, detail="Uploaded voice file is empty")

    # The second element of the result is not needed by this endpoint.
    info, _edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # The intermediate file is no longer needed once tts has returned.
    if edge_output_file and os.path.exists(edge_output_file):
        os.remove(edge_output_file)

    if tts_output_data is None:
        # Surface the pipeline's own message instead of failing on the unpack.
        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

    # NOTE(review): the first element is assumed to be the sample rate of the
    # returned samples (a (rate, samples) pair) - confirm against
    # voice_processing.tts. It is only used when it is a positive integer;
    # otherwise the save helper's default rate applies, as before.
    sample_rate, audio_output = tts_output_data

    if isinstance(audio_output, np.ndarray):
        rate_is_usable = (
            isinstance(sample_rate, (int, np.integer))
            and not isinstance(sample_rate, bool)
            and sample_rate > 0
        )
        if rate_is_usable:
            audio_file_path = save_audio_data_to_file(audio_output, int(sample_rate))
        else:
            audio_file_path = save_audio_data_to_file(audio_output)
    else:
        # Anything that is not an array is treated as a path to an audio file.
        audio_file_path = audio_output

    if not isinstance(audio_file_path, (str, bytes, os.PathLike)):
        # Covers a missing audio payload (e.g. None), which would otherwise
        # fail inside open() and again inside the cleanup below.
        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

    try:
        with open(audio_file_path, 'rb') as file:
            audio_bytes = file.read()
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Failed to read audio file: {e}") from e
    finally:
        # The audio is returned inline, so the file is removed either way.
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)

    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return JSONResponse(content={"info": info, "audio_data_uri": audio_data_uri})
|
|
|
def convert_to_audio_bytes(audio_file_path):
    """Read a file in binary mode and return its raw bytes.

    Args:
        audio_file_path: Path of the file to read.

    Returns:
        The file's contents as ``bytes``, or ``None`` if reading raised any
        ``Exception`` (the error is printed, not re-raised).
    """
    try:
        with open(audio_file_path, mode='rb') as stream:
            payload = stream.read()
    except Exception as err:
        # Best-effort helper: report the failure and signal it with None.
        print(f"Error reading audio file: {err}")
        payload = None
    return payload
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Start uvicorn only when this file is executed directly; the app is
    # referenced by import string ("main:app") and auto-reload is enabled.
    server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
    uvicorn.run("main:app", **server_options)
|
|
|
|
|
|
|
|
|
|
|
|
|
|