MAZALA2024's picture
Rename main.py to m.py
294bfd5 verified
raw
history blame
5.41 kB
from fastapi import FastAPI, Form, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from typing import Optional
import uvicorn
from voice_processing import tts, get_model_names, voice_mapping, get_unique_filename
import os
import base64
import numpy as np
import librosa
#import soundfile as sf
from scipy.io import wavfile
# HTML and JavaScript for the frontend, defined inline as a string and served
# by the "/" route. The page's script:
#   * fills the model and voice dropdowns from GET /models and GET /voices,
#   * submits the form as multipart form data to POST /convert,
#   * shows the response's `info` text and plays its `audio_data_uri`.
# The form field names (model_name, tts_text, selected_voice, slang_rate,
# use_uploaded_voice, voice_upload) match the parameters of convert_tts.
html = """
<!DOCTYPE html>
<html>
<head>
<title>TTS Converter</title>
<script>
async function submitForm(event) {
event.preventDefault();
const formData = new FormData(document.getElementById('ttsForm'));
const response = await fetch('/convert', {
method: 'POST',
body: formData
});
if(response.ok) {
const data = await response.json();
document.getElementById('resultInfo').innerText = data.info;
const audioUrl = data.audio_data_uri;
document.getElementById('resultAudio').src = audioUrl;
} else {
alert('Error: ' + await response.text());
}
}
document.addEventListener('DOMContentLoaded', function() {
// Populate model and voice dropdowns
fetch('/models').then(response => response.json()).then(data => {
const modelSelect = document.getElementById('model');
data.forEach(model => modelSelect.add(new Option(model, model)));
});
fetch('/voices').then(response => response.json()).then(data => {
const voiceSelect = document.getElementById('voice');
data.forEach(voice => voiceSelect.add(new Option(voice, voice)));
});
});
</script>
</head>
<body>
<h2>Text-to-Speech Conversion</h2>
<form id="ttsForm" onsubmit="submitForm(event)">
<label for="model">Model:</label>
<select id="model" name="model_name"></select><br><br>
<label for="text">Text:</label>
<input type="text" id="text" name="tts_text"><br><br>
<label for="voice">Voice:</label>
<select id="voice" name="selected_voice"></select><br><br>
<label for="rate">Slang Rate:</label>
<input type="range" id="rate" name="slang_rate" min="0" max="1" step="0.01"><br><br>
<input type="checkbox" id="uploaded_voice" name="use_uploaded_voice">
<label for="uploaded_voice">Use Uploaded Voice</label><br><br>
<input type="file" id="voice_file" name="voice_upload"><br><br>
<input type="submit" value="Convert">
</form>
<p id="resultInfo"></p>
<audio id="resultAudio" controls></audio>
</body>
</html>
"""
# FastAPI application instance; the route decorators below register on it.
app = FastAPI()
@app.get("/")
def read_root():
    """Serve the inline TTS frontend page with an HTTP 200 status."""
    page = HTMLResponse(content=html, status_code=200)
    return page
@app.get("/models")
def get_models():
    """Return the result of ``get_model_names()`` as the response body.

    The frontend fetches this endpoint to fill its model dropdown.
    """
    model_names = get_model_names()
    return model_names
@app.get("/voices")
def get_voices():
    """Return the keys of ``voice_mapping`` as a list.

    The frontend fetches this endpoint to fill its voice dropdown.
    """
    voice_names = list(voice_mapping)
    return voice_names
def save_audio_data_to_file(audio_data, sample_rate=40000):
    """Write ``audio_data`` to a freshly named WAV file and return its path.

    Args:
        audio_data: Sample data handed to ``scipy.io.wavfile.write``.
        sample_rate: Sample rate written to the WAV header (default 40000).

    Returns:
        The path obtained from ``get_unique_filename('wav')``.
    """
    wav_path = get_unique_filename('wav')
    wavfile.write(filename=wav_path, rate=sample_rate, data=audio_data)
    return wav_path
@app.post("/convert")
async def convert_tts(model_name: str = Form(...),
                      tts_text: str = Form("Текстыг оруулна уу."),
                      selected_voice: str = Form(...),
                      slang_rate: float = Form(...),
                      use_uploaded_voice: bool = Form(False),
                      voice_upload: Optional[UploadFile] = File(None)):
    """Run a TTS conversion and return the audio as a base64 data URI.

    Args:
        model_name: Model name forwarded to ``tts``.
        tts_text: Text to synthesize.
        selected_voice: Key into ``voice_mapping``; its value is forwarded
            to ``tts``.
        slang_rate: Rate value forwarded to ``tts`` unchanged.
        use_uploaded_voice: When true, the uploaded file's bytes are passed
            to ``tts``.
        voice_upload: Optional uploaded voice file.

    Returns:
        A JSON response with ``info`` (text returned by ``tts``) and
        ``audio_data_uri`` (``data:audio/wav;base64,...``).

    Raises:
        HTTPException: 400 for an unknown voice or a missing/empty upload;
            500 when ``tts`` yields no audio or the audio file cannot be read.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        raise HTTPException(status_code=400, detail=f"Invalid voice '{selected_voice}'.")

    voice_upload_file = None
    if use_uploaded_voice:
        if voice_upload is None:
            raise HTTPException(status_code=400, detail="No voice file uploaded")
        voice_upload_file = await voice_upload.read()
        # A file input left empty can still arrive as a zero-byte upload.
        if not voice_upload_file:
            raise HTTPException(status_code=400, detail="No voice file uploaded")

    # Process the text input or uploaded voice. The second returned value is
    # not used by this endpoint.
    info, _, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # Remove the intermediate file reported by tts, if there is one.
    if edge_output_file and os.path.exists(edge_output_file):
        os.remove(edge_output_file)

    # NOTE(review): presumably tts signals failure by returning no output
    # data; without this guard the unpacking below raises TypeError.
    if tts_output_data is None:
        raise HTTPException(status_code=500, detail=f"TTS conversion failed: {info}")
    sample_rate, audio_output = tts_output_data

    if isinstance(audio_output, np.ndarray):
        # NOTE(review): assumes the first tuple element is the sample rate of
        # the array - confirm against voice_processing.tts. Anything that is
        # not a positive integer falls back to the helper's default rate.
        has_rate = (
            isinstance(sample_rate, (int, np.integer))
            and not isinstance(sample_rate, bool)
            and sample_rate > 0
        )
        if has_rate:
            audio_file_path = save_audio_data_to_file(audio_output, int(sample_rate))
        else:
            audio_file_path = save_audio_data_to_file(audio_output)
    else:
        # Anything that is not an array is treated as a path to an audio file.
        audio_file_path = audio_output

    if not isinstance(audio_file_path, (str, bytes, os.PathLike)):
        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

    try:
        with open(audio_file_path, 'rb') as file:
            audio_bytes = file.read()
    except OSError as e:
        raise HTTPException(status_code=500, detail=f"Failed to read audio file: {e}") from e
    finally:
        # Cleanup the temporary audio file
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)

    # Encode the audio bytes as a base64 data URI
    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"
    return JSONResponse(content={"info": info, "audio_data_uri": audio_data_uri})
def convert_to_audio_bytes(audio_file_path):
    """Return the raw bytes of the file at ``audio_file_path``.

    Any error while opening or reading is printed and reported to the
    caller as ``None`` instead of being raised.
    """
    payload = None
    try:
        with open(audio_file_path, 'rb') as stream:
            payload = stream.read()
    except Exception as e:
        print(f"Error reading audio file: {e}")
    return payload
if __name__ == "__main__":
    # uvicorn needs an import string (not the app object) when reload=True.
    # Build it from this file's own name so the entry point keeps working
    # if the file is renamed, instead of hard-coding "main".
    module_name = os.path.splitext(os.path.basename(__file__))[0]
    uvicorn.run(f"{module_name}:app", host="127.0.0.1", port=8000, reload=True)
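# More routes can be added as needed.
# To run the server from the command line, use the following command,
# replacing <module> with this file's name without the .py extension:
#   uvicorn <module>:app --reload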