File size: 5,407 Bytes
cc62aa8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
from fastapi import FastAPI, Form, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from typing import Optional
import uvicorn
from voice_processing import tts, get_model_names, voice_mapping, get_unique_filename
import os
import base64
import numpy as np
import librosa
#import soundfile as sf
from scipy.io import wavfile
# Single-page frontend served by the "/" route: HTML with inline JavaScript,
# kept in a module-level string.
# On page load the script fills the model and voice dropdowns from
# GET /models and GET /voices. On submit it POSTs the form to /convert and
# shows the returned `info` text and `audio_data_uri` in the page.
# The form field names (model_name, tts_text, selected_voice, slang_rate,
# use_uploaded_voice, voice_upload) match the parameters of the /convert handler.
html = """
<!DOCTYPE html>
<html>
<head>
<title>TTS Converter</title>
<script>
async function submitForm(event) {
event.preventDefault();
const formData = new FormData(document.getElementById('ttsForm'));
const response = await fetch('/convert', {
method: 'POST',
body: formData
});
if(response.ok) {
const data = await response.json();
document.getElementById('resultInfo').innerText = data.info;
const audioUrl = data.audio_data_uri;
document.getElementById('resultAudio').src = audioUrl;
} else {
alert('Error: ' + await response.text());
}
}
document.addEventListener('DOMContentLoaded', function() {
// Populate model and voice dropdowns
fetch('/models').then(response => response.json()).then(data => {
const modelSelect = document.getElementById('model');
data.forEach(model => modelSelect.add(new Option(model, model)));
});
fetch('/voices').then(response => response.json()).then(data => {
const voiceSelect = document.getElementById('voice');
data.forEach(voice => voiceSelect.add(new Option(voice, voice)));
});
});
</script>
</head>
<body>
<h2>Text-to-Speech Conversion</h2>
<form id="ttsForm" onsubmit="submitForm(event)">
<label for="model">Model:</label>
<select id="model" name="model_name"></select><br><br>
<label for="text">Text:</label>
<input type="text" id="text" name="tts_text"><br><br>
<label for="voice">Voice:</label>
<select id="voice" name="selected_voice"></select><br><br>
<label for="rate">Slang Rate:</label>
<input type="range" id="rate" name="slang_rate" min="0" max="1" step="0.01"><br><br>
<input type="checkbox" id="uploaded_voice" name="use_uploaded_voice">
<label for="uploaded_voice">Use Uploaded Voice</label><br><br>
<input type="file" id="voice_file" name="voice_upload"><br><br>
<input type="submit" value="Convert">
</form>
<p id="resultInfo"></p>
<audio id="resultAudio" controls></audio>
</body>
</html>
"""
# Application instance that the route decorators below register their handlers on.
app = FastAPI()
@app.get("/")
def read_root():
    """Serve the embedded frontend page (the module-level ``html`` string)."""
    page = HTMLResponse(status_code=200, content=html)
    return page
@app.get("/models")
def get_models():
    """Return the result of ``get_model_names()`` unchanged.

    The frontend iterates over the JSON response to fill the model dropdown.
    """
    available_models = get_model_names()
    return available_models
@app.get("/voices")
def get_voices():
    """Return the keys of ``voice_mapping`` as a list.

    These are the voice labels the frontend offers; ``/convert`` looks the
    chosen label up in the same mapping.
    """
    # Iterating a mapping yields its keys, so this matches list(mapping.keys()).
    return list(voice_mapping)
def save_audio_data_to_file(audio_data, sample_rate=40000):
    """Write ``audio_data`` to a WAV file and return the file's path.

    Args:
        audio_data: Sample data handed straight to ``scipy.io.wavfile.write``.
        sample_rate: Sample rate recorded in the WAV header (default 40000).

    Returns:
        The path obtained from ``get_unique_filename('wav')``.
    """
    wav_path = get_unique_filename('wav')
    wavfile.write(filename=wav_path, rate=sample_rate, data=audio_data)
    return wav_path
@app.post("/convert")
async def convert_tts(model_name: str = Form(...),
                      tts_text: str = Form("Текстыг оруулна уу."),
                      selected_voice: str = Form(...),
                      slang_rate: float = Form(...),
                      use_uploaded_voice: bool = Form(False),
                      voice_upload: Optional[UploadFile] = File(None)):
    """Run the TTS pipeline for a form submission and return the audio inline.

    Form fields:
        model_name: Forwarded to ``tts`` unchanged.
        tts_text: Text to synthesize; falls back to the default prompt string.
        selected_voice: Key into ``voice_mapping``; unknown keys are rejected.
        slang_rate: Forwarded to ``tts`` unchanged.
        use_uploaded_voice: When true, ``voice_upload`` must contain data.
        voice_upload: Optional uploaded file whose raw bytes go to ``tts``.

    Returns:
        JSONResponse with ``info`` (as returned by ``tts``) and
        ``audio_data_uri`` (a ``data:audio/wav;base64,...`` URI).

    Raises:
        HTTPException: 400 for an unknown voice or a missing/empty upload;
            500 when ``tts`` yields no usable audio or the audio file cannot
            be read.
    """
    edge_tts_voice = voice_mapping.get(selected_voice)
    if not edge_tts_voice:
        raise HTTPException(status_code=400, detail=f"Invalid voice '{selected_voice}'.")

    voice_upload_file = None
    if use_uploaded_voice:
        if voice_upload is None:
            raise HTTPException(status_code=400, detail="No voice file uploaded")
        voice_upload_file = await voice_upload.read()
        # Browsers submit an empty file part when no file was chosen; reject it
        # here instead of handing zero bytes to the TTS pipeline.
        if not voice_upload_file:
            raise HTTPException(status_code=400, detail="Uploaded voice file is empty")

    # Process the text input or uploaded voice
    info, _edge_tts_output_path, tts_output_data, edge_output_file = await tts(
        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
    )

    # The intermediate file reported by tts is not needed once tts has returned.
    if edge_output_file and os.path.exists(edge_output_file):
        os.remove(edge_output_file)

    if not tts_output_data:
        raise HTTPException(status_code=500, detail=f"TTS produced no audio: {info}")

    # NOTE(review): the first element is discarded. If it is the output sample
    # rate, it should probably be passed to save_audio_data_to_file instead of
    # relying on that function's 40000 default - confirm against voice_processing.tts.
    _, audio_output = tts_output_data

    # An ndarray is written to a fresh WAV file; anything else is treated as an
    # existing file path.
    if isinstance(audio_output, np.ndarray):
        audio_file_path = save_audio_data_to_file(audio_output)
    else:
        audio_file_path = audio_output

    # Validate before open(): a None/non-path value would otherwise make
    # os.path.exists() in the cleanup raise TypeError and mask the real error.
    if not isinstance(audio_file_path, (str, bytes, os.PathLike)):
        raise HTTPException(status_code=500, detail=f"TTS produced no audio: {info}")

    try:
        with open(audio_file_path, 'rb') as file:
            audio_bytes = file.read()
    except (OSError, ValueError) as e:
        raise HTTPException(status_code=500, detail=f"Failed to read audio file: {e}") from e
    finally:
        # Cleanup the temporary audio file
        if os.path.exists(audio_file_path):
            os.remove(audio_file_path)

    # Encode the audio bytes to a base64 data URI the <audio> element can play.
    encoded_audio = base64.b64encode(audio_bytes).decode('utf-8')
    audio_data_uri = f"data:audio/wav;base64,{encoded_audio}"
    return JSONResponse(content={"info": info, "audio_data_uri": audio_data_uri})
def convert_to_audio_bytes(audio_file_path):
    """Read a file in binary mode and return its contents.

    Args:
        audio_file_path: Path of the file to read.

    Returns:
        The file's bytes, or ``None`` if opening or reading raised an
        exception (the error is printed, not propagated).
    """
    payload = None
    try:
        with open(audio_file_path, 'rb') as handle:
            payload = handle.read()
    except Exception as err:
        print(f"Error reading audio file: {err}")
    return payload
if __name__ == "__main__":
    # Development entry point: serve the app on localhost with auto-reload.
    # The "main:app" import string refers to the `app` object in a module
    # named `main`.
    dev_server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
    uvicorn.run("main:app", **dev_server_options)
# Additional routes can be registered on `app` as needed.
# The server can also be started from the command line with:
#   uvicorn main:app --reload
|