TTS_Mongolian / main.py

Upload folder using huggingface_hub

cc62aa8 verified 7 months ago

5.41 kB

	from fastapi import FastAPI, Form, UploadFile, File, HTTPException
	from fastapi.responses import HTMLResponse, JSONResponse
	from typing import Optional
	import uvicorn
	from voice_processing import tts, get_model_names, voice_mapping, get_unique_filename
	import os
	import base64
	import numpy as np
	import librosa
	#import soundfile as sf
	from scipy.io import wavfile



	# Single-page frontend, kept inline as a string and returned verbatim by the
	# "/" route. The embedded script:
	#   * fills the model and voice dropdowns from GET /models and GET /voices,
	#   * posts the form as multipart FormData to POST /convert,
	#   * on success shows `info` from the JSON reply and plays `audio_data_uri`,
	#   * on failure shows the response body in an alert.
	# The form field names (model_name, tts_text, selected_voice, slang_rate,
	# use_uploaded_voice, voice_upload) must match the parameters of the
	# /convert handler.
	html = """
	<!DOCTYPE html>
	<html>
	<head>
	<title>TTS Converter</title>
	<script>
	async function submitForm(event) {
	event.preventDefault();
	const formData = new FormData(document.getElementById('ttsForm'));
	const response = await fetch('/convert', {
	method: 'POST',
	body: formData
	});
	if(response.ok) {
	const data = await response.json();
	document.getElementById('resultInfo').innerText = data.info;
	const audioUrl = data.audio_data_uri;
	document.getElementById('resultAudio').src = audioUrl;
	} else {
	alert('Error: ' + await response.text());
	}
	}

	document.addEventListener('DOMContentLoaded', function() {
	// Populate model and voice dropdowns
	fetch('/models').then(response => response.json()).then(data => {
	const modelSelect = document.getElementById('model');
	data.forEach(model => modelSelect.add(new Option(model, model)));
	});
	fetch('/voices').then(response => response.json()).then(data => {
	const voiceSelect = document.getElementById('voice');
	data.forEach(voice => voiceSelect.add(new Option(voice, voice)));
	});
	});
	</script>
	</head>
	<body>
	<h2>Text-to-Speech Conversion</h2>
	<form id="ttsForm" onsubmit="submitForm(event)">
	<label for="model">Model:</label>
	<select id="model" name="model_name"></select><br><br>
	<label for="text">Text:</label>
	<input type="text" id="text" name="tts_text"><br><br>
	<label for="voice">Voice:</label>
	<select id="voice" name="selected_voice"></select><br><br>
	<label for="rate">Slang Rate:</label>
	<input type="range" id="rate" name="slang_rate" min="0" max="1" step="0.01"><br><br>
	<input type="checkbox" id="uploaded_voice" name="use_uploaded_voice">
	<label for="uploaded_voice">Use Uploaded Voice</label><br><br>
	<input type="file" id="voice_file" name="voice_upload"><br><br>
	<input type="submit" value="Convert">
	</form>
	<p id="resultInfo"></p>
	<audio id="resultAudio" controls></audio>
	</body>
	</html>
	"""

	# Application instance; the route decorators below register handlers on it.
	app = FastAPI()

	@app.get("/")
	def read_root():
	    """Serve the single-page frontend held in the module-level ``html`` string."""
	    page = HTMLResponse(status_code=200, content=html)
	    return page

	@app.get("/models")
	def get_models():
	    """Return the result of ``get_model_names()``.

	    The frontend script fetches this endpoint to fill its model dropdown.
	    """
	    available_models = get_model_names()
	    return available_models

	@app.get("/voices")
	def get_voices():
	    """Return the keys of ``voice_mapping`` as a list.

	    The frontend script fetches this endpoint to fill its voice dropdown.
	    """
	    voice_names = voice_mapping.keys()
	    return list(voice_names)


	def save_audio_data_to_file(audio_data, sample_rate=40000):
	    """Write ``audio_data`` to a freshly named WAV file and return its path.

	    Args:
	        audio_data: Samples handed unchanged to ``scipy.io.wavfile.write``.
	        sample_rate: Rate written into the WAV header (defaults to 40000).

	    Returns:
	        The path obtained from ``get_unique_filename('wav')``.
	    """
	    destination = get_unique_filename('wav')
	    wavfile.write(filename=destination, rate=sample_rate, data=audio_data)
	    return destination

	@app.post("/convert")
	async def convert_tts(model_name: str = Form(...),
	                      tts_text: str = Form("Текстыг оруулна уу."),
	                      selected_voice: str = Form(...),
	                      slang_rate: float = Form(...),
	                      use_uploaded_voice: bool = Form(False),
	                      voice_upload: Optional[UploadFile] = File(None)):
	    """Run a text-to-speech conversion and return the audio as a data URI.

	    Args:
	        model_name: Model identifier forwarded to ``tts``.
	        tts_text: Text forwarded to ``tts``.
	        selected_voice: Key into ``voice_mapping``; the mapped value is what
	            is passed on to ``tts``.
	        slang_rate: Float forwarded to ``tts`` unchanged.
	        use_uploaded_voice: When true, ``voice_upload`` is required and its
	            bytes are forwarded to ``tts``.
	        voice_upload: Optional uploaded file, read only when
	            ``use_uploaded_voice`` is true.

	    Returns:
	        A ``JSONResponse`` with keys ``info`` (first value returned by
	        ``tts``) and ``audio_data_uri`` (base64 ``data:audio/wav`` URI).

	    Raises:
	        HTTPException: 400 if ``selected_voice`` is not in ``voice_mapping``
	            or an upload was requested but no file was sent; 500 if ``tts``
	            yields no audio or the resulting audio file cannot be read.
	    """
	    edge_tts_voice = voice_mapping.get(selected_voice)
	    if not edge_tts_voice:
	        raise HTTPException(status_code=400, detail=f"Invalid voice '{selected_voice}'.")

	    voice_upload_file = None
	    if use_uploaded_voice:
	        if voice_upload is None:
	            raise HTTPException(status_code=400, detail="No voice file uploaded")
	        voice_upload_file = await voice_upload.read()

	    # Process the text input or uploaded voice.
	    # NOTE(review): the second return value (an output path) is not used or
	    # removed here -- confirm whether it can differ from edge_output_file.
	    info, _edge_tts_output_path, tts_output_data, edge_output_file = await tts(
	        model_name, tts_text, edge_tts_voice, slang_rate, use_uploaded_voice, voice_upload_file
	    )

	    # The intermediate file is no longer needed once tts() has returned.
	    if edge_output_file and os.path.exists(edge_output_file):
	        os.remove(edge_output_file)

	    # Report a missing result explicitly instead of failing with a bare
	    # TypeError on the unpack below or on os.path.exists(None) in cleanup.
	    if tts_output_data is None:
	        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

	    # NOTE(review): the first element is discarded and the WAV is written at
	    # save_audio_data_to_file's default rate -- presumably it is the sample
	    # rate; confirm against tts().
	    _, audio_output = tts_output_data
	    if audio_output is None:
	        raise HTTPException(status_code=500, detail=f"TTS conversion produced no audio: {info}")

	    # An ndarray is written to a uniquely named WAV file; anything else is
	    # treated as a path to an already written audio file.
	    if isinstance(audio_output, np.ndarray):
	        audio_file_path = save_audio_data_to_file(audio_output)
	    else:
	        audio_file_path = audio_output

	    try:
	        with open(audio_file_path, 'rb') as file:
	            audio_bytes = file.read()
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Failed to read audio file: {e}") from e
	    finally:
	        # Cleanup the temporary audio file whether or not the read succeeded.
	        if os.path.exists(audio_file_path):
	            os.remove(audio_file_path)

	    # Encode the audio bytes as a base64 data URI for the <audio> element.
	    audio_data_uri = f"data:audio/wav;base64,{base64.b64encode(audio_bytes).decode('utf-8')}"

	    return JSONResponse(content={"info": info, "audio_data_uri": audio_data_uri})

	def convert_to_audio_bytes(audio_file_path):
	    """Return the raw bytes of the file at ``audio_file_path``.

	    Any exception raised while opening or reading the file is printed and
	    ``None`` is returned instead.
	    """
	    try:
	        with open(audio_file_path, mode='rb') as stream:
	            payload = stream.read()
	    except Exception as err:
	        print(f"Error reading audio file: {err}")
	        return None
	    else:
	        return payload




	if __name__ == "__main__":
	    # Script entry point: start uvicorn on the loopback interface, port 8000,
	    # with reload enabled. The app is given as the import string "main:app".
	    server_options = {"host": "127.0.0.1", "port": 8000, "reload": True}
	    uvicorn.run("main:app", **server_options)


	# More routes can be added as needed

	# To run the server from the command line, use:
	#     uvicorn main:app --reload