"""
Created By: ishwor subedi
Date: 2024-07-31
"""
import contextlib
import os
import tempfile

from fastapi.responses import JSONResponse
from fastapi import Form
from fastapi import UploadFile, HTTPException, status
from src.models.models import TextToSpeechRequest
from fastapi.routing import APIRouter
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline

speech_translator_router = APIRouter(tags=["SpeechTranscription"])
pipeline = SpeechTranscriptionPipeline()

# Markdown shown in the OpenAPI docs for POST /text_to_speech.
# Kept at column 0 so the nested-list indentation is exactly what the
# Markdown renderer sees (an explicit ``description=`` is passed through as-is).
_TEXT_TO_SPEECH_DESCRIPTION = """
** For language refer below points**

**Supported Locales:**

- **English:**
  - **Australia:**
    - **Language:** en
    - **TLD:** com.au
  - **United Kingdom:**
    - **Language:** en
    - **TLD:** co.uk
  - **United States:**
    - **Language:** en
    - **TLD:** us
  - **Canada:**
    - **Language:** en
    - **TLD:** ca
  - **India:**
    - **Language:** en
    - **TLD:** co.in
  - **Ireland:**
    - **Language:** en
    - **TLD:** ie
  - **South Africa:**
    - **Language:** en
    - **TLD:** co.za
  - **Nigeria:**
    - **Language:** en
    - **TLD:** com.ng
- **French:**
  - **Canada:**
    - **Language:** fr
    - **TLD:** ca
  - **France:**
    - **Language:** fr
    - **TLD:** fr
- **Mandarin:**
  - **China Mainland:**
    - **Language:** zh-CN
    - **TLD:** any
  - **Taiwan:**
    - **Language:** zh-TW
    - **TLD:** any
- **Portuguese:**
  - **Brazil:**
    - **Language:** pt
    - **TLD:** com.br
  - **Portugal:**
    - **Language:** pt
    - **TLD:** pt
- **Spanish:**
  - **Mexico:**
    - **Language:** es
    - **TLD:** com.mx
  - **Spain:**
    - **Language:** es
    - **TLD:** es
  - **United States:**
    - **Language:** es
    - **TLD:** us
"""

# Markdown shown in the OpenAPI docs for POST /speech_to_text.
_SPEECH_TO_TEXT_DESCRIPTION = """
** Specify the language used in the audio **

**Supported Languages:**

**Major Languages:**

- **English:** en
- **Mandarin Chinese:** zh
- **Spanish:** es
- **French:** fr
- **German:** de
- **Italian:** it
- **Japanese:** ja
- **Korean:** ko
- **Russian:** ru
- **Portuguese:** pt
- **Arabic:** ar

**Additional Languages:**

- **Indic Languages:**
  - **Hindi:** hi
  - **Bengali:** bn
  - **Tamil:** ta
  - **Telugu:** te
- **Southeast Asian Languages:**
  - **Vietnamese:** vi
  - **Thai:** th
  - **Indonesian:** id
  - **Malay:** ms
- **African Languages:**
  - **Swahili:** sw
  - **Yoruba:** yo
  - **Hausa:** ha
- **European Languages:**
  - **Polish:** pl
  - **Dutch:** nl
  - **Swedish:** sv
  - **Norwegian:** no
"""


@speech_translator_router.post(
    "/text_to_speech",
    description=_TEXT_TO_SPEECH_DESCRIPTION,
)
async def text_to_speech(request: TextToSpeechRequest):
    """Convert the request text to speech and return it in a JSON body.

    Args:
        request: Payload whose ``text``, ``lang`` and ``tld`` attributes are
            forwarded unchanged to ``pipeline.text_to_speech``.

    Returns:
        A 200 ``JSONResponse`` with the keys ``audio`` (the pipeline result)
        and ``status_code``.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised (including the case
            where the pipeline returns a falsy result); 500 for any other
            exception, including a failure to build the JSON response.
    """
    try:
        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not audio_bytes:
            raise ValueError("Audio generation failed.")
        # NOTE(review): the pipeline result is embedded directly in the JSON
        # body, so it is presumably already JSON-serializable (e.g. a base64
        # string) -- confirm against the pipeline implementation.
        return JSONResponse(
            content={"audio": audio_bytes, "status_code": status.HTTP_200_OK},
            status_code=200,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Chain the cause so the original traceback is still available to
        # logging / debugging even though the client only sees a generic 500.
        raise HTTPException(status_code=500, detail="Internal Server Error") from exc


@speech_translator_router.post(
    "/speech_to_text",
    description=_SPEECH_TO_TEXT_DESCRIPTION,
)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file.

    The upload is written to a temporary ``.wav`` file whose path is handed to
    ``pipeline.speech_to_text`` together with ``lang``. The temporary file is
    removed afterwards, whether staging or transcription succeeded or failed.

    Args:
        audio: The uploaded audio file.
        lang: Language code of the audio, sent as a form field.

    Returns:
        A 200 ``JSONResponse`` with the keys ``transcript`` and ``status_code``.

    Raises:
        HTTPException: 400 if the upload cannot be read or is empty; 500 if the
            temporary file cannot be written or transcription fails; 404 if the
            pipeline raises ``FileNotFoundError``.
    """
    try:
        audio_bytes = await audio.read()
        if not audio_bytes:
            raise ValueError("Empty audio file")
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file",
        ) from exc

    temp_audio_file_path = None
    try:
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                # Record the path before writing: with delete=False the file
                # outlives the context manager, so a failed write must still
                # be cleaned up by the outer ``finally``.
                temp_audio_file_path = temp_audio_file.name
                temp_audio_file.write(audio_bytes)
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Could not process audio file",
            ) from exc

        try:
            # NOTE(review): this is a plain (non-awaited) call inside an async
            # handler; if the pipeline is slow it will block the event loop.
            transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
        except FileNotFoundError as exc:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Temporary file not found",
            ) from exc
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Error processing speech-to-text",
            ) from exc
    finally:
        if temp_audio_file_path is not None:
            # Tolerate the file already being gone instead of a racy
            # exists()-then-remove() check.
            with contextlib.suppress(FileNotFoundError):
                os.remove(temp_audio_file_path)

    return JSONResponse(
        content={"transcript": transcript, "status_code": status.HTTP_200_OK},
        status_code=200,
    )