ConversAI / src /api /speech_api.py
ishworrsubedii's picture
Integrated speech transcription
1a05dd7
raw
history blame
4.88 kB
"""
Created By: ishwor subedi
Date: 2024-07-31
"""
import os
import tempfile
from fastapi.responses import JSONResponse
from fastapi import Form
from fastapi import UploadFile, HTTPException, status
from src.models.models import TextToSpeechRequest
from fastapi.routing import APIRouter
from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline
# Router that groups the speech endpoints defined below under the
# "SpeechTranscription" tag.
speech_translator_router = APIRouter(tags=["SpeechTranscription"])
# Single pipeline instance, created once at import time and shared by both
# endpoint handlers in this module.
pipeline = SpeechTranscriptionPipeline()
@speech_translator_router.post(
    "/text_to_speech",
    description="""
** For language refer below points**
**Supported Locales:**
- **English:**
- **Australia:**
- **Language:** en
- **TLD:** com.au
- **United Kingdom:**
- **Language:** en
- **TLD:** co.uk
- **United States:**
- **Language:** en
- **TLD:** us
- **Canada:**
- **Language:** en
- **TLD:** ca
- **India:**
- **Language:** en
- **TLD:** co.in
- **Ireland:**
- **Language:** en
- **TLD:** ie
- **South Africa:**
- **Language:** en
- **TLD:** co.za
- **Nigeria:**
- **Language:** en
- **TLD:** com.ng
- **French:**
- **Canada:**
- **Language:** fr
- **TLD:** ca
- **France:**
- **Language:** fr
- **TLD:** fr
- **Mandarin:**
- **China Mainland:**
- **Language:** zh-CN
- **TLD:** any
- **Taiwan:**
- **Language:** zh-TW
- **TLD:** any
- **Portuguese:**
- **Brazil:**
- **Language:** pt
- **TLD:** com.br
- **Portugal:**
- **Language:** pt
- **TLD:** pt
- **Spanish:**
- **Mexico:**
- **Language:** es
- **TLD:** com.mx
- **Spain:**
- **Language:** es
- **TLD:** es
- **United States:**
- **Language:** es
- **TLD:** us
"""
)
async def text_to_speech(request: TextToSpeechRequest):
    """Convert the request text to audio via the shared pipeline.

    Args:
        request: Body carrying ``text``, ``lang`` and ``tld``, which are
            forwarded unchanged to ``pipeline.text_to_speech``.

    Returns:
        A 200 ``JSONResponse`` whose content holds the generated audio under
        ``"audio"`` and the numeric status under ``"status_code"``.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised (including when the
            pipeline returns an empty result); 500 for any other failure.
    """
    try:
        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
        if not audio_bytes:
            raise ValueError("Audio generation failed.")
        # NOTE(review): the response is built inside the try block on purpose,
        # so a serialization failure is still reported as the JSON 500 below.
        # This presumably relies on the pipeline returning a JSON-serializable
        # value (e.g. an encoded string) -- confirm against the pipeline.
        return JSONResponse(
            content={"audio": audio_bytes, "status_code": status.HTTP_200_OK},
            status_code=200,
        )
    except ValueError as exc:
        # Chain the cause so the original traceback is not lost.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Keep the client-facing detail generic; the cause stays attached.
        raise HTTPException(status_code=500, detail="Internal Server Error") from exc
@speech_translator_router.post(
    "/speech_to_text",
    description="""
** Specify the language used in the audio **
**Supported Languages:**
**Major Languages:**
- **English:** en
- **Mandarin Chinese:** zh
- **Spanish:** es
- **French:** fr
- **German:** de
- **Italian:** it
- **Japanese:** ja
- **Korean:** ko
- **Russian:** ru
- **Portuguese:** pt
- **Arabic:** ar
**Additional Languages:**
- **Indic Languages:**
- **Hindi:** hi
- **Bengali:** bn
- **Tamil:** ta
- **Telugu:** te
- **Southeast Asian Languages:**
- **Vietnamese:** vi
- **Thai:** th
- **Indonesian:** id
- **Malay:** ms
- **African Languages:**
- **Swahili:** sw
- **Yoruba:** yo
- **Hausa:** ha
- **European Languages:**
- **Polish:** pl
- **Dutch:** nl
- **Swedish:** sv
- **Norwegian:** no
"""
)
async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
    """Transcribe an uploaded audio file via the shared pipeline.

    The upload is written to a temporary ``.wav`` file whose path is handed
    to ``pipeline.speech_to_text`` together with ``lang``. The temporary file
    is removed before the handler returns or raises.

    Args:
        audio: Uploaded audio file.
        lang: Language code of the audio, sent as a form field.

    Returns:
        A 200 ``JSONResponse`` whose content holds the transcript under
        ``"transcript"`` and the numeric status under ``"status_code"``.

    Raises:
        HTTPException: 400 when the upload cannot be read or is empty; 500
            when the temporary file cannot be written or transcription fails;
            404 when the pipeline raises ``FileNotFoundError``.
    """
    try:
        audio_bytes = await audio.read()
    except Exception as exc:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        ) from exc
    if not audio_bytes:
        # An empty upload is reported exactly like an unreadable one.
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Invalid audio file"
        )

    temp_audio_file_path = None
    try:
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
                # Record the path before writing: the file is created with
                # delete=False, so a failed write must still be cleaned up.
                temp_audio_file_path = temp_audio_file.name
                temp_audio_file.write(audio_bytes)
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Could not process audio file"
            ) from exc

        try:
            transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
        except FileNotFoundError as exc:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail="Temporary file not found"
            ) from exc
        except Exception as exc:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="Error processing speech-to-text"
            ) from exc
    finally:
        # One cleanup point for every path above. Removing directly avoids
        # the race between an existence check and the removal.
        if temp_audio_file_path is not None:
            try:
                os.remove(temp_audio_file_path)
            except FileNotFoundError:
                pass

    return JSONResponse(
        content={"transcript": transcript, "status_code": status.HTTP_200_OK},
        status_code=200,
    )