Spaces:

techconspartners
/

ConversAI

Sleeping

App Files Files Community

ConversAI / src /api /speech_api.py

ishworrsubedii

Integrated speech transcription

1a05dd7 2 months ago

raw

history blame

4.88 kB

	"""
	Created By: ishwor subedi
	Date: 2024-07-31
	"""
	import logging
	import os
	import tempfile

	from fastapi import Form
	from fastapi import UploadFile, HTTPException, status
	from fastapi.responses import JSONResponse
	from fastapi.routing import APIRouter

	from src.models.models import TextToSpeechRequest
	from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline

	# Router on which the speech endpoints of this module are registered;
	# every route it holds is tagged "SpeechTranscription".
	speech_translator_router = APIRouter(tags=["SpeechTranscription"])
	# One pipeline instance, built when the module is imported and shared by
	# the request handlers defined below.
	pipeline = SpeechTranscriptionPipeline()


	@speech_translator_router.post(
	    "/text_to_speech",
	    description="""
	For language refer below points
	Supported Locales:

	- English:
	- Australia:
	- Language: en
	- TLD: com.au
	- United Kingdom:
	- Language: en
	- TLD: co.uk
	- United States:
	- Language: en
	- TLD: us
	- Canada:
	- Language: en
	- TLD: ca
	- India:
	- Language: en
	- TLD: co.in
	- Ireland:
	- Language: en
	- TLD: ie
	- South Africa:
	- Language: en
	- TLD: co.za
	- Nigeria:
	- Language: en
	- TLD: com.ng

	- French:
	- Canada:
	- Language: fr
	- TLD: ca
	- France:
	- Language: fr
	- TLD: fr

	- Mandarin:
	- China Mainland:
	- Language: zh-CN
	- TLD: any
	- Taiwan:
	- Language: zh-TW
	- TLD: any

	- Portuguese:
	- Brazil:
	- Language: pt
	- TLD: com.br
	- Portugal:
	- Language: pt
	- TLD: pt

	- Spanish:
	- Mexico:
	- Language: es
	- TLD: com.mx
	- Spain:
	- Language: es
	- TLD: es
	- United States:
	- Language: es
	- TLD: us
	"""
	)
	async def text_to_speech(request: TextToSpeechRequest):
	    """Convert the request text to audio through the shared pipeline.

	    Args:
	        request: Body whose ``text``, ``lang`` and ``tld`` fields are passed
	            to ``pipeline.text_to_speech``.

	    Returns:
	        A 200 ``JSONResponse`` with the keys ``audio`` and ``status_code``.

	    Raises:
	        HTTPException: 400 when the pipeline returns a falsy result or any
	            ``ValueError`` is raised while handling the request (its message
	            becomes the detail); 500 for every other failure.
	    """
	    try:
	        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
	        if not audio_bytes:
	            raise ValueError("Audio generation failed.")
	        # NOTE(review): the payload is rendered as JSON, so ``audio_bytes`` is
	        # presumably already a JSON-serializable value (e.g. an encoded
	        # string) - confirm against the pipeline. The response is built inside
	        # the ``try`` so that a serialization error is reported as a 500.
	        return JSONResponse(
	            content={"audio": audio_bytes, "status_code": status.HTTP_200_OK},
	            status_code=status.HTTP_200_OK,
	        )
	    except ValueError as ve:
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail=str(ve),
	        ) from ve
	    except Exception as exc:
	        # The client only sees a generic message, so record the real cause
	        # server-side instead of discarding it.
	        logging.getLogger(__name__).exception("text_to_speech failed")
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Internal Server Error",
	        ) from exc


	def _remove_temp_file(path):
	    """Delete the temporary file at *path*; log instead of raising on failure."""
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        # Already gone - nothing left to clean up.
	        pass
	    except OSError:
	        logging.getLogger(__name__).warning(
	            "Could not remove temporary file %s", path, exc_info=True
	        )


	@speech_translator_router.post(
	    "/speech_to_text",
	    description="""
	Specify the language used in the audio
	Supported Languages:

	Major Languages:
	- English: en
	- Mandarin Chinese: zh
	- Spanish: es
	- French: fr
	- German: de
	- Italian: it
	- Japanese: ja
	- Korean: ko
	- Russian: ru
	- Portuguese: pt
	- Arabic: ar

	Additional Languages:

	- Indic Languages:
	- Hindi: hi
	- Bengali: bn
	- Tamil: ta
	- Telugu: te

	- Southeast Asian Languages:
	- Vietnamese: vi
	- Thai: th
	- Indonesian: id
	- Malay: ms

	- African Languages:
	- Swahili: sw
	- Yoruba: yo
	- Hausa: ha

	- European Languages:
	- Polish: pl
	- Dutch: nl
	- Swedish: sv
	- Norwegian: no
	"""
	)
	async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
	    """Transcribe an uploaded audio file through the shared pipeline.

	    The upload is written to a temporary ``.wav`` file, whose path is handed
	    to ``pipeline.speech_to_text`` together with ``lang``. The temporary file
	    is removed afterwards, whether or not transcription succeeded.

	    Args:
	        audio: Uploaded audio file.
	        lang: Language code sent as a form field.

	    Returns:
	        A 200 ``JSONResponse`` with the keys ``transcript`` and
	        ``status_code``.

	    Raises:
	        HTTPException: 400 when the upload cannot be read or is empty; 500
	            when the temporary file cannot be written or transcription
	            fails; 404 when the pipeline raises ``FileNotFoundError``.
	    """
	    try:
	        audio_bytes = await audio.read()
	        if not audio_bytes:
	            raise ValueError("Empty audio file")
	    except Exception as exc:
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail="Invalid audio file",
	        ) from exc

	    temp_audio_file_path = None
	    try:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
	            # Remember the path before writing so that a failed write can
	            # still be cleaned up (delete=False leaves the file on disk).
	            temp_audio_file_path = temp_audio_file.name
	            temp_audio_file.write(audio_bytes)
	    except Exception as exc:
	        logging.getLogger(__name__).exception("Could not store uploaded audio")
	        if temp_audio_file_path is not None:
	            _remove_temp_file(temp_audio_file_path)
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Could not process audio file",
	        ) from exc

	    try:
	        transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
	    except FileNotFoundError as exc:
	        raise HTTPException(
	            status_code=status.HTTP_404_NOT_FOUND,
	            detail="Temporary file not found",
	        ) from exc
	    except Exception as exc:
	        # The client only sees a generic message; keep the cause in the logs.
	        logging.getLogger(__name__).exception("speech_to_text failed")
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Error processing speech-to-text",
	        ) from exc
	    finally:
	        # Best-effort cleanup: a failure here must not replace the real
	        # response or exception.
	        _remove_temp_file(temp_audio_file_path)

	    return JSONResponse(
	        content={"transcript": transcript, "status_code": status.HTTP_200_OK},
	        status_code=status.HTTP_200_OK,
	    )

	"""
	Created By: ishwor subedi
	Date: 2024-07-31
	"""
	import os
	import tempfile
	from fastapi.responses import JSONResponse
	from fastapi import Form
	from fastapi import UploadFile, HTTPException, status
	from src.models.models import TextToSpeechRequest
	from fastapi.routing import APIRouter
	from src.pipeline.speech_transcription_pipeline import SpeechTranscriptionPipeline

	# Router on which the speech endpoints of this module are registered;
	# every route it holds is tagged "SpeechTranscription".
	speech_translator_router = APIRouter(tags=["SpeechTranscription"])
	# One pipeline instance, built when the module is imported and shared by
	# the request handlers defined below.
	pipeline = SpeechTranscriptionPipeline()


	@speech_translator_router.post(
	    "/text_to_speech",
	    description="""
	For language refer below points
	Supported Locales:

	- English:
	- Australia:
	- Language: en
	- TLD: com.au
	- United Kingdom:
	- Language: en
	- TLD: co.uk
	- United States:
	- Language: en
	- TLD: us
	- Canada:
	- Language: en
	- TLD: ca
	- India:
	- Language: en
	- TLD: co.in
	- Ireland:
	- Language: en
	- TLD: ie
	- South Africa:
	- Language: en
	- TLD: co.za
	- Nigeria:
	- Language: en
	- TLD: com.ng

	- French:
	- Canada:
	- Language: fr
	- TLD: ca
	- France:
	- Language: fr
	- TLD: fr

	- Mandarin:
	- China Mainland:
	- Language: zh-CN
	- TLD: any
	- Taiwan:
	- Language: zh-TW
	- TLD: any

	- Portuguese:
	- Brazil:
	- Language: pt
	- TLD: com.br
	- Portugal:
	- Language: pt
	- TLD: pt

	- Spanish:
	- Mexico:
	- Language: es
	- TLD: com.mx
	- Spain:
	- Language: es
	- TLD: es
	- United States:
	- Language: es
	- TLD: us
	"""
	)
	async def text_to_speech(request: TextToSpeechRequest):
	    """Convert the request text to audio through the shared pipeline.

	    Args:
	        request: Body whose ``text``, ``lang`` and ``tld`` fields are passed
	            to ``pipeline.text_to_speech``.

	    Returns:
	        A 200 ``JSONResponse`` with the keys ``audio`` and ``status_code``.

	    Raises:
	        HTTPException: 400 when the pipeline returns a falsy result or any
	            ``ValueError`` is raised while handling the request (its message
	            becomes the detail); 500 for every other failure.
	    """
	    try:
	        audio_bytes = pipeline.text_to_speech(request.text, request.lang, request.tld)
	        if not audio_bytes:
	            raise ValueError("Audio generation failed.")
	        # NOTE(review): the payload is rendered as JSON, so ``audio_bytes`` is
	        # presumably already a JSON-serializable value (e.g. an encoded
	        # string) - confirm against the pipeline. The response is built inside
	        # the ``try`` so that a serialization error is reported as a 500.
	        return JSONResponse(
	            content={"audio": audio_bytes, "status_code": status.HTTP_200_OK},
	            status_code=status.HTTP_200_OK,
	        )
	    except ValueError as ve:
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail=str(ve),
	        ) from ve
	    except Exception as exc:
	        # The client only sees a generic message, so record the real cause
	        # server-side instead of discarding it.
	        logging.getLogger(__name__).exception("text_to_speech failed")
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Internal Server Error",
	        ) from exc


	def _remove_temp_file(path):
	    """Delete the temporary file at *path*; log instead of raising on failure."""
	    try:
	        os.remove(path)
	    except FileNotFoundError:
	        # Already gone - nothing left to clean up.
	        pass
	    except OSError:
	        logging.getLogger(__name__).warning(
	            "Could not remove temporary file %s", path, exc_info=True
	        )


	@speech_translator_router.post(
	    "/speech_to_text",
	    description="""
	Specify the language used in the audio
	Supported Languages:

	Major Languages:
	- English: en
	- Mandarin Chinese: zh
	- Spanish: es
	- French: fr
	- German: de
	- Italian: it
	- Japanese: ja
	- Korean: ko
	- Russian: ru
	- Portuguese: pt
	- Arabic: ar

	Additional Languages:

	- Indic Languages:
	- Hindi: hi
	- Bengali: bn
	- Tamil: ta
	- Telugu: te

	- Southeast Asian Languages:
	- Vietnamese: vi
	- Thai: th
	- Indonesian: id
	- Malay: ms

	- African Languages:
	- Swahili: sw
	- Yoruba: yo
	- Hausa: ha

	- European Languages:
	- Polish: pl
	- Dutch: nl
	- Swedish: sv
	- Norwegian: no
	"""
	)
	async def speech_to_text(audio: UploadFile, lang: str = Form(...)):
	    """Transcribe an uploaded audio file through the shared pipeline.

	    The upload is written to a temporary ``.wav`` file, whose path is handed
	    to ``pipeline.speech_to_text`` together with ``lang``. The temporary file
	    is removed afterwards, whether or not transcription succeeded.

	    Args:
	        audio: Uploaded audio file.
	        lang: Language code sent as a form field.

	    Returns:
	        A 200 ``JSONResponse`` with the keys ``transcript`` and
	        ``status_code``.

	    Raises:
	        HTTPException: 400 when the upload cannot be read or is empty; 500
	            when the temporary file cannot be written or transcription
	            fails; 404 when the pipeline raises ``FileNotFoundError``.
	    """
	    try:
	        audio_bytes = await audio.read()
	        if not audio_bytes:
	            raise ValueError("Empty audio file")
	    except Exception as exc:
	        raise HTTPException(
	            status_code=status.HTTP_400_BAD_REQUEST,
	            detail="Invalid audio file",
	        ) from exc

	    temp_audio_file_path = None
	    try:
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
	            # Remember the path before writing so that a failed write can
	            # still be cleaned up (delete=False leaves the file on disk).
	            temp_audio_file_path = temp_audio_file.name
	            temp_audio_file.write(audio_bytes)
	    except Exception as exc:
	        logging.getLogger(__name__).exception("Could not store uploaded audio")
	        if temp_audio_file_path is not None:
	            _remove_temp_file(temp_audio_file_path)
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Could not process audio file",
	        ) from exc

	    try:
	        transcript = pipeline.speech_to_text(temp_audio_file_path, lang)
	    except FileNotFoundError as exc:
	        raise HTTPException(
	            status_code=status.HTTP_404_NOT_FOUND,
	            detail="Temporary file not found",
	        ) from exc
	    except Exception as exc:
	        # The client only sees a generic message; keep the cause in the logs.
	        logging.getLogger(__name__).exception("speech_to_text failed")
	        raise HTTPException(
	            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
	            detail="Error processing speech-to-text",
	        ) from exc
	    finally:
	        # Best-effort cleanup: a failure here must not replace the real
	        # response or exception.
	        _remove_temp_file(temp_audio_file_path)

	    return JSONResponse(
	        content={"transcript": transcript, "status_code": status.HTTP_200_OK},
	        status_code=status.HTTP_200_OK,
	    )