eng-to-hau

Sleeping

App Files Files Community

eng-to-hau / app.py

Baghdad99

Update app.py

2192037 11 months ago

raw

history blame

2.03 kB

	import io
	import os

	import gradio as gr
	import numpy as np
	import requests
	from IPython.display import Audio
	from pydub import AudioSegment

	# Hugging Face Inference API endpoints for the three pipeline stages:
	# speech recognition (ASR), speech synthesis (TTS) and text translation.
	ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
	TTS_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/english_voice_tts"
	TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-hausa-text-to-english-text"

	# SECURITY: an access token used to be hard-coded on this line. Because it
	# was published with the source it must be treated as compromised and
	# revoked. The token is now read from the HF_TOKEN environment variable.
	_HF_TOKEN = os.environ.get("HF_TOKEN")

	# Request headers shared by every API call. When no token is configured the
	# requests are sent without an Authorization header.
	headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

	# Define the function to query the Hugging Face Inference API
	def query(api_url, payload=None, data=None, timeout=60):
	    """POST to an inference endpoint and return the decoded JSON response.

	    Args:
	        api_url: Full URL of the endpoint to call.
	        payload: JSON-serialisable object sent as the request body when
	            ``data`` is not given.
	        data: Raw request body (e.g. audio bytes). Takes precedence over
	            ``payload`` whenever it is not ``None``.
	        timeout: Seconds to wait for the server before giving up. Without
	            a timeout ``requests`` waits indefinitely on a stalled server.

	    Returns:
	        The response body parsed as JSON.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.Timeout: If the server does not answer within ``timeout``.
	        ValueError: If the response body is not valid JSON.
	    """
	    if data is not None:
	        response = requests.post(api_url, headers=headers, data=data, timeout=timeout)
	    else:
	        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
	    # Fail here with the HTTP status instead of handing an error payload to
	    # callers that expect a successful result.
	    response.raise_for_status()
	    return response.json()

	# Helper: pull the translated string out of a translation API response
	def _extract_translation(result):
	    """Return the translated text contained in ``result``.

	    Accepts a plain string, a dict, or a non-empty list whose first item is
	    a dict; the dict must carry ``"translation_text"`` or
	    ``"generated_text"``. Anything else raises ``RuntimeError``.
	    """
	    if isinstance(result, str):
	        return result
	    item = result[0] if isinstance(result, list) and result else result
	    if isinstance(item, dict):
	        for key in ("translation_text", "generated_text"):
	            if key in item:
	                return item[key]
	    raise RuntimeError(f"Unexpected translation response: {result!r}")


	# Helper: decode encoded audio bytes into the (rate, samples) pair Gradio plays
	def _decode_audio(audio_bytes):
	    """Decode ``audio_bytes`` with pydub and return ``(frame_rate, samples)``.

	    ``samples`` is a NumPy array; multi-channel audio is reshaped to
	    ``(n_frames, n_channels)`` because pydub interleaves the channels.
	    """
	    segment = AudioSegment.from_file(io.BytesIO(audio_bytes))
	    samples = np.array(segment.get_array_of_samples())
	    if segment.channels > 1:
	        samples = samples.reshape((-1, segment.channels))
	    return segment.frame_rate, samples


	# Define the function to translate speech
	def translate_speech(audio_file):
	    """Transcribe an audio file, translate the text and synthesize speech.

	    Args:
	        audio_file: Either a path or an object whose ``name`` attribute is
	            the path of the uploaded audio file.

	    Returns:
	        A ``(sample_rate, samples)`` tuple, the form expected by an audio
	        output declared with ``type="numpy"``.

	    Raises:
	        ValueError: If no file was provided.
	        RuntimeError: If the ASR or translation response lacks the
	            expected text field.
	        requests.HTTPError: If the TTS endpoint answers with an error status.
	    """
	    if audio_file is None:
	        raise ValueError("No audio file was provided.")

	    # Uploaded files arrive as a wrapper exposing ``.name``; a bare path is
	    # accepted as well.
	    audio_path = getattr(audio_file, "name", audio_file)
	    with open(audio_path, "rb") as f:
	        data = f.read()

	    # Speech recognition: raw audio bytes in, transcription out.
	    output = query(ASR_API_URL, data=data)
	    if not isinstance(output, dict) or "text" not in output:
	        raise RuntimeError(f"Speech recognition failed: {output!r}")
	    transcription = output["text"]

	    # Translation: only the translated string is forwarded to TTS, not the
	    # whole JSON response.
	    translation = query(TRANSLATION_API_URL, {"inputs": transcription})
	    translated_text = _extract_translation(translation)

	    # Speech synthesis: the endpoint answers with encoded audio bytes, so
	    # the JSON-decoding ``query`` helper is not used here.
	    response = requests.post(
	        TTS_API_URL,
	        headers=headers,
	        json={"inputs": translated_text},
	        timeout=60,
	    )
	    response.raise_for_status()

	    return _decode_audio(response.content)

	# Build the Gradio UI: an uploaded file goes in, audio comes out.
	_file_input = gr.inputs.File(type="file")
	_audio_output = gr.outputs.Audio(type="numpy")

	iface = gr.Interface(
	    fn=translate_speech,
	    inputs=_file_input,
	    outputs=_audio_output,
	    title="Hausa to English Translation",
	    description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis.",
	)

	# Launch the interface.
	iface.launch()