eng-to-hau

Sleeping

File size: 1,894 Bytes

5b74a4b
fcc244c
 
5b74a4b
fcc244c
 
 
 
 
425531b
fcc244c
 
 
 
5b74a4b
fcc244c
 
 
 
 
 
 
 
 
 
 
a48f8e0
fcc244c
a48f8e0
5b74a4b
72632b9
fcc244c
c58bd88
fcc244c
 
 
17cfe18
fcc244c
a5ec736
b2c7d3a
5b74a4b
 
fcc244c
 
5b74a4b
 
 
 
b2c7d3a

import os
import tempfile

import gradio as gr
import requests
from IPython.display import Audio

# Define the Hugging Face Inference API URLs and headers
ASR_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-speech-recognition-hausa-audio-to-text"
TTS_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/english_voice_tts"
TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/Baghdad99/saad-hausa-text-to-english-text"

# SECURITY: an access token must never be committed to source control, so it
# is read from the HF_TOKEN environment variable instead of being written
# here. The token that used to be hard-coded on this line has been exposed
# and should be revoked.
# When HF_TOKEN is unset or blank the Authorization header is left out, so
# requests are sent without credentials rather than with an empty bearer.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": "Bearer " + _HF_TOKEN} if _HF_TOKEN else {}

# Define the function to query the Hugging Face Inference API
def query(api_url, payload, timeout=60):
    """POST ``payload`` as JSON to ``api_url`` and return the decoded reply.

    Args:
        api_url: URL of the inference endpoint to call.
        payload: JSON-serialisable request body.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            caller if the endpoint never answers.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked here, so an error reply is returned as-is for the caller
        to inspect.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        api_url, headers=headers, json=payload, timeout=timeout
    )
    return response.json()

# Seconds to wait on each inference request made by translate_speech.
_REQUEST_TIMEOUT = 60

# File suffix to use for the synthesized audio, keyed by response MIME type.
_AUDIO_SUFFIXES = {
    "audio/flac": ".flac",
    "audio/x-flac": ".flac",
    "audio/wav": ".wav",
    "audio/x-wav": ".wav",
    "audio/wave": ".wav",
    "audio/mpeg": ".mp3",
    "audio/ogg": ".ogg",
}


def _extract_translation(payload):
    """Return the translated string inside a translation reply, or None."""
    # NOTE(review): assumes the endpoint answers with a list of dicts (or a
    # single dict) keyed by "translation_text" or "generated_text" -- confirm
    # against the deployed model's task type.
    if isinstance(payload, str):
        return payload or None
    if isinstance(payload, list) and payload:
        payload = payload[0]
    if isinstance(payload, dict):
        for key in ("translation_text", "generated_text"):
            text = payload.get(key)
            if isinstance(text, str) and text:
                return text
    return None


def _audio_suffix(content_type):
    """Pick a file suffix for audio served with the given Content-Type."""
    mime = (content_type or "").split(";")[0].strip().lower()
    # NOTE(review): ".flac" fallback is an assumption about the TTS
    # endpoint's default encoding -- confirm.
    return _AUDIO_SUFFIXES.get(mime, ".flac")


# Define the function to translate speech
def translate_speech(audio):
    """Transcribe recorded speech, translate it, and synthesize the result.

    Args:
        audio: The recording to process: either a file-like object exposing
            the path in ``.name`` or a plain path string. ``None`` (nothing
            recorded) is accepted.

    Returns:
        Path of a temporary audio file holding the synthesized speech, or
        ``None`` when there is no input or any stage fails to produce a
        usable result. A path is returned instead of raw bytes because the
        audio output component is fed a file path or a (rate, samples) pair.
        The temporary file is not deleted by this function.
    """
    if audio is None:
        return None

    # Use the ASR pipeline to transcribe the audio
    audio_path = getattr(audio, "name", audio)
    with open(audio_path, "rb") as f:
        data = f.read()
    response = requests.post(
        ASR_API_URL, headers=headers, data=data, timeout=_REQUEST_TIMEOUT
    )
    try:
        output = response.json()
    except ValueError:
        # Body was not JSON (e.g. a gateway error page); treat as no result.
        output = None

    # Check if the output contains 'text'
    if not isinstance(output, dict) or 'text' not in output:
        print("The output does not contain 'text'")
        return None
    transcription = output["text"]

    # Use the translation pipeline to translate the transcription. The reply
    # is a JSON structure, so the string must be pulled out of it before it
    # can be used as the TTS input.
    translated_text = _extract_translation(
        query(TRANSLATION_API_URL, {"inputs": transcription})
    )
    if translated_text is None:
        print("The translation response does not contain translated text")
        return None

    # Use the TTS pipeline to synthesize the translated text
    response = requests.post(
        TTS_API_URL,
        headers=headers,
        json={"inputs": translated_text},
        timeout=_REQUEST_TIMEOUT,
    )
    if not response.ok or not response.content:
        # An error reply carries a message body, not audio; do not save it.
        print("The text-to-speech request failed")
        return None

    suffix = _audio_suffix(response.headers.get("Content-Type"))
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as out:
        out.write(response.content)
    return out.name

# Build the Gradio UI: the two audio components are created first, then
# wired around translate_speech and served.
_microphone_input = gr.inputs.Audio(source="microphone", type="file")
_speech_output = gr.outputs.Audio(type="auto")

iface = gr.Interface(
    fn=translate_speech,
    inputs=_microphone_input,
    outputs=_speech_output,
    title="Hausa to English Translation",
    description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis.",
)

# Start the web app.
iface.launch()