# Source: Hugging Face Space by ayureasehealthcare - commit 25abd69 ("Update app.py", verified)
import os
import gradio as gr
from google.cloud import speech
from google.cloud import texttospeech
from nlp import process_query
import warnings
# Suppress FutureWarning from huggingface_hub.
# The filter has no module/message restriction, so it silences every
# FutureWarning raised from this point on, not only huggingface_hub's.
# NOTE(review): it is installed after the imports above have already run, so
# warnings emitted while those modules are imported are not affected.
warnings.filterwarnings("ignore", category=FutureWarning)
# Google credentials setup.
# Unconditionally overwrites any GOOGLE_APPLICATION_CREDENTIALS value already
# present in the environment. The path is relative, so it is resolved against
# the process's current working directory.
# NOTE(review): presumably this is what the clients constructed below use to
# authenticate, which is why it must be set before they are created - confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/credentials.json"
# Initialize Google Cloud services.
# Both clients are created once at import time and shared by every request
# handled by the functions below.
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()
# Function for speech-to-text conversion
def speech_to_text(audio, language_code="en-US", sample_rate_hertz=None):
    """Transcribe LINEAR16 audio with Google Cloud Speech-to-Text.

    Args:
        audio: Audio content as bytes, either headerless 16-bit PCM or a
            complete WAV file. ``None`` or empty content is treated as
            "nothing was said" and no request is sent.
        language_code: BCP-47 language tag passed to the recognizer.
        sample_rate_hertz: Sample rate to declare to the service. When left
            as ``None``, headerless audio is declared as 16000 Hz (the
            previous hard-coded value) and WAV audio omits the field so the
            rate in the WAV header is used.

    Returns:
        The transcript of the top alternative of the first result, or ``""``
        when there is no audio or the service recognised nothing. Only the
        first result is used; later results in the response are ignored.
    """
    # Nothing to transcribe: skip the round trip instead of sending an empty
    # request to the service.
    if audio is None or len(audio) == 0:
        return ""

    config_fields = {
        "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
        "language_code": language_code,
    }

    # A WAV file announces its own sample rate in the RIFF header. Declaring a
    # different rate alongside it makes the request invalid, so only fall back
    # to the historical 16 kHz default for headerless PCM.
    has_wav_header = audio[:4] == b"RIFF" and audio[8:12] == b"WAVE"
    if sample_rate_hertz is None and not has_wav_header:
        sample_rate_hertz = 16000
    if sample_rate_hertz is not None:
        config_fields["sample_rate_hertz"] = sample_rate_hertz

    recognition_config = speech.RecognitionConfig(**config_fields)
    recognition_audio = speech.RecognitionAudio(content=audio)
    response = speech_client.recognize(
        config=recognition_config, audio=recognition_audio
    )

    # Extract the first result; guard against a result without alternatives.
    if response.results and response.results[0].alternatives:
        return response.results[0].alternatives[0].transcript
    return ""
# Function for text-to-speech conversion
def text_to_speech(text, language_code="en-US"):
    """Synthesize *text* with Google Cloud Text-to-Speech.

    Args:
        text: Plain text to be spoken.
        language_code: Language tag used to select the voice.

    Returns:
        The ``audio_content`` of the synthesis response; the request asks for
        MP3 encoding and a neutral-gender voice.
    """
    request_parts = {
        "input": texttospeech.SynthesisInput(text=text),
        "voice": texttospeech.VoiceSelectionParams(
            language_code=language_code,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        ),
        "audio_config": texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3
        ),
    }
    return tts_client.synthesize_speech(**request_parts).audio_content
# Function for processing user query
def handle_query(audio):
    """Answer a spoken query with text and synthesized speech.

    Args:
        audio: Recorded audio content, passed unchanged to ``speech_to_text``.
            ``None`` or empty input is answered with the fallback message
            without contacting any service.

    Returns:
        A ``(response_text, audio_response)`` tuple. ``audio_response`` is
        whatever ``text_to_speech`` returns, or ``None`` when nothing was
        understood or there is no reply text to speak.
    """
    fallback = ("Sorry, I didn't catch that. Could you please repeat?", None)

    # The callback can be invoked without a recording; treat that like an
    # unintelligible query rather than sending an empty request downstream.
    if audio is None or len(audio) == 0:
        return fallback

    # Convert speech to text
    query_text = speech_to_text(audio)
    if not query_text:
        return fallback

    # Check if the query is in regional language and process accordingly.
    # You can extend language detection if needed here.
    language = "en"  # Default language
    response_text = process_query(query_text, language)

    # Skip synthesis when there is no reply text to speak.
    if not response_text:
        return response_text, None

    # Return the response as text and convert it to speech
    audio_response = text_to_speech(response_text, language_code="en-US")
    return response_text, audio_response
# Gradio Interface
def _answer_recording(audio_path):
    """Read the recording Gradio saved to disk and pass its bytes to handle_query.

    Args:
        audio_path: Path of the recorded audio file, or ``None`` when there is
            no recording.

    Returns:
        The ``(text, audio)`` pair produced by ``handle_query``, or
        ``("", None)`` when there is no recording to process.
    """
    if not audio_path:
        return "", None
    with open(audio_path, "rb") as recording:
        return handle_query(recording.read())


iface = gr.Interface(
    fn=_answer_recording,
    # "microphone" is an input source, not a value of `type`; `type` selects
    # how the audio is delivered to the callback ("numpy" or "filepath").
    # NOTE(review): `sources=[...]` is the Gradio 4+ spelling; Gradio 3 uses
    # `source="microphone"` - confirm against the pinned Gradio version.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[gr.Textbox(), gr.Audio()],
    live=True,
    title="Ayurveda AI Voice Assistant",
    description="A voice assistant to help with Ayurvedic queries in multiple languages.",
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()