"""Ayurveda AI voice assistant.

A Gradio front-end that records a spoken query from the microphone,
transcribes it with Google Cloud Speech-to-Text, answers it via the
project's `process_query` NLP pipeline, and speaks the answer back
using Google Cloud Text-to-Speech.
"""

import os
import tempfile
import warnings

import gradio as gr
from google.cloud import speech
from google.cloud import texttospeech

from nlp import process_query

# Suppress FutureWarning from huggingface_hub
warnings.filterwarnings("ignore", category=FutureWarning)

# Google credentials setup
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/credentials.json"

# Initialize Google Cloud services once at import time.
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()


def speech_to_text(audio_path: str) -> str:
    """Transcribe the audio file at *audio_path* and return the transcript.

    Returns an empty string when the recognizer produced no results.

    NOTE(review): the config assumes 16 kHz LINEAR16 audio; Gradio's
    microphone recordings may use a different sample rate — confirm, or
    let the API infer the rate from the WAV header.
    """
    # Gradio hands us a filepath (see the Interface below); the Speech API
    # needs the raw bytes.
    with open(audio_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",  # You can set it dynamically based on user input
    )
    response = speech_client.recognize(config=config, audio=audio)

    # Use the top alternative of the first result, if any.
    if response.results:
        return response.results[0].alternatives[0].transcript
    return ""


def text_to_speech(text: str, language_code: str = "en-US") -> bytes:
    """Synthesize *text* into MP3 audio bytes with Google Cloud TTS."""
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )
    response = tts_client.synthesize_speech(
        input=synthesis_input, voice=voice, audio_config=audio_config
    )
    return response.audio_content


def handle_query(audio):
    """Process one voice query end to end.

    Args:
        audio: Filepath of the recorded clip from the Gradio microphone
            input, or ``None`` when nothing was captured.

    Returns:
        A ``(response_text, audio_path_or_None)`` tuple for the two
        Gradio output components.
    """
    # Live microphone input can deliver None between recordings.
    if audio is None:
        return "Sorry, I didn't catch that. Could you please repeat?", None

    # Convert speech to text
    query_text = speech_to_text(audio)

    language = "en"  # Default language; extend language detection here if needed.
    if query_text:
        response_text = process_query(query_text, language)

        # Convert the answer to speech.  gr.Audio output expects a filepath
        # (or (rate, ndarray)), not raw bytes, so persist the MP3 first.
        audio_bytes = text_to_speech(response_text, language_code="en-US")
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            tmp.write(audio_bytes)
            audio_path = tmp.name
        return response_text, audio_path

    return "Sorry, I didn't catch that. Could you please repeat?", None


# Gradio Interface
iface = gr.Interface(
    fn=handle_query,
    # `sources` selects the microphone; `type="filepath"` makes Gradio pass
    # the recording as a file path (the old type="microphone" is invalid).
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[gr.Textbox(), gr.Audio()],
    live=True,
    title="Ayurveda AI Voice Assistant",
    description="A voice assistant to help with Ayurvedic queries in multiple languages.",
)

# Launch the interface
if __name__ == "__main__":
    iface.launch()