Spaces:

ayureasehealthcare
/

Ayurezeassistant

Runtime error

App Files Community

ayureasehealthcare committed on Dec 26, 2024

Commit

93ca6c9

verified ·

1 Parent(s): aa02ad6

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -57

app.py CHANGED Viewed

@@ -1,85 +1,76 @@
 import os
 import gradio as gr
-from google.cloud import speech, texttospeech
 from nlp import process_query
-from utils.language_support import detect_language, translate_text
-# Set Google credentials
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/google_credentials.json"
-# Initialize Google Speech-to-Text client
-speech_client = speech.SpeechClient()
-# Initialize Google Text-to-Speech client
 tts_client = texttospeech.TextToSpeechClient()
-# Function to transcribe speech to text
-def transcribe_speech(audio_file):
-    with open(audio_file, "rb") as f:
-        audio_data = f.read()
-    audio = speech.RecognitionAudio(content=audio_data)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
-        language_code="ta-IN",  # Default to Tamil; can detect dynamically
-        enable_automatic_punctuation=True
     )
     response = speech_client.recognize(config=config, audio=audio)
     if response.results:
         return response.results[0].alternatives[0].transcript
-    else:
-        return "Could not transcribe the audio."
-# Function to convert text to speech
-def generate_speech(text, language_code="ta-IN"):
-    input_text = texttospeech.SynthesisInput(text=text)
     voice = texttospeech.VoiceSelectionParams(
-        language_code=language_code,
-        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
     )
     audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
     response = tts_client.synthesize_speech(
-        input=input_text, voice=voice, audio_config=audio_config
     )
     return response.audio_content
-# Main function to handle user interaction
-def voice_assistant(audio_file):
-    # Step 1: Transcribe audio to text
-    user_input = transcribe_speech(audio_file)
-    if not user_input:
-        return "Could not process your query. Please try again.", None
-    # Step 2: Detect language
-    language_code = detect_language(user_input)
-    # Step 3: Process the query using NLP
-    response_text = process_query(user_input, language_code)
-    # Step 4: Generate speech response
-    speech_output = generate_speech(response_text, language_code)
-    return response_text, speech_output
-# Gradio interface
-def app_interface(audio_file):
-    text_response, audio_response = voice_assistant(audio_file)
-    return text_response, (audio_response, "output.mp3")
-# Gradio UI
-app = gr.Interface(
-    fn=app_interface,
-    inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs=[
-        gr.Textbox(label="Bot Response"),
-        gr.Audio(label="Bot Voice Response")
-    ],
-    title="Multilingual AI Voice Assistant",
-    description="Speak to the bot in your preferred regional language (Tamil, Hindi, Malayalam, Telugu, Kannada)."
 )
 if __name__ == "__main__":
-    app.launch()

 import os
 import gradio as gr
+from google.cloud import speech
+from google.cloud import texttospeech
 from nlp import process_query
+import warnings
+# Suppress FutureWarning from huggingface_hub
+warnings.filterwarnings("ignore", category=FutureWarning)
+# Google credentials setup
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/credentials.json"
+# Initialize Google Cloud services
+speech_client = speech.SpeechClient()
 tts_client = texttospeech.TextToSpeechClient()
+# Function for speech-to-text conversion
+def speech_to_text(audio):
+    audio = speech.RecognitionAudio(content=audio)
     config = speech.RecognitionConfig(
         encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
+        sample_rate_hertz=16000,
+        language_code="en-US",  # You can set it dynamically based on user input
     )
     response = speech_client.recognize(config=config, audio=audio)
+    # Extract the first result
     if response.results:
         return response.results[0].alternatives[0].transcript
+    return ""
+# Function for text-to-speech conversion
+def text_to_speech(text, language_code="en-US"):
+    synthesis_input = texttospeech.SynthesisInput(text=text)
     voice = texttospeech.VoiceSelectionParams(
+        language_code=language_code, ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
     )
     audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
     response = tts_client.synthesize_speech(
+        input=synthesis_input, voice=voice, audio_config=audio_config
     )
     return response.audio_content
+# Function for processing user query
+def handle_query(audio):
+    # Convert speech to text
+    query_text = speech_to_text(audio)
+    # Check if the query is in regional language and process accordingly
+    language = "en"  # Default language
+    if query_text:
+        # You can extend language detection if needed here.
+        response_text = process_query(query_text, language)
+        # Return the response as text and convert it to speech
+        audio_response = text_to_speech(response_text, language_code="en-US")
+        return response_text, audio_response
+    return "Sorry, I didn't catch that. Could you please repeat?", None
+# Gradio Interface
+iface = gr.Interface(
+    fn=handle_query,
+    inputs=gr.Audio(source="microphone", type="bytes"),
+    outputs=[gr.Textbox(), gr.Audio()],
+    live=True,
+    title="Ayurveda AI Voice Assistant",
+    description="A voice assistant to help with Ayurvedic queries in multiple languages.",
 )
+# Launch the interface
 if __name__ == "__main__":
+    iface.launch()