ayureasehealthcare committed on
Commit
93ca6c9
·
verified ·
1 Parent(s): aa02ad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -57
app.py CHANGED
@@ -1,85 +1,76 @@
1
  import os
2
  import gradio as gr
3
- from google.cloud import speech, texttospeech
 
4
  from nlp import process_query
5
- from utils.language_support import detect_language, translate_text
6
 
7
- # Set Google credentials
8
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/google_credentials.json"
9
 
10
- # Initialize Google Speech-to-Text client
11
- speech_client = speech.SpeechClient()
12
 
13
- # Initialize Google Text-to-Speech client
 
14
  tts_client = texttospeech.TextToSpeechClient()
15
 
16
- # Function to transcribe speech to text
17
- def transcribe_speech(audio_file):
18
- with open(audio_file, "rb") as f:
19
- audio_data = f.read()
20
-
21
- audio = speech.RecognitionAudio(content=audio_data)
22
  config = speech.RecognitionConfig(
23
  encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
24
- language_code="ta-IN", # Default to Tamil; can detect dynamically
25
- enable_automatic_punctuation=True
26
  )
27
 
28
  response = speech_client.recognize(config=config, audio=audio)
 
 
29
  if response.results:
30
  return response.results[0].alternatives[0].transcript
31
- else:
32
- return "Could not transcribe the audio."
33
 
34
- # Function to convert text to speech
35
- def generate_speech(text, language_code="ta-IN"):
36
- input_text = texttospeech.SynthesisInput(text=text)
37
  voice = texttospeech.VoiceSelectionParams(
38
- language_code=language_code,
39
- ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
40
  )
41
  audio_config = texttospeech.AudioConfig(audio_encoding=texttospeech.AudioEncoding.MP3)
42
 
43
  response = tts_client.synthesize_speech(
44
- input=input_text, voice=voice, audio_config=audio_config
45
  )
46
  return response.audio_content
47
 
48
- # Main function to handle user interaction
49
- def voice_assistant(audio_file):
50
- # Step 1: Transcribe audio to text
51
- user_input = transcribe_speech(audio_file)
52
- if not user_input:
53
- return "Could not process your query. Please try again.", None
54
-
55
- # Step 2: Detect language
56
- language_code = detect_language(user_input)
57
-
58
- # Step 3: Process the query using NLP
59
- response_text = process_query(user_input, language_code)
60
-
61
- # Step 4: Generate speech response
62
- speech_output = generate_speech(response_text, language_code)
63
-
64
- return response_text, speech_output
65
-
66
- # Gradio interface
67
- def app_interface(audio_file):
68
- text_response, audio_response = voice_assistant(audio_file)
69
- return text_response, (audio_response, "output.mp3")
70
-
71
- # Gradio UI
72
- app = gr.Interface(
73
- fn=app_interface,
74
- inputs=gr.Audio(source="microphone", type="filepath"),
75
- outputs=[
76
- gr.Textbox(label="Bot Response"),
77
- gr.Audio(label="Bot Voice Response")
78
- ],
79
- title="Multilingual AI Voice Assistant",
80
- description="Speak to the bot in your preferred regional language (Tamil, Hindi, Malayalam, Telugu, Kannada)."
81
  )
82
 
 
83
  if __name__ == "__main__":
84
- app.launch()
85
 
 
import os
import warnings

import gradio as gr
from google.cloud import speech
from google.cloud import texttospeech

from nlp import process_query

# Suppress FutureWarning noise (e.g. from huggingface_hub) in the app logs.
warnings.filterwarnings("ignore", category=FutureWarning)

# Point the Google SDK at the bundled service-account key, but do NOT
# clobber a credentials path already configured in the environment
# (the previous unconditional assignment overwrote any deployment config).
os.environ.setdefault("GOOGLE_APPLICATION_CREDENTIALS", "credentials/credentials.json")

# Shared Google Cloud clients, created once at import time and reused by
# the functions below.
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()
 
18
# Function for speech-to-text conversion
def speech_to_text(audio, language_code="en-US"):
    """Transcribe LINEAR16 audio to text via Google Speech-to-Text.

    Args:
        audio: Raw audio bytes, or a path to an audio file on disk.
            Accepting both keeps the function working whichever ``type``
            the Gradio Audio component is configured with.
        language_code: BCP-47 language tag for recognition; defaults to
            ``"en-US"`` (the original hard-coded value).

    Returns:
        The transcript of the first recognition result, or ``""`` when
        the service returned no results.
    """
    # Backward-compatible generalization: a filepath is read into bytes,
    # since RecognitionAudio(content=...) requires bytes, not a path.
    if isinstance(audio, (str, os.PathLike)):
        with open(audio, "rb") as fh:
            audio = fh.read()

    recognition_audio = speech.RecognitionAudio(content=audio)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,  # assumes 16 kHz capture — TODO confirm mic sample rate
        language_code=language_code,
    )

    response = speech_client.recognize(config=config, audio=recognition_audio)

    # Only the best alternative of the first result is used; an empty
    # results list means no speech was recognized.
    if response.results:
        return response.results[0].alternatives[0].transcript
    return ""
 
33
 
34
# Function for text-to-speech conversion
def text_to_speech(text, language_code="en-US"):
    """Synthesize ``text`` into MP3 audio with Google Text-to-Speech.

    Args:
        text: Plain text to be spoken.
        language_code: BCP-47 language tag selecting the voice family;
            defaults to ``"en-US"``.

    Returns:
        MP3-encoded audio content as bytes.
    """
    request_input = texttospeech.SynthesisInput(text=text)
    voice_params = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    result = tts_client.synthesize_speech(
        input=request_input,
        voice=voice_params,
        audio_config=mp3_config,
    )
    return result.audio_content
46
 
47
# Function for processing user query
def handle_query(audio):
    """Run the full voice pipeline: audio in, (text, audio) response out.

    Returns:
        A tuple ``(response_text, audio_bytes)``. When nothing could be
        transcribed, the text is an apology prompt and the audio is None.
    """
    transcript = speech_to_text(audio)

    # Guard clause: nothing was recognized — ask the user to repeat.
    if not transcript:
        return "Sorry, I didn't catch that. Could you please repeat?", None

    # Default language; language detection could be plugged in here.
    language = "en"
    reply = process_query(transcript, language)

    # Speak the reply back and return both representations.
    audio_reply = text_to_speech(reply, language_code="en-US")
    return reply, audio_reply
62
+
63
# Gradio Interface
# Wires the microphone input through handle_query and renders both the
# text reply and the synthesized voice reply. live=True re-runs the
# pipeline whenever the input changes.
# NOTE(review): gr.Audio(source=..., type="bytes") is version-sensitive —
# recent Gradio releases use `sources=[...]` and accept only
# type="numpy"/"filepath"; confirm against the pinned gradio version.
iface = gr.Interface(
    fn=handle_query,
    inputs=gr.Audio(source="microphone", type="bytes"),
    outputs=[gr.Textbox(), gr.Audio()],
    live=True,
    title="Ayurveda AI Voice Assistant",
    description="A voice assistant to help with Ayurvedic queries in multiple languages.",
)

# Launch the interface only when run as a script (not on import).
if __name__ == "__main__":
    iface.launch()
76