ayureasehealthcare committed on
Commit
60ed2af
·
verified ·
1 Parent(s): 649d2a9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -0
app.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from google.cloud import speech, texttospeech
4
+ from nlp import process_query
5
+ from utils.language_support import detect_language, translate_text
6
+
7
# Point the Google client libraries at the service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "credentials/google_credentials.json"

# Google Cloud API clients, created once at import time and reused by the
# handler functions below.
speech_client = speech.SpeechClient()
tts_client = texttospeech.TextToSpeechClient()
15
+
16
def transcribe_speech(audio_file, language_code="ta-IN"):
    """Transcribe a local audio file with Google Speech-to-Text.

    Args:
        audio_file: Path to an audio file on disk (LINEAR16 encoding
            assumed — TODO confirm Gradio's microphone capture format).
        language_code: BCP-47 code to transcribe in. Defaults to Tamil
            ("ta-IN"), matching the original hard-coded behavior.

    Returns:
        The top transcription alternative as a string, or None when the
        API returned no results.
    """
    with open(audio_file, "rb") as f:
        audio_data = f.read()

    audio = speech.RecognitionAudio(content=audio_data)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code=language_code,
        enable_automatic_punctuation=True,
    )

    response = speech_client.recognize(config=config, audio=audio)
    if response.results:
        return response.results[0].alternatives[0].transcript
    # Bug fix: the old fallback returned the truthy string
    # "Could not transcribe the audio.", so voice_assistant's
    # `if not user_input` guard never fired and the error text was fed
    # into the NLP pipeline. Return None so the falsy check works.
    return None
33
+
34
def generate_speech(text, language_code="ta-IN"):
    """Synthesize `text` into MP3 audio bytes via Google Text-to-Speech.

    Args:
        text: The reply text to speak.
        language_code: BCP-47 voice language (defaults to Tamil).

    Returns:
        Raw MP3 audio bytes from the synthesis response.
    """
    synthesis_input = texttospeech.SynthesisInput(text=text)
    voice_params = texttospeech.VoiceSelectionParams(
        language_code=language_code,
        ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
    )
    mp3_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3,
    )

    result = tts_client.synthesize_speech(
        input=synthesis_input,
        voice=voice_params,
        audio_config=mp3_config,
    )
    return result.audio_content
47
+
48
def voice_assistant(audio_file):
    """Run the full pipeline: audio file in, (reply text, MP3 bytes) out.

    Steps: transcribe speech, detect the language of the transcript,
    generate an NLP reply, then synthesize that reply as audio.
    Returns (error message, None) when transcription produced nothing.
    """
    user_input = transcribe_speech(audio_file)
    if not user_input:
        return "Could not process your query. Please try again.", None

    language_code = detect_language(user_input)
    response_text = process_query(user_input, language_code)
    speech_output = generate_speech(response_text, language_code)
    return response_text, speech_output
65
+
66
def app_interface(audio_file):
    """Gradio adapter: reshape voice_assistant's output for the UI widgets."""
    reply_text, reply_audio = voice_assistant(audio_file)
    # Gradio's Audio output takes the raw bytes paired with a filename.
    return reply_text, (reply_audio, "output.mp3")
70
+
71
# Gradio front-end wiring: microphone in, text + synthesized voice out.
# NOTE(review): `gr.Audio(source=...)` was renamed to `sources=[...]` in
# Gradio 4.x — confirm the pinned gradio version before upgrading.
app = gr.Interface(
    fn=app_interface,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Bot Response"),
        gr.Audio(label="Bot Voice Response"),
    ],
    title="Multilingual AI Voice Assistant",
    description="Speak to the bot in your preferred regional language (Tamil, Hindi, Malayalam, Telugu, Kannada).",
)

if __name__ == "__main__":
    app.launch()
85
+