dschandra committed on
Commit
698d57e
·
verified ·
1 Parent(s): b44f72b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -34
app.py CHANGED
@@ -1,10 +1,15 @@
 
1
  from transformers import pipeline
2
  from gtts import gTTS
3
- import gradio as gr
4
  import os
 
 
 
 
5
 
6
- # Initialize Whisper pipeline for speech-to-text
7
- pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
 
8
 
9
  # Menu for the restaurant
10
  menu = {
@@ -15,44 +20,83 @@ menu = {
15
  "Drinks": ["Mango Lassi", "Soda", "Water"]
16
  }
17
 
18
- # Function to convert text to speech
19
- def text_to_speech(text):
20
- tts = gTTS(text, lang="en")
21
- audio_file = "response.mp3"
22
- tts.save(audio_file)
23
- return audio_file
24
 
25
- # Chatbot logic
26
- def chatbot_conversation(audio_file):
27
- # Speech-to-text using Whisper
 
 
 
28
  try:
29
- transcription = pipe(audio_file)["text"]
30
  except Exception as e:
31
- return f"Error: {e}", None
32
-
33
- # Generate a response based on transcription
34
- if "menu" in transcription.lower():
35
- response = "Our menu categories are: " + ", ".join(menu.keys())
36
- elif "order" in transcription.lower():
37
- response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
38
- elif "thank you" in transcription.lower():
39
- response = "You're welcome! Enjoy your meal!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  else:
41
- response = "I'm sorry, I didn't understand that. Could you please repeat?"
42
 
43
- # Convert response to audio
44
- audio_response = text_to_speech(response)
 
45
 
46
- return response, audio_response
 
 
 
 
47
 
48
  # Gradio Interface
49
- iface = gr.Interface(
50
- fn=chatbot_conversation,
51
- inputs=gr.Audio(type="filepath"),
52
- outputs=[gr.Textbox(label="Transcription"), gr.Audio(label="Response Audio")],
53
- title="Restaurant Chatbot with Whisper ASR",
54
- description="Speak to the chatbot and get a response!",
55
- )
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
- iface.launch()
 
 
 
 
 
1
+ import gradio as gr
2
  from transformers import pipeline
3
  from gtts import gTTS
 
4
  import os
5
+ import numpy as np
6
+
7
# Initialize the speech recognition pipeline.
# NOTE(review): "openai/whisper-large" is a heavy checkpoint that is downloaded
# on first run; loading happens at import time, so startup is slow.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Conversation history and context
conversation_history = []  # alternating "Customer: ..." / "AI: ..." lines
# last_action tracks the dialogue state machine (None -> greet -> show_menu /
# place_order -> final_order); order accumulates the menu items chosen so far.
context = {"last_action": None, "order": []}
13
 
14
  # Menu for the restaurant
15
  menu = {
 
20
  "Drinks": ["Mango Lassi", "Soda", "Water"]
21
  }
22
 
23
# Text-to-Speech Function
def speak_and_save(text, filename="response.mp3"):
    """Synthesize *text* as English speech with gTTS and write it to *filename*.

    Returns the path of the saved MP3 so the caller can hand it to Gradio.
    Requires network access (gTTS calls the Google TTS endpoint).
    """
    speech = gTTS(text=text, lang='en')
    speech.save(filename)
    return filename
 
28
 
29
# Process the audio file and generate response
def process_order(audio_file_path):
    """Transcribe the customer's recording and produce a text + audio reply.

    Parameters
    ----------
    audio_file_path : str
        Filesystem path to the clip recorded by the Gradio Audio widget.

    Returns
    -------
    tuple
        (text response, path to the spoken MP3 reply) — the audio element is
        None when speech recognition fails.

    Raises
    ------
    ValueError
        If no audio path was supplied.
    """
    if audio_file_path is None:
        raise ValueError("Audio file path is None. Please provide a valid path.")

    # Recognize speech; keep the try body minimal so only ASR errors are caught.
    try:
        transcript = asr_pipeline(audio_file_path)["text"]
    except Exception as e:
        return f"Error in speech recognition: {e}", None

    # Process the recognized text.  NOTE: `context` is a module-level dict that
    # is only mutated (never rebound), so no `global` statement is needed.
    user_input = transcript.lower()
    conversation_history.append(f"Customer: {user_input}")

    if context["last_action"] is None:
        # First turn: greet regardless of what was said.
        response = "Welcome to our restaurant! How can I assist you today?"
        context["last_action"] = "greet"

    elif "menu" in user_input:
        response = "Here is our menu:\n"
        for category, items in menu.items():
            response += f"{category}: {', '.join(items)}\n"
        response += "What would you like to order?"
        context["last_action"] = "show_menu"

    elif "order" in user_input or any(item.lower() in user_input for item in sum(menu.values(), [])):
        # Collect every menu item mentioned in this utterance.
        for items in menu.values():
            for item in items:
                if item.lower() in user_input:
                    context["order"].append(item)
        if context["order"]:
            response = f"I have added {', '.join(context['order'])} to your order. Would you like anything else?"
        else:
            # BUG FIX: previously `response` stayed "" when the customer said
            # "order" without naming a recognizable item, so an empty reply
            # was logged and spoken.  Prompt for an item instead.
            response = "What would you like to order? Please name an item from our menu."
        context["last_action"] = "place_order"

    elif "no" in user_input or "that's it" in user_input:
        response = f"Your final order is: {', '.join(context['order'])}. Thank you for your order. Your food will arrive shortly."
        context["last_action"] = "final_order"
        context["order"] = []  # Reset the order

    else:
        response = "I'm not sure what you meant. Could you clarify?"

    conversation_history.append(f"AI: {response}")
    audio_response_path = speak_and_save(response)
    return response, audio_response_path
76
 
77
# Save Conversation History
def save_conversation():
    """Write the in-memory conversation history to conversation_history.txt.

    Returns a short confirmation message.  UTF-8 is forced explicitly so that
    non-ASCII transcripts (menu items, customer names) do not raise
    UnicodeEncodeError on platforms whose default codec is narrower
    (e.g. cp1252 on Windows).
    """
    with open("conversation_history.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(conversation_history))
    return "Conversation history saved successfully!"
82
 
83
# Gradio Interface
def create_interface():
    """Build and return the Gradio Interface wired to `process_order`."""
    voice_input = gr.Audio(type="filepath", label="Your Voice Input")
    text_output = gr.Textbox(label="Text Response")
    audio_output = gr.Audio(label="Audio Response")
    return gr.Interface(
        fn=process_order,
        inputs=voice_input,
        outputs=[text_output, audio_output],
        title="Restaurant Voice Assistant",
        description="Talk to our voice assistant to place your order or ask about the menu!",
        live=True,
    )
96
 
97
if __name__ == "__main__":
    # Persist the conversation log even if the UI crashes or is interrupted.
    try:
        create_interface().launch()
    finally:
        save_conversation()