dschandra committed on
Commit
698d57e
·
verified ·
1 Parent(s): b44f72b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -34
app.py CHANGED
@@ -1,10 +1,15 @@
 
1
  from transformers import pipeline
2
  from gtts import gTTS
3
- import gradio as gr
4
  import os
 
 
 
 
5
 
6
- # Initialize Whisper pipeline for speech-to-text
7
- pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3-turbo")
 
8
 
9
  # Menu for the restaurant
10
  menu = {
@@ -15,44 +20,83 @@ menu = {
15
  "Drinks": ["Mango Lassi", "Soda", "Water"]
16
  }
17
 
18
- # Function to convert text to speech
19
- def text_to_speech(text):
20
- tts = gTTS(text, lang="en")
21
- audio_file = "response.mp3"
22
- tts.save(audio_file)
23
- return audio_file
24
 
25
- # Chatbot logic
26
- def chatbot_conversation(audio_file):
27
- # Speech-to-text using Whisper
 
 
 
28
  try:
29
- transcription = pipe(audio_file)["text"]
30
  except Exception as e:
31
- return f"Error: {e}", None
32
-
33
- # Generate a response based on transcription
34
- if "menu" in transcription.lower():
35
- response = "Our menu categories are: " + ", ".join(menu.keys())
36
- elif "order" in transcription.lower():
37
- response = "What would you like to order? We have " + ", ".join(menu["Main Course"])
38
- elif "thank you" in transcription.lower():
39
- response = "You're welcome! Enjoy your meal!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  else:
41
- response = "I'm sorry, I didn't understand that. Could you please repeat?"
42
 
43
- # Convert response to audio
44
- audio_response = text_to_speech(response)
 
45
 
46
- return response, audio_response
 
 
 
 
47
 
48
  # Gradio Interface
49
- iface = gr.Interface(
50
- fn=chatbot_conversation,
51
- inputs=gr.Audio(type="filepath"),
52
- outputs=[gr.Textbox(label="Transcription"), gr.Audio(label="Response Audio")],
53
- title="Restaurant Chatbot with Whisper ASR",
54
- description="Speak to the chatbot and get a response!",
55
- )
 
 
 
 
 
56
 
57
  if __name__ == "__main__":
58
- iface.launch()
 
 
 
 
 
1
+ import gradio as gr
2
  from transformers import pipeline
3
  from gtts import gTTS
 
4
  import os
5
+ import numpy as np
6
+
7
# Initialize the speech recognition pipeline.
# NOTE(review): "openai/whisper-large" is a heavy checkpoint that is downloaded
# on first run; loading happens at import time, so startup is slow.
asr_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-large")

# Conversation history and context
conversation_history = []  # alternating "Customer: ..." / "AI: ..." lines
# last_action tracks the dialogue state machine (None -> greet -> show_menu /
# place_order -> final_order); order accumulates the menu items chosen so far.
context = {"last_action": None, "order": []}
13
 
14
  # Menu for the restaurant
15
  menu = {
 
20
  "Drinks": ["Mango Lassi", "Soda", "Water"]
21
  }
22
 
23
# Text-to-Speech Function
def speak_and_save(text, filename="response.mp3"):
    """Synthesize *text* as English speech with gTTS and write it to *filename*.

    Returns the path of the saved MP3 so the caller can hand it to Gradio.
    Requires network access (gTTS calls the Google TTS endpoint).
    """
    speech = gTTS(text=text, lang='en')
    speech.save(filename)
    return filename
 
28
 
29
# Process the audio file and generate response
def process_order(audio_file_path):
    """Transcribe the customer's recording and produce a text + audio reply.

    Parameters
    ----------
    audio_file_path : str
        Filesystem path to the clip recorded by the Gradio Audio widget.

    Returns
    -------
    tuple
        (text response, path to the spoken MP3 reply) — the audio element is
        None when speech recognition fails.

    Raises
    ------
    ValueError
        If no audio path was supplied.
    """
    if audio_file_path is None:
        raise ValueError("Audio file path is None. Please provide a valid path.")

    # Recognize speech; keep the try body minimal so only ASR errors are caught.
    try:
        transcript = asr_pipeline(audio_file_path)["text"]
    except Exception as e:
        return f"Error in speech recognition: {e}", None

    # Process the recognized text.  NOTE: `context` is a module-level dict that
    # is only mutated (never rebound), so no `global` statement is needed.
    user_input = transcript.lower()
    conversation_history.append(f"Customer: {user_input}")

    if context["last_action"] is None:
        # First turn: greet regardless of what was said.
        response = "Welcome to our restaurant! How can I assist you today?"
        context["last_action"] = "greet"

    elif "menu" in user_input:
        response = "Here is our menu:\n"
        for category, items in menu.items():
            response += f"{category}: {', '.join(items)}\n"
        response += "What would you like to order?"
        context["last_action"] = "show_menu"

    elif "order" in user_input or any(item.lower() in user_input for item in sum(menu.values(), [])):
        # Collect every menu item mentioned in this utterance.
        for items in menu.values():
            for item in items:
                if item.lower() in user_input:
                    context["order"].append(item)
        if context["order"]:
            response = f"I have added {', '.join(context['order'])} to your order. Would you like anything else?"
        else:
            # BUG FIX: previously `response` stayed "" when the customer said
            # "order" without naming a recognizable item, so an empty reply
            # was logged and spoken.  Prompt for an item instead.
            response = "What would you like to order? Please name an item from our menu."
        context["last_action"] = "place_order"

    elif "no" in user_input or "that's it" in user_input:
        response = f"Your final order is: {', '.join(context['order'])}. Thank you for your order. Your food will arrive shortly."
        context["last_action"] = "final_order"
        context["order"] = []  # Reset the order

    else:
        response = "I'm not sure what you meant. Could you clarify?"

    conversation_history.append(f"AI: {response}")
    audio_response_path = speak_and_save(response)
    return response, audio_response_path
76
 
77
# Save Conversation History
def save_conversation():
    """Write the in-memory conversation history to conversation_history.txt.

    Returns a short confirmation message.  UTF-8 is forced explicitly so that
    non-ASCII transcripts (menu items, customer names) do not raise
    UnicodeEncodeError on platforms whose default codec is narrower
    (e.g. cp1252 on Windows).
    """
    with open("conversation_history.txt", "w", encoding="utf-8") as f:
        f.write("\n".join(conversation_history))
    return "Conversation history saved successfully!"
82
 
83
# Gradio Interface
def create_interface():
    """Build and return the Gradio Interface wired to `process_order`."""
    voice_input = gr.Audio(type="filepath", label="Your Voice Input")
    text_output = gr.Textbox(label="Text Response")
    audio_output = gr.Audio(label="Audio Response")
    return gr.Interface(
        fn=process_order,
        inputs=voice_input,
        outputs=[text_output, audio_output],
        title="Restaurant Voice Assistant",
        description="Talk to our voice assistant to place your order or ask about the menu!",
        live=True,
    )
96
 
97
if __name__ == "__main__":
    # Persist the conversation log even if the UI crashes or is interrupted.
    try:
        create_interface().launch()
    finally:
        save_conversation()