hamaadayubkhan committed
Commit 9e62d7d
Parent: 79d049f

Create app.py

Files changed (1): app.py (+126 −0)
app.py ADDED
@@ -0,0 +1,126 @@
+ # Import necessary libraries
+ import whisper
+ import os
+ from gtts import gTTS
+ import gradio as gr
+ from groq import Groq
+ import time
+
+ # Load the Whisper tiny model for faster transcription
+ model = whisper.load_model("tiny")
+
+ # Set up the Groq API client (the key is read from the environment; never hard-code it)
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
+ client = Groq(api_key=GROQ_API_KEY)
+
+ # Get the LLM response from Groq, with error handling and timing
+ def get_llm_response(user_input):
+     try:
+         start_time = time.time()  # Start time to track API delay
+         chat_completion = client.chat.completions.create(
+             messages=[{"role": "user", "content": user_input}],
+             model="llama3-8b-8192",  # Replace with your desired model
+         )
+         response_time = time.time() - start_time  # Calculate response time
+
+         # If the call took too long, return a warning instead of the (late) answer
+         if response_time > 10:  # Adjust the timeout threshold as needed
+             return "The response took too long, please try again."
+
+         return chat_completion.choices[0].message.content
+
+     except Exception as e:
+         return f"Error in LLM response: {str(e)}"
+
+ # Convert text to speech using gTTS
+ def text_to_speech(text, lang="en", output_audio="output_audio.mp3"):
+     try:
+         # gTTS expects a bare language code, so strip accent suffixes like "en-uk"
+         tts = gTTS(text, lang=lang.split("-")[0])
+         tts.save(output_audio)
+         return output_audio
+     except Exception as e:
+         return f"Error in Text-to-Speech: {str(e)}"
+
+ # Helper for the Text to Speech tab
+ def text_to_voice(user_text, voice="en"):
+     output_audio = text_to_speech(user_text, lang=voice)
+     return output_audio  # Return only the audio response
+
+ # Main chatbot function to handle audio or text input and output
+ def chatbot(audio=None, user_text=None, voice="en"):
+     try:
+         # Step 1: If audio is provided, transcribe it using Whisper
+         if audio:
+             result = model.transcribe(audio)
+             user_text = result["text"]
+
+         # Check that we ended up with some text (covers empty transcriptions
+         # and the case where neither audio nor text was provided)
+         if not user_text or not user_text.strip():
+             return "No transcription found. Please try again.", None
+
+         # Step 2: Get the LLM response from Groq
+         response_text = get_llm_response(user_text)
+         if response_text.startswith("Error"):
+             return response_text, None
+
+         # Step 3: Convert the response text to speech in the selected voice
+         output_audio = text_to_speech(response_text, lang=voice)
+         if output_audio.startswith("Error"):
+             return output_audio, None
+
+         return response_text, output_audio
+
+     except Exception as e:
+         return f"Error in chatbot processing: {str(e)}", None
+
+ # Content for the About App tab
+ def about_app():
+     about_text = """
+     **Voicesy AI** is a real-time chatbot and voice-conversion app built by Hamaad Ayub Khan.
+     It uses AI models for transcription and language processing, letting users interact
+     through both voice and text, converting text to speech and providing quick,
+     intelligent responses.
+
+     **Disclaimer**: While the AI is powerful, it may make mistakes, and users should double-check critical information.
+     """
+     return about_text
+
+ # Gradio interface for real-time interaction with voice selection
+ with gr.Blocks(css="style.css") as iface:  # style.css supplies the custom styling
+     gr.Markdown("# Voicesy AI")
+
+     # Tab for Voice to Voice
+     with gr.Tab("Voice to Voice"):
+         audio_input = gr.Audio(type="filepath", label="Input Audio (optional)")  # Input from mic or file
+         text_input = gr.Textbox(placeholder="Type your message here...", label="Input Text (optional)")
+         voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")  # Voice selection
+
+         output_text = gr.Textbox(label="AI Response")
+         output_audio = gr.Audio(type="filepath", label="AI Audio Response")
+
+         # Button for Voice to Voice
+         voice_to_voice_button = gr.Button("Voice to Voice")
+         voice_to_voice_button.click(chatbot, inputs=[audio_input, text_input, voice_selection], outputs=[output_text, output_audio])
+
+     # Tab for Text to Speech (components renamed so they do not shadow the first tab's)
+     with gr.Tab("Text to Speech"):
+         tts_text_input = gr.Textbox(placeholder="Type your message here...", label="Input Text")
+         tts_voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")
+         tts_output_audio = gr.Audio(type="filepath", label="AI Audio Response")
+
+         # Button to convert text to speech
+         convert_button = gr.Button("Convert to Speech")
+         convert_button.click(text_to_voice, inputs=[tts_text_input, tts_voice_selection], outputs=[tts_output_audio])
+
+     # Tab for About App
+     with gr.Tab("About App"):
+         gr.Markdown(about_app())
+
+     # Footer
+     gr.Markdown("Voicesy AI | [Instagram](https://instagram.com/hamaadayubkhan) | [GitHub](https://github.com/hakgs1234) | [LinkedIn](https://www.linkedin.com/in/hamaadayubkhan)")
+
+ # Launch the Gradio app when run as a script (guarded so the module can be imported for testing)
+ if __name__ == "__main__":
+     iface.launch()
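
For local testing, a minimal smoke test could look like the sketch below. It is not part of this commit: the file name smoke_test.py is illustrative, and it assumes GROQ_API_KEY is exported in the shell and that app.py sits in the working directory. Note that importing app downloads the Whisper tiny model, and the chatbot call makes a live Groq API request.

# smoke_test.py -- illustrative sketch, not part of this commit
import os

assert os.environ.get("GROQ_API_KEY"), "export GROQ_API_KEY before running"

from app import text_to_voice, chatbot  # safe to import: launch() only runs under __main__

# Text-to-speech path only (no LLM call)
print(text_to_voice("Hello from Voicesy AI", voice="en"))  # -> output_audio.mp3

# Full text -> LLM -> speech pipeline (live Groq API call)
response_text, response_audio = chatbot(user_text="In one sentence, what is Whisper?")
print(response_text, response_audio)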