# Spaces: Running
# Import necessary libraries
import os
import time

import gradio as gr
import whisper
from groq import APITimeoutError, Groq
from gtts import gTTS
# Load Whisper tiny model for faster transcription (done once, at import time)
model = whisper.load_model("tiny")

# Set up Groq API client. The key is read from the GROQ_API_KEY environment
# variable so that no credential is stored in the source file.
# NOTE(review): a literal API key used to be assigned here; it must be treated
# as leaked and revoked/rotated with the provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with an actionable message instead of a confusing auth error
    # on the first chat request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it in the deployment environment before starting the app."
    )
client = Groq(api_key=GROQ_API_KEY)
# Function to get the LLM response from Groq with error handling and a timeout
def get_llm_response(user_input, timeout_seconds=10):
    """Send ``user_input`` to the Groq chat API and return the reply text.

    Args:
        user_input: The user's message, sent as a single "user" chat turn.
        timeout_seconds: Per-request timeout handed to the Groq client.

    Returns:
        The content of the first completion choice on success. On failure a
        human-readable string is returned instead of raising:
        "The response took too long, please try again." when the request times
        out, or "Error in LLM response: ..." for any other exception.
    """
    try:
        # The timeout is enforced by the client while waiting, rather than
        # measured afterwards, so a slow call is aborted instead of a finished
        # answer being thrown away.
        # NOTE(review): the SDK may retry timed-out requests according to the
        # client's max_retries setting, so total wait can exceed this value —
        # confirm against the client configuration.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",  # Replace with your desired model
            timeout=timeout_seconds,
        )
        return chat_completion.choices[0].message.content
    except APITimeoutError:
        return "The response took too long, please try again."
    except Exception as e:
        return f"Error in LLM response: {str(e)}"
# Voice labels offered in the UI that are not plain gTTS language codes.
# Each maps to the (lang, tld) pair that selects the regional accent; any
# other label is passed to gTTS unchanged as the language code.
_VOICE_ACCENTS = {
    "en-uk": ("en", "co.uk"),
    "en-au": ("en", "com.au"),
}


def _resolve_voice(voice):
    """Translate a UI voice label into the ``(lang, tld)`` pair for gTTS."""
    return _VOICE_ACCENTS.get(voice, (voice or "en", "com"))


# Function to convert text to speech using gTTS
def text_to_speech(text, output_audio="output_audio.mp3", lang="en", tld="com"):
    """Synthesize ``text`` with gTTS and save it as an MP3 file.

    Args:
        text: The text to speak.
        output_audio: Path the MP3 is written to (overwritten on every call).
        lang: Language code passed to gTTS.
        tld: Top-level domain passed to gTTS (used for regional accents).

    Returns:
        ``output_audio`` on success, or a string starting with
        "Error in Text-to-Speech:" if synthesis or saving fails.
    """
    try:
        tts = gTTS(text, lang=lang, tld=tld)
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        return f"Error in Text-to-Speech: {str(e)}"


# Function for Text to Voice
def text_to_voice(user_text, voice="en"):
    """Convert ``user_text`` to speech using the selected ``voice``.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If speech synthesis fails. The failure message is surfaced
            to the user instead of being handed to the audio component as if
            it were a file path.
    """
    lang, tld = _resolve_voice(voice)
    result = text_to_speech(user_text, lang=lang, tld=tld)
    if result.startswith("Error in Text-to-Speech:"):
        raise gr.Error(result)
    return result  # Return only audio response


# Main chatbot function to handle audio or text input and output
def chatbot(audio=None, user_text=None, voice="en"):
    """Answer an audio or text prompt with text and synthesized speech.

    Args:
        audio: Optional path to an audio file; when given it is transcribed
            with Whisper and replaces ``user_text``.
        user_text: Optional typed message, used when no audio is supplied.
        voice: Voice label used for the spoken reply.

    Returns:
        A ``(text, audio_path)`` tuple. On any failure ``text`` holds the
        message to show and ``audio_path`` is ``None``.
    """
    try:
        # Step 1: If audio is provided, transcribe the audio using Whisper
        if audio:
            result = model.transcribe(audio)
            user_text = result["text"]
            # Check if transcription is empty
            if not user_text.strip():
                return "No transcription found. Please try again.", None

        # Nothing usable to send: avoid an LLM call with an empty prompt.
        if not user_text or not user_text.strip():
            return "No input provided. Please record audio or type a message.", None

        # Step 2: Get LLM response from Groq
        response_text = get_llm_response(user_text)
        # Match the full failure prefix so a genuine answer that merely begins
        # with the word "Error" is not mistaken for a failure.
        if response_text.startswith("Error in LLM response:"):
            return response_text, None

        # Step 3: Convert the response text to speech in the selected voice
        lang, tld = _resolve_voice(voice)
        output_audio = text_to_speech(response_text, lang=lang, tld=tld)
        if output_audio.startswith("Error in Text-to-Speech:"):
            return output_audio, None
        return response_text, output_audio
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None
# Define the About app section
def about_app():
    """Return the Markdown text describing the app."""
    return """
**Voicesy AI** is a real-time chatbot and voice conversion app built by Hamaad Ayub Khan.
It uses advanced AI models for transcription and language processing. This app allows users
to interact through both voice and text, converting text to speech and providing quick,
intelligent responses.
**Disclaimer**: While the AI is powerful, it may make mistakes, and users should double-check critical information.
"""
# Gradio interface for real-time interaction with voice selection

# Voice labels offered by the dropdown in both tabs.
VOICE_CHOICES = ["en", "en-uk", "en-au", "fr", "de", "es"]

with gr.Blocks(css="style.css") as iface:  # Include the CSS file here
    gr.Markdown("# Voicesy AI")

    # Tab for Voice to Voice: audio and/or text in, text plus audio out
    with gr.Tab("Voice to Voice"):
        # Input from mic or file
        audio_input = gr.Audio(
            type="filepath",
            label="Input Audio (optional)",
        )
        text_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Input Text (optional)",
        )
        # Voice selection
        voice_selection = gr.Dropdown(
            choices=VOICE_CHOICES,
            label="Select Voice",
            value="en",
        )
        output_text = gr.Textbox(label="AI Response")
        output_audio = gr.Audio(type="filepath", label="AI Audio Response")

        # Button for Voice to Voice, wired to the chatbot pipeline
        voice_to_voice_button = gr.Button("Voice to Voice")
        voice_to_voice_button.click(
            chatbot,
            inputs=[audio_input, text_input, voice_selection],
            outputs=[output_text, output_audio],
        )

    # Tab for Text to Speech: the names below are rebound to this tab's own
    # components; the first tab's click handler already holds its references.
    with gr.Tab("Text to Speech"):
        text_input = gr.Textbox(
            placeholder="Type your message here...",
            label="Input Text",
        )
        voice_selection = gr.Dropdown(
            choices=VOICE_CHOICES,
            label="Select Voice",
            value="en",
        )
        output_audio = gr.Audio(type="filepath", label="AI Audio Response")

        # Button to convert text to speech
        convert_button = gr.Button("Convert to Speech")
        convert_button.click(
            text_to_voice,
            inputs=[text_input, voice_selection],
            outputs=[output_audio],
        )

    # Tab for About App
    with gr.Tab("About App"):
        about = gr.Markdown(about_app())

    # Set up the footer
    gr.Markdown(
        "Voicesy AI | [Instagram](https://instagram.com/hamaadayubkhan) | [GitHub](https://github.com/hakgs1234) | [LinkedIn](https://www.linkedin.com/in/hamaadayubkhan)"
    )

# Launch the Gradio app
iface.launch()