# Voicesy AI — a real-time chatbot and text-to-speech app.
# Pipeline: Whisper (audio -> text) -> Groq LLM (text -> reply) -> gTTS (reply -> audio),
# wired together through a Gradio Blocks UI.

import os
import time

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Load the Whisper "tiny" model once at startup: smallest/fastest checkpoint,
# adequate for short conversational clips.
model = whisper.load_model("tiny")

# SECURITY FIX: the API key was previously hard-coded in source (and therefore
# leaked to anyone with repo access). Read it from the environment instead:
# export GROQ_API_KEY=... before launching, and rotate the old key.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)

# The UI offers regional variants ("en-uk", "en-au") that gTTS does not accept
# as a `lang` code; map them to their base language. Unknown codes fall back to "en".
_GTTS_LANG_MAP = {"en-uk": "en", "en-au": "en"}
_SUPPORTED_LANGS = {"en", "fr", "de", "es"}


def _normalize_lang(voice):
    """Map a UI voice selection to a language code gTTS accepts."""
    lang = _GTTS_LANG_MAP.get(voice, voice)
    return lang if lang in _SUPPORTED_LANGS else "en"


def get_llm_response(user_input):
    """Send *user_input* to the Groq LLM and return the reply text.

    Never raises: failures come back as an "Error in LLM response: ..." string
    so callers can surface them directly in the UI.
    """
    try:
        start_time = time.time()  # Start time to track API delay
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",  # Replace with your desired model
        )
        response_time = time.time() - start_time  # Calculate response time
        # Treat a very slow reply as a failure so the UI stays responsive.
        if response_time > 10:  # You can adjust the timeout threshold
            return "The response took too long, please try again."
        return chat_completion.choices[0].message.content
    except Exception as e:
        return f"Error in LLM response: {str(e)}"


def text_to_speech(text, output_audio="output_audio.mp3", lang="en"):
    """Render *text* to an MP3 via gTTS and return the output file path.

    ``lang`` is new (backward-compatible default "en") — previously the
    selected voice was silently ignored because gTTS was always called with
    its default language.  On failure returns an
    "Error in Text-to-Speech: ..." string instead of raising.
    """
    try:
        tts = gTTS(text, lang=lang)
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        return f"Error in Text-to-Speech: {str(e)}"


def text_to_voice(user_text, voice="en"):
    """Convert *user_text* straight to speech; return the audio file path.

    BUG FIX: ``voice`` was previously accepted but never forwarded to the TTS
    engine; it is now normalized and passed through.
    """
    return text_to_speech(user_text, lang=_normalize_lang(voice))


def chatbot(audio=None, user_text=None, voice="en"):
    """Full round trip: transcribe (if audio), query the LLM, speak the reply.

    Audio input takes precedence over typed text.  Returns a
    ``(response_text, audio_path)`` pair; the audio slot is ``None`` on any
    error so the Gradio outputs stay consistent.
    """
    try:
        # Step 1: if audio was supplied, transcribe it with Whisper.
        if audio:
            result = model.transcribe(audio)
            user_text = result["text"]

        # BUG FIX: the empty-input guard previously ran only inside the audio
        # branch, so a blank text box (with no audio) reached the LLM as None.
        if not user_text or not user_text.strip():
            return "No transcription found. Please try again.", None

        # Step 2: get the LLM reply; error strings short-circuit the pipeline.
        response_text = get_llm_response(user_text)
        if response_text.startswith("Error"):
            return response_text, None

        # Step 3: speak the reply in the selected voice.
        output_audio = text_to_speech(response_text, lang=_normalize_lang(voice))
        if output_audio.startswith("Error"):
            return output_audio, None

        return response_text, output_audio
    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None


def about_app():
    """Return the Markdown body shown in the "About App" tab."""
    about_text = """
    **Voicesy AI** is a real-time chatbot and voice conversion app built by Hamaad Ayub Khan. It uses advanced AI models for transcription and language processing. This app allows users to interact through both voice and text, converting text to speech and providing quick, intelligent responses.

    **Disclaimer**: While the AI is powerful, it may make mistakes, and users should double-check critical information.
    """
    return about_text


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(css="style.css") as iface:  # Include the CSS file here
    gr.Markdown("# Voicesy AI")
    # Add logo at the top
    gr.Image("", label="Voicesy AI")  # TODO: update the path to your logo image

    # Tab 1: full conversation loop (voice or text in, text + voice out).
    with gr.Tab("Voice to Voice"):
        audio_input = gr.Audio(type="filepath", label="Input Audio (optional)")  # Input from mic or file
        text_input = gr.Textbox(placeholder="Type your message here...", label="Input your Text To Interact with LLM")
        voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")
        output_text = gr.Textbox(label="AI Response")
        output_audio = gr.Audio(type="filepath", label="AI Audio Response")

        voice_to_voice_button = gr.Button("Voice to Voice")
        voice_to_voice_button.click(
            chatbot,
            inputs=[audio_input, text_input, voice_selection],
            outputs=[output_text, output_audio],
        )

    # Tab 2: plain text-to-speech, no LLM involved.  Locals are renamed so
    # they do not shadow the first tab's component handles.
    with gr.Tab("Text to Speech"):
        tts_text_input = gr.Textbox(placeholder="Type your message here...", label="Input Text")
        tts_voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")
        tts_output_audio = gr.Audio(type="filepath", label="AI Audio Response")

        convert_button = gr.Button("Convert to Speech")
        convert_button.click(text_to_voice, inputs=[tts_text_input, tts_voice_selection], outputs=[tts_output_audio])

    # Tab 3: static info.
    with gr.Tab("About App"):
        about = gr.Markdown(about_app())

    # Set up the footer
    gr.Markdown("Voicesy AI | [Instagram](https://instagram.com/hamaadayubkhan) | [GitHub](https://github.com/hakgs1234) | [LinkedIn](https://www.linkedin.com/in/hamaadayubkhan)")

# Launch the Gradio app
iface.launch()