# Import necessary libraries
import whisper
import os
from gtts import gTTS
import gradio as gr
from groq import Groq
import time

# Load Whisper tiny model for faster transcription
model = whisper.load_model("tiny")
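# Note: Whisper also ships larger checkpoints ("base", "small", "medium", "large")
# that trade speed for accuracy; "tiny" keeps transcription latency low.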

# Set up the Groq API client (reads GROQ_API_KEY from the environment; never hardcode secrets)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)
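
# Fail fast if the key is missing; a minimal sanity check (assumption: raising here
# is clearer than an authentication error surfacing on the first request)
if not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; export it before launching the app.")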

# Function to get the LLM response from Groq with error handling and timing
def get_llm_response(user_input):
    try:
        start_time = time.time()  # Start time to track API delay
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": user_input}],
            model="llama3-8b-8192",  # Replace with your desired model
        )
        response_time = time.time() - start_time  # Calculate response time

        # If the round trip took too long, surface a warning instead of the stale answer
        # (note: this check runs after the blocking call, so it cannot interrupt it)
        if response_time > 10:  # Adjust this timeout threshold as needed
            return "The response took too long, please try again."

        return chat_completion.choices[0].message.content

    except Exception as e:
        return f"Error in LLM response: {str(e)}"

# Function to convert text to speech using gTTS
def text_to_speech(text, lang="en", output_audio="output_audio.mp3"):
    try:
        # gTTS expects a base language code (e.g. "en", "fr"), so regional
        # variants such as "en-uk" from the dropdown are reduced to their base code
        tts = gTTS(text, lang=lang.split("-")[0])
        tts.save(output_audio)
        return output_audio
    except Exception as e:
        return f"Error in Text-to-Speech: {str(e)}"

# Function for the Text to Speech tab: synthesize the typed text in the selected voice
def text_to_voice(user_text, voice="en"):
    output_audio = text_to_speech(user_text, lang=voice)
    return output_audio  # Return only the audio response

# Main chatbot function: handles audio or text input and returns text + audio output
def chatbot(audio=None, user_text=None, voice="en"):
    try:
        # Step 1: If audio is provided, transcribe it using Whisper
        if audio:
            result = model.transcribe(audio)
            user_text = result["text"]

            # Check whether the transcription came back empty
            if not user_text.strip():
                return "No transcription found. Please try again.", None

        # Guard against the case where neither audio nor text was supplied
        if not user_text or not user_text.strip():
            return "Please record some audio or type a message first.", None

        # Step 2: Get the LLM response from Groq
        response_text = get_llm_response(user_text)

        # Step 3: Convert the response text to speech in the selected voice
        if response_text.startswith("Error"):
            return response_text, None

        output_audio = text_to_speech(response_text, lang=voice)

        if output_audio.startswith("Error"):
            return output_audio, None

        return response_text, output_audio

    except Exception as e:
        return f"Error in chatbot processing: {str(e)}", None

# Define the About app section
def about_app():
    about_text = """
    **Voicesy AI** is a real-time chatbot and voice conversion app built by Hamaad Ayub Khan.
    It uses advanced AI models for transcription and language processing. This app allows users 
    to interact through both voice and text, converting text to speech and providing quick, 
    intelligent responses.
    **Disclaimer**: While the AI is powerful, it may make mistakes, and users should double-check critical information.
    """
    return about_text

# Gradio interface for real-time interaction with voice selection
with gr.Blocks(css="style.css") as iface:  # Custom styling (assumes style.css sits next to this script)
    gr.Markdown("# Voicesy AI")

    # Tab for Voice to Voice
    with gr.Tab("Voice to Voice"):
        audio_input = gr.Audio(type="filepath", label="Input Audio (optional)")  # Input from mic or file
        text_input = gr.Textbox(placeholder="Type your message here...", label="Input Text (optional)")
        voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")  # Voice selection
        
        output_text = gr.Textbox(label="AI Response")
        output_audio = gr.Audio(type="filepath", label="AI Audio Response")
        
        # Button for Voice to Voice
        voice_to_voice_button = gr.Button("Voice to Voice")
        
        # Define button actions
        voice_to_voice_button.click(chatbot, inputs=[audio_input, text_input, voice_selection], outputs=[output_text, output_audio])

    # Tab for Text to Speech
    with gr.Tab("Text to Speech"):
        text_input = gr.Textbox(placeholder="Type your message here...", label="Input Text")
        voice_selection = gr.Dropdown(choices=["en", "en-uk", "en-au", "fr", "de", "es"], label="Select Voice", value="en")
        output_audio = gr.Audio(type="filepath", label="AI Audio Response")

        # Button to convert text to speech
        convert_button = gr.Button("Convert to Speech")
        convert_button.click(text_to_voice, inputs=[text_input, voice_selection], outputs=[output_audio])  

    # Tab for About App
    with gr.Tab("About App"):
        about = gr.Markdown(about_app())

    # Set up the footer
    gr.Markdown("Voicesy AI | [Instagram](https://instagram.com/hamaadayubkhan) | [GitHub](https://github.com/hakgs1234) | [LinkedIn](https://www.linkedin.com/in/hamaadayubkhan)")

# Launch the Gradio app
iface.launch()
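
# Note: iface.launch(share=True) would additionally expose a temporary public URL,
# which is handy when running in a notebook environment.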