File size: 4,519 Bytes
c67e035
 
6c40506
 
c67e035
ba6c3d0
 
c67e035
6c40506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c67e035
ba6c3d0
c67e035
ba6c3d0
 
 
 
6c40506
 
c67e035
ba6c3d0
 
 
 
 
 
 
 
 
 
6c40506
 
c67e035
ba6c3d0
 
 
 
 
 
 
 
 
 
 
 
c67e035
ba6c3d0
c67e035
 
ba6c3d0
c67e035
 
ba6c3d0
6c40506
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba6c3d0
 
 
6c40506
 
 
ba6c3d0
 
c67e035
ba6c3d0
c67e035
2178804
 
 
 
6c40506
 
c67e035
2178804
 
6c40506
 
c67e035
 
ba6c3d0
c67e035
6c40506
ba6c3d0
6c40506
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import gradio as gr
from huggingface_hub import InferenceClient
import logging
from datetime import datetime

# Initialize the InferenceClient with the model ID from Hugging Face
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

# Set up logging.
# The log file is opened as UTF-8 explicitly: chat text routinely contains
# non-ASCII characters (emoji, accented letters), and without an explicit
# encoding the file uses the platform's locale encoding, which can fail to
# encode such messages and drop the log entry.
logging.basicConfig(
    filename='chatbot_log.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    encoding='utf-8',
)

def log_conversation(user_message, bot_response):
    """
    Logs one exchange between the user and the AI at INFO level.

    Two records are written through the root logger: the user's message
    first, then the AI's response.

    Args:
        user_message (str): The user's input message.
        bot_response (str): The AI's response.
    """
    # Pass the values as logging arguments instead of pre-formatting them:
    # interpolation is then deferred until a handler actually emits the
    # record, and the raw values stay available on ``record.args``.
    logging.info("User: %s", user_message)
    logging.info("Bot: %s", bot_response)

def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    stop_sequence: str,
    stream_response: bool,
):
    """
    Generates a response from the AI model based on the user's message and chat history.

    The system message, the prior turns in ``history`` and the new user
    message are assembled into a chat-style message list and sent through
    the module-level ``client``. A completed exchange is written to the log
    via ``log_conversation`` in both streaming and non-streaming mode.

    Args:
        message (str): The user's input message.
        history (list): A list of tuples representing the conversation history (user, assistant).
            Empty/falsy entries in a pair are skipped.
        system_message (str): A system-level message guiding the AI's behavior.
        max_tokens (int): The maximum number of tokens for the output.
        temperature (float): Sampling temperature for controlling the randomness.
        top_p (float): Top-p (nucleus sampling) for controlling diversity.
        stop_sequence (str): A custom stop sequence to end the response generation.
            An empty string means "no stop sequence".
        stream_response (bool): Whether to stream the response or return it as a whole.

    Yields:
        str: When streaming, the response accumulated so far (one yield per
        received chunk); otherwise the complete response once. If anything
        fails, a single "An error occurred: ..." string is yielded instead
        of raising.
    """

    # Prepare the conversation history for the API call
    messages = [{"role": "system", "content": system_message}]

    for user_input, assistant_response in history:
        if user_input:
            messages.append({"role": "user", "content": user_input})
        if assistant_response:
            messages.append({"role": "assistant", "content": assistant_response})

    # Add the latest user message to the conversation
    messages.append({"role": "user", "content": message})

    # The UI hands over a single, possibly empty, string, while the API takes
    # a list of stop strings (or None for "no stop sequences").
    stop = [stop_sequence] if stop_sequence else None

    response = ""

    try:
        if stream_response:
            # Generate a response from the model with streaming.
            # The loop variable is deliberately NOT called ``message`` so the
            # user's message is still available for logging afterwards.
            for chunk in client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                stream=True,
                temperature=temperature,
                top_p=top_p,
                stop=stop,
            ):
                # Some chunks may carry no choices; there is nothing to append.
                if not chunk.choices:
                    continue
                # ``content`` can be present but None; treat that as empty
                # text rather than letting ``+=`` raise TypeError.
                response += chunk.choices[0].delta.get("content") or ""
                yield response
            # Only reached when the stream ran to completion.
            log_conversation(message, response)
        else:
            # Generate a complete response without streaming
            result = client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                stream=False,
                temperature=temperature,
                top_p=top_p,
                stop=stop,
            )
            response = result.choices[0].message.get("content") or ""
            log_conversation(message, response)
            yield response

    except Exception as e:
        # Top-level boundary for the UI callback: surface the failure as chat
        # text and keep the traceback in the log for diagnosis.
        error_message = f"An error occurred: {e}"
        logging.exception(error_message)
        yield error_message

# Define the ChatInterface with additional input components for user customization.
# The additional inputs are passed to `respond` positionally after
# (message, history), so their order here must match its parameter order:
# system_message, max_tokens, temperature, top_p, stop_sequence, stream_response.
#
# NOTE: `allow_flagging` is a `gr.Interface` keyword; `gr.ChatInterface` does
# not take it and raises TypeError when it is passed, so it is not passed here.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System Message", lines=2),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max New Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Nucleus Sampling)"),
        gr.Textbox(value="", label="Stop Sequence (optional)", lines=1),
        gr.Checkbox(label="Stream Response", value=True),
    ],
    title="AI Chatbot Interface",
    description="Interact with an AI chatbot powered by Hugging Face's Zephyr-7B model. Customize the chatbot's behavior and response generation settings.",
    theme="default",
)

# Launch the Gradio interface
if __name__ == "__main__":
    logging.info("Launching the Gradio interface...")
    # When run as a script, launch() blocks the main thread while the server
    # is running, so anything after it executes only once the server stops.
    demo.launch()
    logging.info("Gradio interface has shut down.")