import gradio as gr
from huggingface_hub import InferenceClient

# Define available models and their Hugging Face IDs
available_models = {
    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat", 
    # Add more models here as needed
}
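
# Note: gated checkpoints such as the Llama 2 chat models typically require a
# Hugging Face access token with the appropriate permissions. InferenceClient
# will pick one up from the HF_TOKEN environment variable or from a prior
# `huggingface-cli login`.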


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model_name: str,
):
    """
    Generates a response from the AI model based on the user's message and chat history.

    Args:
        message (str): The user's input message.
        history (list): A list of tuples representing the conversation history (user, assistant).
        system_message (str): A system-level message guiding the AI's behavior.
        max_tokens (int): The maximum number of tokens for the output.
        temperature (float): Sampling temperature for controlling the randomness.
        top_p (float): Top-p (nucleus sampling) for controlling diversity.
        model_name (str): The name of the model to use.

    Yields:
        str: The AI's response as it is generated.
    """
    # Initialize the InferenceClient with the selected model
    client = InferenceClient(model=available_models[model_name])

    # Prepare the conversation history for the API call
    messages = [{"role": "system", "content": system_message}]

    for user_input, assistant_response in history:
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": assistant_response})

    # Add the latest user message to the conversation
    messages.append({"role": "user", "content": message})

    # Initialize an empty response
    streamed_response = ""

    try:
        # Generate a response from the model with streaming
        for response in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            chunk = response.choices[0].delta.content or ""
            streamed_response += chunk
            yield streamed_response

    except Exception as e:
        yield f"**Error:** {str(e)}"


def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
    """
    Inserts the latest updates into the chat history, then streams a follow-up
    response from the model. Yields the updated history so the state stays in sync.
    """
    # Record the canned exchange: the user asks for updates, the assistant shows them
    history = history + [("Show me the latest updates", latest_updates)]
    yield history

    # Stream the model's answer to a follow-up question into the history
    history.append(("What are the latest updates?", ""))
    for partial in respond(
        message="What are the latest updates?",
        history=history[:-1],
        system_message=system_message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        model_name=model_name,
    ):
        history[-1] = ("What are the latest updates?", partial)
        yield history


# Latest updates (you can replace this with actual update information)
latest_updates = """
**Chatbot - Latest Updates:**

* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content. 
"""


# Define the Gradio interface with the Blocks context
with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chat_history = gr.State([])  # Holds the conversation history used by the updates button

    # Create the additional inputs up front (render=False keeps them out of the layout
    # until ChatInterface places them in its accordion) so the same components can also
    # be wired to the "Show Latest Updates" button below.
    system_message_box = gr.Textbox(
        value="You are a friendly and helpful assistant.",
        label="System message",
        lines=2,
        render=False,
    )
    max_tokens_slider = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens", render=False
    )
    temperature_slider = gr.Slider(
        minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature", render=False
    )
    top_p_slider = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
        render=False,
    )
    model_dropdown = gr.Dropdown(
        choices=list(available_models.keys()),
        value="Zephyr 7B Beta",
        label="Select Model",
        render=False,
    )

    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            system_message_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
            model_dropdown,
        ],
        title="Multi-Model Chatbot",
        description="A customizable chatbot interface using Hugging Face's Inference API.",
    )

    # Add the "Show Updates" button and output area
    with gr.Row():
        updates_button = gr.Button("Show Latest Updates")

    # Wire the button to the update helper, reusing the shared input components
    updates_button.click(
        fn=show_updates_and_respond,
        inputs=[chat_history, system_message_box, max_tokens_slider,
                temperature_slider, top_p_slider, model_dropdown],
        outputs=chat_history,
    )

# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch(share=True)