Spaces: Threatthriver (Runtime error)

Commit 66980c9 (parent: e18c985): Update app.py
Committed by Threatthriver

app.py CHANGED
@@ -1,18 +1,13 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
-#
-
+# Define available models and their Hugging Face IDs
+available_models = {
+    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
+    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
+    # Add more models here as needed
+}
 
-# Latest updates (you can replace this with actual update information)
-latest_updates = """
-**Zephyr 7B Beta Chatbot - Latest Updates:**
-
-* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
-* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
-* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
-* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
-"""
 
 def respond(
     message: str,
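The new `available_models` dict maps display names to Hub repo ids. Note that `meta-llama/Llama-2-70b-chat` is a gated repository, so the Space needs an accepted license and an access token for that entry to resolve. As a hypothetical sanity check (not part of this commit), each id can be verified against the Hub before it is wired into the UI:

# Hypothetical sketch: verify each registry entry resolves on the Hub.
# Gated repos (e.g. the Llama 2 id above) raise unless a token with
# accepted access is available via HF_TOKEN or huggingface-cli login.
from huggingface_hub import model_info

for name, repo_id in {
    "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
}.items():
    try:
        model_info(repo_id)
        print(f"{name}: ok")
    except Exception as err:
        print(f"{name}: unavailable ({err})")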
@@ -21,6 +16,7 @@ def respond(
     max_tokens: int,
     temperature: float,
     top_p: float,
+    model_name: str,
 ):
     """
     Generates a response from the AI model based on the user's message and chat history.
@@ -32,10 +28,13 @@
         max_tokens (int): The maximum number of tokens for the output.
         temperature (float): Sampling temperature for controlling the randomness.
         top_p (float): Top-p (nucleus sampling) for controlling diversity.
+        model_name (str): The name of the model to use.
 
     Yields:
         str: The AI's response as it is generated.
     """
+    # Initialize the InferenceClient with the selected model
+    client = InferenceClient(model=available_models[model_name])
 
     # Prepare the conversation history for the API call
     messages = [{"role": "system", "content": system_message}]
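The body of `respond` between these hunks is unchanged and therefore elided by the diff. Judging from the docstring ("Yields: str") and the "Robust Chunk Handling" note in `latest_updates`, it presumably streams tokens from the client and skips empty chunks. A minimal sketch of that pattern, assuming `InferenceClient.chat_completion` with `stream=True` (an assumption, not the commit's exact code):

# Sketch of the elided generation loop: stream deltas and guard
# against chunks that carry no content.
response = ""
for chunk in client.chat_completion(
    messages,
    max_tokens=max_tokens,
    stream=True,
    temperature=temperature,
    top_p=top_p,
):
    token = chunk.choices[0].delta.content
    if token:  # some streamed chunks have no content; skip them
        response += token
        yield response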
@@ -66,7 +65,8 @@
     except Exception as e:
         yield f"**Error:** {str(e)}"
 
-def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
+
+def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
     """
     Shows the latest updates and then generates a response from the model based on the updates.
     """
@@ -78,6 +78,7 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        model_name=model_name,
     )
     history[-1] = ("User: ", "Show me the latest updates")
     history.append(("Assistant:", latest_updates))
@@ -88,10 +89,25 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p):
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        model_name=model_name,
     )
 
+
+# Latest updates (you can replace this with actual update information)
+latest_updates = """
+**Chatbot - Latest Updates:**
+
+* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
+* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
+* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
+* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
+"""
+
+
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
+    chat_history = gr.State([])  # Initialize an empty chat history state
     chat_interface = gr.ChatInterface(
         fn=respond,
         additional_inputs=[
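`gr.State` gives each session a server-side value that event handlers receive as an input and update by returning a new value; that is how `chat_history` is meant to round-trip through `show_updates_and_respond`. A self-contained illustration of the pattern (hypothetical names, unrelated to this app):

import gradio as gr

with gr.Blocks() as demo:
    clicks = gr.State(0)  # per-session value, invisible in the UI
    button = gr.Button("Increment")
    shown = gr.Number(label="Clicks so far")
    # The handler reads the current state and returns (new_state, display).
    button.click(fn=lambda c: (c + 1, c + 1), inputs=clicks, outputs=[clicks, shown])

demo.launch()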
@@ -109,9 +125,15 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
                 step=0.05,
                 label="Top-p (nucleus sampling)",
             ),
+            gr.Dropdown(
+                choices=list(available_models.keys()),
+                value="Zephyr 7B Beta",
+                label="Select Model",
+            ),
         ],
-        title="…
-        description="A customizable chatbot interface using Hugging Face's …
+        title="Multi-Model Chatbot",
+        description="A customizable chatbot interface using Hugging Face's Inference API.",
+        chat_history=chat_history,  # Pass the state to the ChatInterface
     )
 
     # Add the "Show Updates" button and output area
@@ -121,11 +143,10 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     # Define the button's click event (now inside the Blocks context)
     updates_button.click(
         fn=show_updates_and_respond,
-        inputs=[…
-        outputs=…
+        inputs=[chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"), chat_interface.dropdown],
+        outputs=chat_history
     )
 
-
 # Launch the Gradio interface in full screen
 if __name__ == "__main__":
     demo.launch(share=True, fullscreen=True)
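The Space's "Runtime error" status is consistent with a few problems in the new wiring: `gr.ChatInterface` does not accept a `chat_history` keyword, `launch()` has no `fullscreen` parameter, the `gr.Slider(...)` calls inside `inputs=[...]` create fresh unrendered components instead of referencing the ones shown in the chat interface, and `chat_interface.dropdown` is not an attribute `gr.ChatInterface` exposes. A hedged sketch of one way to wire the same behavior (illustrative component names, not the author's code):

import gradio as gr

# Build the shared controls once, with render=False so the
# ChatInterface decides where they appear.
system_box = gr.Textbox(label="System message", render=False)
max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max new tokens", render=False)
temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature", render=False)
top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)", render=False)
model_dd = gr.Dropdown(
    choices=list(available_models.keys()),
    value="Zephyr 7B Beta",
    label="Select Model",
    render=False,
)

with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chat_history = gr.State([])
    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[system_box, max_tokens, temperature, top_p, model_dd],
        title="Multi-Model Chatbot",
        description="A customizable chatbot interface using Hugging Face's Inference API.",
    )
    updates_button = gr.Button("Show Updates")
    updates_button.click(
        fn=show_updates_and_respond,
        # Reference the already-created components instead of making new ones.
        inputs=[chat_history, system_box, max_tokens, temperature, top_p, model_dd],
        outputs=chat_history,
    )

if __name__ == "__main__":
    demo.launch(share=True)  # launch() takes no fullscreen argument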