Spaces:
Runtime error
Threatthriver committed
Commit c51870d
1 Parent(s): 66980c9
Update app.py
app.py
CHANGED
@@ -4,11 +4,9 @@ from huggingface_hub import InferenceClient
 # Define available models and their Hugging Face IDs
 available_models = {
     "Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
-    "Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
     # Add more models here as needed
 }
 
-
 def respond(
     message: str,
     history: list[tuple[str, str]],
@@ -20,37 +18,17 @@ def respond(
 ):
     """
     Generates a response from the AI model based on the user's message and chat history.
-
-    Args:
-        message (str): The user's input message.
-        history (list): A list of tuples representing the conversation history (user, assistant).
-        system_message (str): A system-level message guiding the AI's behavior.
-        max_tokens (int): The maximum number of tokens for the output.
-        temperature (float): Sampling temperature for controlling the randomness.
-        top_p (float): Top-p (nucleus sampling) for controlling diversity.
-        model_name (str): The name of the model to use.
-
-    Yields:
-        str: The AI's response as it is generated.
     """
-    # Initialize the InferenceClient with the selected model
     client = InferenceClient(model=available_models[model_name])
 
-    # Prepare the conversation history for the API call
     messages = [{"role": "system", "content": system_message}]
-
     for user_input, assistant_response in history:
         messages.append({"role": "user", "content": user_input})
         messages.append({"role": "assistant", "content": assistant_response})
-
-    # Add the latest user message to the conversation
     messages.append({"role": "user", "content": message})
 
-    # Initialize an empty response
     streamed_response = ""
-
     try:
-        # Generate a response from the model with streaming
         for response in client.chat_completion(
             messages=messages,
             max_tokens=max_tokens,
@@ -61,15 +39,18 @@ def respond(
             chunk = response.choices[0].delta.get("content", "")
             streamed_response += chunk
             yield streamed_response
-
     except Exception as e:
         yield f"**Error:** {str(e)}"
 
+# Latest updates
+latest_updates = """
+**Chatbot - Latest Updates:**
+
+* **Multiple Model Support:** You can now choose from different models like Zephyr 7B.
+* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
+"""
 
 def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
-    """
-    Shows the latest updates and then generates a response from the model based on the updates.
-    """
     history.append(("User: ", "Show me the latest updates"))
     yield from respond(
         message="Show me the latest updates",
@@ -92,19 +73,6 @@ def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
         model_name=model_name,
     )
 
-
-# Latest updates (you can replace this with actual update information)
-latest_updates = """
-**Chatbot - Latest Updates:**
-
-* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
-* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
-* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
-* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
-* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
-"""
-
-
 # Define the Gradio interface with the Blocks context
 with gr.Blocks(css=".gradio-container {border: none;}") as demo:
     chat_history = gr.State([]) # Initialize an empty chat history state
@@ -149,4 +117,4 @@ with gr.Blocks(css=".gradio-container {border: none;}") as demo:
 
 # Launch the Gradio interface in full screen
 if __name__ == "__main__":
-    demo.launch(share=True, fullscreen=True)
+    demo.launch(share=True, fullscreen=True)
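
The streaming loop in respond() relies on InferenceClient.chat_completion with stream=True and reads each chunk's delta with dict-style access. Below is a minimal standalone sketch of that pattern, not part of this commit; the model ID, prompt, and generation settings are illustrative placeholders, and the isinstance guard is an assumption to cover huggingface_hub versions where the streamed delta is an object rather than a dict:

# Hedged sketch of the streaming pattern used in respond(); not part of the commit.
from huggingface_hub import InferenceClient

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")  # placeholder model ID
messages = [
    {"role": "system", "content": "You are a friendly chatbot."},
    {"role": "user", "content": "Say hello."},
]

streamed_response = ""
for chunk in client.chat_completion(
    messages=messages,
    max_tokens=64,      # placeholder generation settings
    temperature=0.7,
    top_p=0.95,
    stream=True,        # yield partial responses as they are generated
):
    delta = chunk.choices[0].delta
    # Depending on the huggingface_hub version, delta may be a dict or an object;
    # guard the access so a chunk without content does not raise.
    text = delta.get("content") if isinstance(delta, dict) else getattr(delta, "content", None)
    streamed_response += text or ""
    print(text or "", end="", flush=True)  # echo tokens as they arrive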