import gradio as gr
from huggingface_hub import InferenceClient
# Define available models and their Hugging Face IDs
available_models = {
"Zephyr 7B Beta": "HuggingFaceH4/zephyr-7b-beta",
"Llama 2 70B Chat": "meta-llama/Llama-2-70b-chat",
# Add more models here as needed
}
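# Note: gated models such as the Llama 2 repos may require accepting Meta's license and
# providing a Hugging Face access token (e.g. via the HF_TOKEN Space secret) before the
# Inference API will serve them.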
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    model_name: str,
):
"""
Generates a response from the AI model based on the user's message and chat history.
Args:
message (str): The user's input message.
history (list): A list of tuples representing the conversation history (user, assistant).
system_message (str): A system-level message guiding the AI's behavior.
max_tokens (int): The maximum number of tokens for the output.
temperature (float): Sampling temperature for controlling the randomness.
top_p (float): Top-p (nucleus sampling) for controlling diversity.
model_name (str): The name of the model to use.
Yields:
str: The AI's response as it is generated.
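
    Example (illustrative only; assumes the Space has access to a valid HF token):
        >>> for partial in respond(
        ...     "Hello!", [], "You are a friendly and helpful assistant.",
        ...     256, 0.7, 0.95, "Zephyr 7B Beta",
        ... ):
        ...     print(partial)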
"""
    # Initialize the InferenceClient with the selected model
    client = InferenceClient(model=available_models[model_name])

    # Prepare the conversation history for the API call
    messages = [{"role": "system", "content": system_message}]
    for user_input, assistant_response in history:
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": assistant_response})

    # Add the latest user message to the conversation
    messages.append({"role": "user", "content": message})

    # Initialize an empty response
    streamed_response = ""
    try:
        # Generate a response from the model with streaming
        for response in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            # The streamed delta's content can be None for some chunks, so fall back to ""
            chunk = response.choices[0].delta.content or ""
            streamed_response += chunk
            yield streamed_response
    except Exception as e:
        yield f"**Error:** {str(e)}"
def show_updates_and_respond(history, system_message, max_tokens, temperature, top_p, model_name):
    """
    Shows the latest updates and then generates a model response based on them.
    """
    # Record the canned update text as an assistant reply to the user's request
    history.append(("Show me the latest updates", latest_updates))
    # Ask the model to comment on the updates, using the enriched history as context
    yield from respond(
        message="What are the latest updates?",
        history=history,
        system_message=system_message,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        model_name=model_name,
    )
# Latest updates (you can replace this with actual update information)
latest_updates = """
**Chatbot - Latest Updates:**
* **Multiple Model Support:** You can now choose from different models like Zephyr 7B and Llama 2.
* **Improved Error Handling:** The chatbot now provides clearer error messages if something goes wrong.
* **Enhanced System Message Input:** You can now provide multi-line system messages to guide the AI's behavior.
* **Optimized Temperature Range:** The temperature slider's range has been adjusted for better control over randomness.
* **Robust Chunk Handling:** The chatbot now handles streamed responses more reliably, even if some chunks are missing content.
"""
# Define the Gradio interface with the Blocks context
with gr.Blocks(css=".gradio-container {border: none;}") as demo:
    chat_history = gr.State([])  # Initialize an empty chat history state for the updates button

    # Create the additional input components up front (render=False) so they can be
    # shared between the ChatInterface and the "Show Latest Updates" button below.
    system_message_box = gr.Textbox(
        value="You are a friendly and helpful assistant.",
        label="System message",
        lines=2,
        render=False,
    )
    max_tokens_slider = gr.Slider(
        minimum=1, maximum=2048, value=512, step=1, label="Max new tokens", render=False
    )
    temperature_slider = gr.Slider(
        minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature", render=False
    )
    top_p_slider = gr.Slider(
        minimum=0.1,
        maximum=1.0,
        value=0.95,
        step=0.05,
        label="Top-p (nucleus sampling)",
        render=False,
    )
    model_dropdown = gr.Dropdown(
        choices=list(available_models.keys()),
        value="Zephyr 7B Beta",
        label="Select Model",
        render=False,
    )

    chat_interface = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            system_message_box,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
            model_dropdown,
        ],
        title="Multi-Model Chatbot",
        description="A customizable chatbot interface using Hugging Face's Inference API.",
    )
# Add the "Show Updates" button and output area
with gr.Row():
updates_button = gr.Button("Show Latest Updates")
# Define the button's click event (now inside the Blocks context)
updates_button.click(
fn=show_updates_and_respond,
inputs=[chat_history, chat_interface.textbox, gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"), gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"), chat_interface.dropdown],
outputs=chat_history
)
# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch(share=True)