import gradio as gr
from huggingface_hub import InferenceClient
import os
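
# Create one shared Inference API client at import time; respond() checks it
# for None so an initialization failure surfaces in the UI instead of
# crashing the app.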
client = None
try:
    client = InferenceClient(
        "HuggingFaceH4/zephyr-7b-beta",
        # Assumption: pick up an API token from the HF_TOKEN env var when set;
        # public models also work anonymously, so None is fine here.
        token=os.environ.get("HF_TOKEN"),
    )
    print("InferenceClient initialized successfully.")
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    print("Please ensure the model identifier is correct and that you have the necessary permissions/token.")


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str = "You are a friendly Chatbot.",
    max_tokens: int = 512,
    temperature: float = 0.7,
    top_p: float = 0.95,
):
    """
    Chat response generator for the Gradio interface.

    Yields the accumulated reply after each streamed chunk so
    gr.ChatInterface renders the response as it arrives.
    """
    if client is None:
        yield "Error: InferenceClient could not be initialized. Please check server logs."
        return

    if not message:
        yield "Error: Please enter a message."
        return
    if not system_message:
        system_message = "You are a helpful assistant."
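
    # Build an OpenAI-style message list: the system prompt first, then prior
    # (user, assistant) turns from Gradio's history, then the new user message.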
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response_text = ""
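
    # Stream the completion; each chunk's delta carries newly generated text,
    # which is accumulated and re-yielded so the UI updates incrementally.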
    try:
        for message_chunk in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = message_chunk.choices[0].delta.content
            if token is not None:
                response_text += token
                yield response_text
    except Exception as e:
        print(f"Error during API call: {e}")
        yield f"An error occurred while generating the response: {e}"

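
# Wire the generator into a ChatInterface; each additional input maps, in
# order, onto respond()'s keyword arguments after (message, history).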
demo = gr.ChatInterface(
    respond,
    chatbot=gr.Chatbot(
        height=500,
        label="Zephyr 7B Beta",
        show_label=True,
        bubble_full_width=False,
    ),
    title="🤖 Zephyr 7B Beta Chat",
    description="Chat with the Zephyr 7B Beta model using the Hugging Face Inference API.\nEnter your message and adjust settings below.",
    examples=[
        ["Hello, how are you today?"],
        ["What is the capital of France?"],
        ["Explain the concept of large language models in simple terms."],
        ["Write a short poem about the rain."],
    ],
    cache_examples=False,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly and helpful chatbot.",
            label="System Message",
            info="The instruction given to the chatbot to guide its behavior.",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max New Tokens",
            info="Maximum number of tokens to generate.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Controls randomness. Lower values make output more focused; higher values make it more diverse.",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
            info="Considers only the most probable tokens whose cumulative probability reaches p, filtering out low-probability tokens.",
        ),
    ],
    # Note: `additional_inputs_accordion_name` was deprecated in Gradio 4 in
    # favor of `additional_inputs_accordion`, which takes the same label string.
    additional_inputs_accordion="⚙️ Advanced Settings",
)
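
# Assumption: default host/port. Pass e.g. share=True or server_name="0.0.0.0"
# to demo.launch() if external access is needed.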
if __name__ == "__main__":
    demo.launch()