import gradio as gr
from huggingface_hub import InferenceClient

# Hosted inference endpoint for the chat model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

with gr.Blocks() as demo:
    # NOTE(review): "(link unavailable)" is a placeholder, not a valid image
    # source — replace with a real URL/path or drop the component.
    logo = gr.Image(value="(link unavailable)", label="Logo")
    input_box = gr.Textbox(label="Type your message...")
    chatbot = gr.Chatbot(label="Chat History")
    # Per-session conversation state: list of (user_text, assistant_text) pairs.
    history_state = gr.State([])

    def respond(message, history):
        """Send the user's message (with prior turns) to the model and
        return the updated chat history.

        Args:
            message: The user's new message text.
            history: List of (user, assistant) tuples from earlier turns.

        Returns:
            (history, history): the updated turn list, fed to both the
            Chatbot display and the State component.
        """
        # Generation defaults — these are the values actually passed to the
        # model (the original defined them but then hard-coded different
        # numbers in the call).
        system_message = "You are a friendly Chatbot."
        max_tokens = 512
        temperature = 0.7
        top_p = 0.95

        # Rebuild the full conversation so the model sees prior context.
        messages = [{"role": "system", "content": system_message}]
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        # Accumulate the streamed completion. The loop variable must NOT
        # shadow `message` — the original bug made the appended turn carry
        # the last stream chunk instead of the user's text.
        response = ""
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # final chunk may have no content (None)
                response += token

        # Return a new list (don't mutate the State value in place) so
        # Gradio reliably detects the change.
        history = history + [(message, response)]
        return history, history

    # Component updates happen by returning values to outputs —
    # Component.update(...) calls inside the handler are no-ops.
    input_box.submit(
        respond,
        inputs=[input_box, history_state],
        outputs=[chatbot, history_state],
    )

demo.launch()