import os

import gradio as gr
from huggingface_hub import InferenceClient

# Hugging Face API token read from the environment (HF_TOKEN must be set).
hf_token = os.getenv("HF_TOKEN")

# Inference client bound to the Phi-3-mini instruct model.
client = InferenceClient(
    "microsoft/Phi-3-mini-4k-instruct",
    token=hf_token,
)


def chat_with_model(user_input, history):
    """Stream a reply from the model and append the new turn to the history.

    Parameters
    ----------
    user_input : str
        The message typed by the user.
    history : list[tuple[str, str]]
        Prior (user, assistant) turns; mutated in place with the new turn.

    Returns
    -------
    tuple
        ``(history, history)`` — one output refreshes the Chatbot display,
        the other writes back to the ``gr.State`` store.
    """
    # Replay prior turns so the model sees the full conversation, not just
    # the latest message (the original sent only `user_input`, so the model
    # had no memory despite the tracked history).
    messages = []
    for past_user, past_assistant in history:
        messages.append({"role": "user", "content": past_user})
        messages.append({"role": "assistant", "content": past_assistant})
    messages.append({"role": "user", "content": user_input})

    response = ""
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=500,
        stream=True,
    ):
        # The final streamed chunk commonly carries delta.content = None;
        # guard so we never do `str + None` (TypeError).
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta

    # Record the completed turn and return it for both UI outputs.
    history.append((user_input, response))
    return history, history


# Gradio interface
with gr.Blocks(theme="nevreal/blues") as ui:
    gr.Markdown("# Gradio Chatbot with Phi-3-mini-4k-instruct")

    # User input and chatbot output.
    chatbot = gr.Chatbot()
    with gr.Row():
        user_input = gr.Textbox(show_label=False, placeholder="Type a message...")
        send_button = gr.Button("Send")

    # Per-session chat history.
    history = gr.State([])

    # Send button action.
    send_button.click(
        fn=chat_with_model,
        inputs=[user_input, history],
        outputs=[chatbot, history],
    )

# Launch the web UI only when run as a script, not when imported.
if __name__ == "__main__":
    ui.launch()