import gradio as gr
from huggingface_hub import InferenceClient

# Replace this with the name of your merged model on Hugging Face
MERGED_MODEL_REPO = "Grandediw/lora-model_finetuned"

# Initialize the Inference Client with your merged model
client = InferenceClient(MERGED_MODEL_REPO)

# The guide uses a mini_chatbot function that accepts `message` and `history`.
# We'll follow that pattern and introduce a system message for role context.
def mini_chatbot(message, history):
    """
    Simulate a conversation with the model.

    Takes the latest user message and the full conversation history,
    builds a prompt, and returns the model's response.
    """
    # A system message defines the assistant's behavior.
    system_message = "You are a helpful and friendly assistant."

    # Build the conversation prompt:
    # history is a list of (user_message, assistant_message) tuples.
    prompt = system_message.strip() + "\n\n"
    for user_msg, assistant_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if assistant_msg:
            prompt += f"Assistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"

    # Set generation parameters (adjust as needed, or add sliders)
    max_tokens = 200
    temperature = 0.7
    top_p = 0.9

    # Use text_generation to stream the response.
    # With stream=True (and the default details=False), the client yields
    # plain string tokens, so we append them directly.
    response = ""
    for token in client.text_generation(
        prompt=prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        response += token
    return response


# Create a Gradio ChatInterface similar to the guide:
# Gradio handles `history` automatically and passes (message, history) to mini_chatbot.
demo_chatbot = gr.ChatInterface(
    fn=mini_chatbot,
    title="My Chatbot",
    description="Enter text to start chatting with the merged LoRA model.",
)

if __name__ == "__main__":
    demo_chatbot.launch()
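
# --- Optional variant: expose the generation parameters as sliders ---
# A minimal sketch of the "add sliders" idea mentioned above, assuming the
# same `client` and prompt format. The names `mini_chatbot_with_controls`
# and `demo_with_controls` are illustrative, not part of the guide.
# gr.ChatInterface passes each value in `additional_inputs` as an extra
# positional argument after (message, history).

def mini_chatbot_with_controls(message, history, max_tokens, temperature, top_p):
    prompt = "You are a helpful and friendly assistant.\n\n"
    for user_msg, assistant_msg in history:
        if user_msg:
            prompt += f"User: {user_msg}\n"
        if assistant_msg:
            prompt += f"Assistant: {assistant_msg}\n"
    prompt += f"User: {message}\nAssistant:"
    # Non-streaming call: with the default details=False this returns
    # the generated text as a plain string.
    return client.text_generation(
        prompt=prompt,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        top_p=top_p,
    )

demo_with_controls = gr.ChatInterface(
    fn=mini_chatbot_with_controls,
    additional_inputs=[
        gr.Slider(minimum=16, maximum=1024, value=200, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    title="My Chatbot (with controls)",
)
# To try this variant, call demo_with_controls.launch() instead of
# demo_chatbot.launch() in the __main__ block above.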