import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

# ✅ Model and Tokenizer Loading
model_name = "microsoft/phi-2"
# device_map = {"": 0}

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)

# Load fine-tuned LoRA weights and merge them into the base model
fine_tuned_model_path = "piyushgrover/phi2-qlora-adapter-s18erav3"
model = PeftModel.from_pretrained(base_model, fine_tuned_model_path)
model = model.merge_and_unload()  # Merge LoRA weights

# ✅ Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# ✅ Set up text generation pipeline
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=500,
    truncation=True,
)


def chat(user_input, history=None):
    """Generates a response from the fine-tuned Phi-2 model.

    `history` is supplied by Gradio's ChatInterface but is currently unused;
    the earlier history-aware prompt formatting is kept below for reference.
    """
    '''
    # Format conversation history
    formatted_history = ""
    for usr, bot in history:
        formatted_history += f"\n\n### User:\n{usr}\n\n### Assistant:\n{bot}"

    # Append the latest user message
    prompt = f"{formatted_history}\n\n### User:\n{user_input}\n\n### Assistant:\n"

    # Generate response
    response = generator(prompt, max_length=128, do_sample=True, truncation=True)
    answer = response[0]["generated_text"].split("### Assistant:\n")[-1].strip()

    # Append new response to history
    # history.append((user_input, answer))

    return answer
    '''
    # Build the prompt for the current turn only
    prompt = f"\n\n### User:\n{user_input}\n\n### Assistant:\n"
    response = generator(prompt, max_length=128, do_sample=True, truncation=True)

    # Keep only the text generated after the assistant marker
    answer = response[0]["generated_text"].split("### Assistant:\n")[-1].strip()

    # Append new response to history
    # history.append((user_input, answer))

    return answer


# ✅ Create Gradio Chat Interface
chatbot = gr.ChatInterface(
    fn=chat,
    title="Fine-Tuned Phi-2 Conversational Chat Assistant",
    description="🚀 Chat with a fine-tuned Phi-2 model. It remembers the conversation!",
    theme="compact",
)

# ✅ Launch App
if __name__ == "__main__":
    chatbot.launch(debug=True)