"""Gradio demo: classify a student report as Placed/NotPlaced with a PEFT model."""

import functools
import os

import gradio as gr
import torch
from datasets import load_dataset
from dotenv import load_dotenv
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer, AutoModelForCausalLM

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Environment variable that names the model repository to load.
MODEL_REPO_ENV_VAR = "Model_Repo_ID"


def format_instruction(report: str) -> str:
    """Return the instruction prompt with *report* embedded in it.

    Args:
        report: Free-text report describing the student.

    Returns:
        The full prompt, ending with an open ``### Label:`` section for the
        model to complete.
    """
    # The template must be an f-string; as a plain string the model would
    # receive the literal text "{report}" instead of the user's input.
    # NOTE(review): the line breaks below were reconstructed -- confirm they
    # match the prompt layout used when the adapter was fine-tuned.
    return f"""### Instruction:
Classify the student into Placed/NotPlaced based on his/her college report details. The report includes marks scored by the student in various courses and extra curricular activities taken by them.

### Report:
{report}

### Label:
"""


def postprocess(outputs, tokenizer, prompt: str) -> str:
    """Decode generated token ids and drop the echoed prompt.

    Args:
        outputs: Tensor of generated token ids, one row per sequence.
        tokenizer: Tokenizer used to decode ``outputs``.
        prompt: The exact prompt text that was fed to the model; its length
            determines how many leading characters are stripped.

    Returns:
        The decoded text of the first sequence with the first
        ``len(prompt)`` characters removed.
    """
    # .numpy() only works on CPU tensors, so move the ids off the accelerator
    # first (a no-op when they are already on the CPU).
    token_ids = outputs.detach().cpu().numpy()
    decoded = tokenizer.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0][len(prompt):]


@functools.lru_cache(maxsize=1)
def _load_model_and_tokenizer():
    """Load the model and tokenizer once and reuse them for every request."""
    repo_id = os.getenv(MODEL_REPO_ENV_VAR)
    if not repo_id:
        # Fail with a clear message instead of passing None to from_pretrained.
        raise RuntimeError(
            f"Environment variable {MODEL_REPO_ENV_VAR!r} is not set; "
            "it must name the model repository to load."
        )

    # load base LLM model, LoRA params and tokenizer
    model = AutoPeftModelForCausalLM.from_pretrained(
        repo_id,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        load_in_4bit=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    return model, tokenizer


def run_model(report: str) -> str:
    """Generate a placement label for *report*.

    Args:
        report: Free-text report describing the student.

    Returns:
        The text the model generated after the prompt.

    Raises:
        RuntimeError: If the model repository environment variable is unset.
    """
    prompt = format_instruction(report)
    model, tokenizer = _load_model_and_tokenizer()

    # Keep the input ids on the same device as the model to avoid a device
    # mismatch inside generate().
    input_ids = tokenizer(
        prompt, return_tensors="pt", truncation=True
    ).input_ids.to(model.device)

    # inference
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=800,
            do_sample=True,
            top_p=0.9,
            temperature=0.9,
        )

    # Strip by the full prompt (not the raw report): the decoded sequence
    # starts with everything that was fed to the model.
    return postprocess(outputs, tokenizer, prompt)


iface = gr.Interface(fn=run_model, inputs="text", outputs="text")
iface.launch()