Spaces:
Runtime error
Runtime error
import gradio as gr | |
import os | |
import torch | |
from dotenv import load_dotenv | |
from datasets import load_dataset | |
from peft import AutoPeftModelForCausalLM | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
load_dotenv() | |
def format_instruction(report):
    """Build the classification prompt for a single student report.

    Args:
        report: Text of the student's college report. It is inserted
            verbatim under the ``### Report:`` heading.

    Returns:
        The prompt string, ending with ``### Label:`` and a newline so the
        model's continuation is the predicted label.
    """
    # The original template was a plain string, so the ``{report}``
    # placeholder was never filled in and the argument was ignored. The
    # report is interpolated with an f-string so braces inside the report
    # text itself are left untouched.
    return (
        "### Instruction:\n"
        "Classify the student into Placed/NotPlaced based on his/her college "
        "report details. The report includes marks scored by the student in "
        "various courses and extra curricular activities taken by them.\n"
        "### Report:\n"
        f"{report}\n"
        "### Label:\n"
    )
def postprocess(outputs, tokenizer, prompt):
    """Decode generated token ids and drop the leading prompt text.

    Args:
        outputs: Batch of generated token-id sequences, e.g. the tensor
            returned by ``model.generate``. Only the first sequence is used.
        tokenizer: Object providing
            ``batch_decode(ids, skip_special_tokens=...)``.
        prompt: Text that was fed to the model; ``len(prompt)`` characters
            are removed from the front of the decoded text.

    Returns:
        The decoded text of the first sequence with the prompt-length prefix
        removed, or an empty string when the batch is empty.
    """
    # ``Tensor.numpy()`` raises for tensors that live on an accelerator, and
    # ``batch_decode`` accepts tensors directly, so tensor-like inputs are
    # only detached and moved to CPU. Plain sequences pass through unchanged.
    if hasattr(outputs, "detach"):
        outputs = outputs.detach().cpu()

    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    if not decoded:
        return ""

    # Causal LMs echo the prompt before the continuation; cut it off by length.
    return decoded[0][len(prompt):]
# Loaded (model, tokenizer) pairs keyed by repository id, so the weights are
# read once per process instead of on every request.
_MODEL_CACHE = {}


def _load_model_and_tokenizer(repo_id):
    """Return the cached (model, tokenizer) pair for ``repo_id``, loading it on first use."""
    if repo_id not in _MODEL_CACHE:
        # Base LLM plus LoRA parameters, loaded with 4-bit quantization.
        model = AutoPeftModelForCausalLM.from_pretrained(
            repo_id,
            low_cpu_mem_usage=True,
            torch_dtype=torch.float16,
            load_in_4bit=True,
        )
        tokenizer = AutoTokenizer.from_pretrained(repo_id)
        _MODEL_CACHE[repo_id] = (model, tokenizer)
    return _MODEL_CACHE[repo_id]


def run_model(report):
    """Classify a student report with the fine-tuned model.

    Args:
        report: Free-text student report entered by the user.

    Returns:
        The text generated by the model after the prompt.

    Raises:
        RuntimeError: If the ``Model_Repo_ID`` environment variable is unset
            or empty.
    """
    repo_id = os.getenv('Model_Repo_ID')
    if not repo_id:
        # Fail with an actionable message instead of passing None to
        # ``from_pretrained``.
        raise RuntimeError(
            "Environment variable 'Model_Repo_ID' is not set; "
            "cannot locate the model repository."
        )

    prompt = format_instruction(report)
    model, tokenizer = _load_model_and_tokenizer(repo_id)

    # Inputs must be on the same device as the model; forcing them to CPU
    # breaks generation whenever the model was placed on an accelerator.
    input_ids = tokenizer(
        prompt, return_tensors="pt", truncation=True
    ).input_ids.to(model.device)

    # inference
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=800,
            do_sample=True,
            top_p=0.9,
            temperature=0.9,
        )

    # Strip the prompt that was actually fed to the model (not the raw
    # report), and hand over a CPU tensor so decoding never depends on the
    # device the model ran on.
    return postprocess(outputs.cpu(), tokenizer, prompt)
# Minimal Gradio UI: one text box in, one text box out, served by run_model.
iface = gr.Interface(
    fn=run_model,
    inputs="text",
    outputs="text",
)
iface.launch()