Spaces:

asusevski
/

mistraloo-sft

Runtime error

File size: 1,501 Bytes

ab95205
17ede41
ab95205
17ede41
ab95205
 
 
 
 
17ede41
 
 
 
 
ab95205
 
 
17ede41
d201139
ab95205
 
d201139
ab95205

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel, PeftConfig


# Run on the GPU when torch reports one is available; otherwise use the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# The adapter's config records which base checkpoint it was trained on, so
# the base model name is read from there instead of being hard-coded.
peft_model_id = "asusevski/mistraloo-sft"
peft_config = PeftConfig.from_pretrained(peft_model_id)
base_model_name = peft_config.base_model_name_or_path

# Load the base causal LM, wrap it with the PEFT adapter weights, move the
# result to the chosen device and switch it to evaluation mode.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, peft_model_id)
model = model.to(device)
model.eval()


# The tokenizer comes from the same base checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, add_bos_token=True)


def uwaterloo_output(post_title: str, post_text: str) -> str:
    """Generate a reply to a reddit post with the module-level model.

    The post title and text are inserted into an instruction-style prompt,
    the prompt is tokenized and fed to ``model.generate``, and only the
    newly generated continuation is decoded and returned.

    Args:
        post_title: Title of the reddit post.
        post_text: Body text of the reddit post.

    Returns:
        The generated reply with special tokens removed.
    """
    # The template is written as explicit pieces so that the whitespace-only
    # line after the preamble cannot be lost to trailing-whitespace stripping.
    # The resulting text is identical to the original triple-quoted template.
    prompt = (
        "\n"
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request.\n"
        "            \n"
        "### Instruction:\n"
        "Respond to the reddit post in the style of a University of Waterloo student.\n"
        "\n"
        "### Input:\n"
        f"{post_title}\n"
        f"{post_text}\n"
        "\n"
        "### Response:\n"
    )
    model_input = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = model_input["input_ids"].shape[1]

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(
            **model_input,
            max_new_tokens=256,
            repetition_penalty=1.15,
        )[0]

    # For a causal LM the returned sequence is the prompt followed by the new
    # tokens. Dropping the prompt by length is more reliable than splitting
    # the decoded text on "### Response:\n": that approach discards part of
    # the reply if the model emits the header again, and returns the whole
    # prompt if the decoded text does not contain the marker verbatim.
    reply_tokens = generated[prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True)


# Web form: two initially empty text boxes (post title and post text) feed
# uwaterloo_output, and its return value is shown in a third text box.
iface = gr.Interface(
    fn=uwaterloo_output,
    inputs=[
        gr.Textbox(value="", label=caption)
        for caption in ("Post Title", "Post Text")
    ],
    outputs=gr.Textbox(value="", label="Mistraloo-SFT"),
)

# Launch the Gradio app.
iface.launch()