|
import gradio as gr |
|
import torch |
|
from transformers import AutoTokenizer |
|
|
|
class Pipline:
    """Prompt-and-generate wrapper around a causal language model.

    Builds an instruction-style prompt from the user's text, runs
    ``model.generate`` on it and decodes the returned sequences to text.
    """

    def __init__(self, model, tokenizer, device='cpu'):
        """Place *model* on *device*, switch it to eval mode, keep *tokenizer*.

        Args:
            model: Object exposing ``to(device)``, ``eval()`` and
                ``generate(...)``.
            tokenizer: Callable that returns an object with ``input_ids`` and
                that provides ``batch_decode``.
            device: Device the model and the prompt tokens are moved to.
        """
        self.device = device
        self.model = model.to(self.device)
        # Inference only: a module restored from a pickle keeps whatever
        # ``training`` flag it was saved with, so make sure train-time
        # behaviour such as dropout is switched off before generating.
        self.model.eval()
        self.tokenizer = tokenizer
        # Fixed preamble placed in front of every prompt.
        self.pre_prompt = "\n\nYou are a AI assistant who helps the user to solve their issue\n\n"

    @torch.no_grad()
    def respond(self, Instruction=None, input=None, temperature=0.8, max_length=200, do_sample=True, top_k=0, top_p=0.9, repetition_penalty=1.0, num_return_sequences=1, num_beams=1, early_stopping=False, use_cache=True, **generate_kwargs):
        """Generate text for an instruction and/or input.

        Args:
            Instruction: Instruction text; a falsy value is rendered as "".
            input: Input text; a falsy value is rendered as "".
            temperature, max_length, do_sample, top_k, top_p,
            repetition_penalty, num_return_sequences, num_beams,
            early_stopping, use_cache: Forwarded unchanged to
                ``model.generate`` under the same keyword names.
            **generate_kwargs: Any further keyword arguments for
                ``model.generate``.

        Returns:
            The result of ``tokenizer.batch_decode`` on the generated
            sequences, with special tokens skipped.

        Raises:
            ValueError: If both ``Instruction`` and ``input`` are falsy.
        """
        if not (Instruction or input):
            raise ValueError("Either Instruction or input must be passed.")

        # Prompt layout: preamble, then one labelled line per field, ending
        # with an open "Output:" for the model to continue.
        query = (
            f"{self.pre_prompt}\n"
            f"Instruction: {Instruction or ''}\n"
            f"Input: {input or ''}\n"
            "Output:"
        )

        prompt_ids = self.tokenizer(query, return_tensors='pt').input_ids
        prompt_ids = prompt_ids.to(self.device)

        # NOTE(review): ``max_length`` is passed straight to ``generate``;
        # in Hugging Face this presumably bounds prompt + new tokens, so small
        # values can fall below the prompt length - confirm against callers.
        generated = self.model.generate(
            prompt_ids,
            max_length=max_length,
            temperature=temperature,
            do_sample=do_sample,
            top_k=top_k,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            num_return_sequences=num_return_sequences,
            num_beams=num_beams,
            early_stopping=early_stopping,
            use_cache=use_cache,
            **generate_kwargs,
        )

        return self.tokenizer.batch_decode(generated, skip_special_tokens=True)
|
|
|
# Tokenizer comes from the Hugging Face hub checkpoint named below.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125M")

# SECURITY: torch.load unpickles arbitrary Python objects - only load a
# checkpoint file you trust.
# map_location keeps deserialisation on the CPU even if the checkpoint was
# saved with CUDA-tagged tensors, matching the device used by the pipeline.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects fully pickled modules; pass weights_only=False there if this file
# holds a whole model object - confirm against the installed version.
model = torch.load('./model-cpu.pkl', map_location='cpu')

# Single shared pipeline instance used by the Gradio callback.
pipe = Pipline(model=model, tokenizer=tokenizer, device='cpu')
|
|
|
# Free-text inputs shown in the UI, in the order the callback receives them.
# Components are taken from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are deprecated in Gradio 3 and
# absent from later releases.
input_components = [
    gr.Textbox(label='Instruction', placeholder='Enter instruction...'),
    gr.Textbox(label='Input', placeholder='Enter input...'),
]

# Single text box that displays the generated response.
output_components = [
    gr.Textbox(label='Output'),
]
|
|
|
def chatbot_response(Instruction, input, max_length, temperature):
    """Gradio callback: run the shared pipeline and return its first result.

    Args:
        Instruction: Text from the "Instruction" box.
        input: Text from the "Input" box.
        max_length: Slider value; converted to ``int`` before use.
        temperature: Slider value; converted to ``float`` before use.

    Returns:
        The first element of the list returned by ``pipe.respond``.
    """
    # UI widgets may deliver numbers in a different numeric type, so coerce
    # them to the types the pipeline is called with.
    generation_settings = {
        "max_length": int(max_length),
        "temperature": float(temperature),
    }
    candidates = pipe.respond(
        Instruction=Instruction,
        input=input,
        **generation_settings,
    )
    return candidates[0]
|
|
|
# Web UI: the two text boxes plus two generation sliders feed
# chatbot_response, positionally, in the order listed under ``inputs``.
# Sliders use the top-level ``gr.Slider`` with ``value=`` for the initial
# position; the legacy ``gr.inputs.Slider(default=...)`` form is deprecated
# in Gradio 3 and absent from later releases.
interface = gr.Interface(
    fn=chatbot_response,
    inputs=input_components + [
        gr.Slider(
            label='Max Length',
            minimum=10,
            maximum=500,
            step=10,
            value=200,
        ),
        gr.Slider(
            label='Temperature',
            minimum=0.1,
            maximum=1.0,
            step=0.1,
            value=0.8,
        ),
    ],
    outputs=output_components,
    title='ChatOPT',
    description='Type in an instruction and input, and get a response from the model',
)

# Start the Gradio server for this interface.
interface.launch()
|
|