# Prome-LLM / app.py
import gradio as gr
import torch
import numpy as np
from model import Transformer
from transformers import AutoTokenizer  # pip install transformers
from utils import (
    BLOCK_SIZE,
    DEVICE,
    DROPOUT,
    NUM_EMBED,
    NUM_HEAD,
    NUM_LAYER,
    encode,
    decode,
)
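# model.py and utils.py live in this repo: model.py defines the Transformer,
# utils.py provides the encode/decode helpers and the training hyperparameters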
# alternative tokenizer for Russian text:
# tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/rubert-base-cased")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
vocab_size = tokenizer.vocab_size
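# vocab_size must match the tokenizer so the model's token-embedding table
# covers every id the tokenizer can emit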
# instantiate the model architecture (the pretrained weights are loaded below)
model = Transformer(
    vocab_size=vocab_size,
    num_embed=NUM_EMBED,
    block_size=BLOCK_SIZE,
    num_heads=NUM_HEAD,
    num_layers=NUM_LAYER,
    dropout=DROPOUT,
)
# load the pretrained model (the checkpoint stores a complete module, so it
# replaces the instance built above) onto the available device, then switch
# to inference mode
m = torch.load("base_model.pth", map_location=torch.device(DEVICE))
m.eval()
# report the number of parameters in the model
print(
    "Model with {:.2f}M parameters".format(sum(p.numel() for p in m.parameters()) / 1e6)
)
def model_generate(text):
    # encode the prompt from the Gradio textbox and add a batch dimension
    context_np = np.array([encode(str(text), tokenizer)])
    context = torch.from_numpy(context_np).to(DEVICE)
    # sample up to 100 new tokens, then decode them back into a string
    with torch.no_grad():
        generated = m.generate(idx=context, max_new_tokens=100, block_size=BLOCK_SIZE)
    return decode(enc_sec=generated[0], tokenizer=tokenizer)
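# quick sanity check without the web UI (assumes base_model.pth is available):
# print(model_generate("Hello! My name is "))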
iface = gr.Interface(fn=model_generate, inputs="text", outputs="text")
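# launch() serves the UI on http://127.0.0.1:7860 by default; pass
# share=True to expose a temporary public link when running locally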
iface.launch()