BOLT2.5B / app.py
import gradio as gr
from transformers import GPT2Tokenizer
import os

# Install the ThirdAI wheel at runtime, then reload site so the freshly
# installed package becomes importable in this same process.
os.system(
    "pip3 install thirdai-0.7.18+a1506df-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
)

import importlib
import site

importlib.reload(site)

from thirdai import bolt, licensing

# Activate the ThirdAI license, then load the tokenizer and the pretrained model.
licensing.activate("7511CC-0E24D7-69439D-5D6CBA-33AAFD-V3")

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = bolt.GenerativeModel.load("./generative.model")
def generate(prompt):
    """Tokenize the prompt and stream decoded text back as the model generates."""
    prompt = tokenizer.encode(prompt)
    stream = model.streaming_generation(
        input_tokens=prompt,
        prediction_chunk_size=2,
        max_predictions=80,
        beam_width=3,
    )
    # Yield the decoded output after each chunk so the UI updates incrementally.
    for res in stream:
        yield tokenizer.decode(res)
with gr.Blocks() as demo:
    output = gr.TextArea(label="Output")
    prompt = gr.Textbox(label="Prompt")

    # Generation can be triggered by pressing Enter in the prompt box or by the button.
    prompt.submit(generate, inputs=[prompt], outputs=[output])
    btn = gr.Button(value="Generate")
    btn.click(generate, inputs=[prompt], outputs=[output])
    gr.ClearButton(components=[prompt, output])
if __name__ == "__main__":
    # Enable the queue so the streamed (generator) outputs can update the UI.
    demo.queue()
    demo.launch()