# Source: Hugging Face file view of app.py, uploaded by felixz.
# Commit 3d7c5d1 ("Create app.py"), 2.7 kB.
import ctranslate2
import gradio as gr
from transformers import AutoModel
from transformers import T5Tokenizer
# Hugging Face checkpoint whose tokenizer is loaded below.
# The xl size ran out of memory on a 16GB VM, so the large size is used.
model_name = 'google/flan-t5-large'
# Alternative checkpoint that was tried:
# model_name = 'jncraton/fastchat-t5-3b-v1.0-ct2-int8'

# Directory holding the converted CTranslate2 model used for inference.
ct2_model_dir = "t5-small-ct2"

# Both objects are created once at import time and shared by every request.
translator = ctranslate2.Translator(ct2_model_dir)
# NOTE(review): the tokenizer comes from `model_name` while the translator
# loads "t5-small-ct2" -- confirm the two share the same vocabulary.
tokenizer = T5Tokenizer.from_pretrained(model_name)

title = ""
def get_examples():
    """Return the example rows shown in the demo.

    Each row is a two-item list: the prompt text followed by the reference
    answer text. A fresh list is built on every call.
    """
    soda_question = (
        "Peter goes to the store to buy a soda. "
        "The soda costs $.25 an ounce. "
        "He brought $2 with him and leaves with $.50. "
        "How many ounces of soda did he buy?"
    )
    soda_answer = (
        "How much did Peter spend on soda? ** "
        "He spend $1.5 on soda because 2 - .5 = <<2-.5=1.5>>1.5 "
        "How many ounces of soda did Peter buy? ** "
        "He bought 6 ounces of soda because 1.5 / .25 = <<6=6>>6 #### 6"
    )

    library_question = (
        "Krystian works in the library. "
        "He borrows an average of 40 books every day. "
        "Every Friday, his number of borrowed books is about 40% higher than "
        "the daily average. "
        "How many books does he borrow in a week if the library is open from "
        "Monday to Friday?"
    )
    library_answer = (
        "How many books does Krystian borrow on Friday? ** "
        "The number of books borrowed on Friday is higher by "
        "40 * 40/100 = <<40*40/100=16>>16 books. "
        "How many books does Krystian borrow in a week? ** "
        "There are 5 days from Monday to Friday inclusive, so Krystian borrows "
        "an average of 5 * 40 = <<5*40=200>>200 books during that time. "
        "How many books does Krystian borrow in a week? ** "
        "With Friday's increase in borrowings, during one week Krystian borrows "
        "200 + 16 = <<200+16=216>>216 books."
    )

    money_question = (
        "Jane had $60 but gave $30 to dave and went to movies and spend $2. "
        "How much money does Jane has left? "
        "Answer by reasoning step by step:"
    )
    money_answer = "$28"

    return [
        [soda_question, soda_answer],
        [library_question, library_answer],
        [money_question, money_answer],
    ]
def text2text(input_text):
    """Run ``input_text`` through the module-level translator and return text.

    The prompt is encoded to ids and mapped to token strings with the shared
    ``tokenizer``, sent to ``translator`` as a one-item batch, and the first
    hypothesis of the first result is mapped back to ids and decoded.
    """
    prompt_ids = tokenizer.encode(input_text)
    prompt_pieces = tokenizer.convert_ids_to_tokens(prompt_ids)

    # One prompt per call, so only the first result and its first
    # hypothesis are read.
    batch_results = translator.translate_batch([prompt_pieces])
    reply_pieces = batch_results[0].hypotheses[0]

    reply_ids = tokenizer.convert_tokens_to_ids(reply_pieces)
    return tokenizer.decode(reply_ids)
# Build the page: a heading, an input box, a "Correct" label, an output box,
# a submit button, and a set of examples.
with gr.Blocks() as demo:
    gr.Markdown(
        "\n"
        "# Fast Chat T5 Demo\n"
        "Fast inference with quantized LLM\n"
        "Prompt the model in the Input box.\n"
    )

    txt_in = gr.Textbox(lines=3, label="Input")
    correct_label = gr.Label(label="Correct")
    txt_out = gr.Textbox(lines=4, label="Output", value="")

    # Clicking the button sends the Input text to text2text and writes the
    # returned string to Output.
    btn = gr.Button(value="Submit")
    btn.click(fn=text2text, inputs=[txt_in], outputs=[txt_out])

    # Each example row supplies a value for Input and for the Correct label.
    gr.Examples(
        inputs=[txt_in, correct_label],
        examples=get_examples(),
    )


if __name__ == "__main__":
    demo.launch()