Spaces:

felixz
/

Fast-Chat-T5-Demo

Runtime error

App Files Files Community

Fast-Chat-T5-Demo / app.py

felixz

Create app.py

3d7c5d1 over 1 year ago

raw

history blame

2.7 kB

	import ctranslate2
	import gradio as gr
	from transformers import AutoModel
	from transformers import T5Tokenizer

	# Checkpoint whose tokenizer is loaded below.
	# The xl size runs out of memory on a 16GB VM, so the large size is used.
	# Alternative checkpoint that was tried:
	#   'jncraton/fastchat-t5-3b-v1.0-ct2-int8'
	model_name = 'google/flan-t5-large'

	# CTranslate2 model used for generation.
	# NOTE(review): "t5-small-ct2" is presumably a converted model directory
	# expected next to this script -- confirm it ships with the app.
	translator = ctranslate2.Translator("t5-small-ct2")

	# T5Tokenizer must be imported from `transformers` at the top of the file;
	# without that import this line raises NameError when the module loads.
	# NOTE(review): the tokenizer checkpoint (flan-t5-large) differs from the
	# CTranslate2 model (t5-small) -- confirm their vocabularies match.
	tokenizer = T5Tokenizer.from_pretrained(model_name)

	# Placeholder page title; not referenced by the code visible in this file.
	title = ""

	def get_examples():
	    """Return the example rows offered below the demo inputs.

	    Each row is a two-item list ``[prompt, reference_answer]``. The rows are
	    passed to ``gr.Examples`` with two input components, so the first item
	    fills the input textbox and the second the "Correct" label.

	    The strings are built with adjacent-literal concatenation rather than
	    backslash continuations inside a literal: a backslash continuation pulls
	    the next source line's leading indentation into the text itself.

	    Returns:
	        list[list[str]]: three ``[prompt, reference_answer]`` pairs.
	    """
	    return [
	        [
	            "Peter goes to the store to buy a soda. The soda costs $.25 an ounce. "
	            "He brought $2 with him and leaves with $.50. "
	            "How many ounces of soda did he buy?",
	            "How much did Peter spend on soda? ** "
	            "He spend $1.5 on soda because 2 - .5 = <<2-.5=1.5>>1.5 "
	            "How many ounces of soda did Peter buy? ** "
	            "He bought 6 ounces of soda because 1.5 / .25 = <<6=6>>6 #### 6",
	        ],
	        [
	            "Krystian works in the library. "
	            "He borrows an average of 40 books every day. "
	            "Every Friday, his number of borrowed books is about 40% higher "
	            "than the daily average. How many books does he borrow in a week "
	            "if the library is open from Monday to Friday?",
	            # The calculator annotations and " ** " separators below had lost
	            # their asterisks (e.g. "<<4040/100=16>>"); they are restored so
	            # each annotation matches the arithmetic written just before it.
	            "How many books does Krystian borrow on Friday? ** "
	            "The number of books borrowed on Friday is higher by "
	            "40 * 40/100 = <<40*40/100=16>>16 books. "
	            "How many books does Krystian borrow in a week? ** "
	            "There are 5 days from Monday to Friday inclusive, so Krystian "
	            "borrows an average of 5 * 40 = <<5*40=200>>200 books during that time. "
	            "How many books does Krystian borrow in a week? ** "
	            "With Friday's increase in borrowings, during one week Krystian "
	            "borrows 200 + 16 = <<200+16=216>>216 books.",
	        ],
	        [
	            "Jane had $60 but gave $30 to dave and went to movies and spend $2. "
	            "How much money does Jane has left? Answer by reasoning step by step:",
	            "$28",
	        ],
	    ]


	def text2text(input_text):
	    """Generate the model's reply for one prompt.

	    The prompt is encoded with the module-level ``tokenizer``, turned into
	    token strings, passed to the module-level ``translator`` as a batch of
	    one, and the first hypothesis of the first result is decoded to text.

	    Args:
	        input_text: the prompt typed into the demo's input box.

	    Returns:
	        The decoded output text.
	    """
	    # translate_batch is given token strings here, so convert ids -> tokens.
	    prompt_ids = tokenizer.encode(input_text)
	    prompt_tokens = tokenizer.convert_ids_to_tokens(prompt_ids)

	    # Batch of one prompt; keep the first hypothesis of the first result.
	    first_result = translator.translate_batch([prompt_tokens])[0]
	    reply_tokens = first_result.hypotheses[0]

	    # Convert the generated token strings back to ids, then to text.
	    reply_ids = tokenizer.convert_tokens_to_ids(reply_tokens)
	    return tokenizer.decode(reply_ids)


	# Page layout: components appear in the order they are created here.
	with gr.Blocks() as demo:
	    # Header text, written as Markdown.
	    gr.Markdown(
	        """
	# Fast Chat T5 Demo
	Fast inference with quantized LLM
	Prompt the model in the Input box.
	"""
	    )

	    # Prompt input, reference-answer label, and generated output.
	    txt_in = gr.Textbox(label="Input", lines=3)
	    correct_label = gr.Label(label="Correct")
	    txt_out = gr.Textbox(value="", label="Output", lines=4)

	    # Submitting sends the prompt through text2text and shows the result.
	    btn = gr.Button("Submit")
	    btn.click(fn=text2text, inputs=[txt_in], outputs=[txt_out])

	    # Each example row supplies the prompt and its reference answer.
	    gr.Examples(
	        examples=get_examples(),
	        inputs=[txt_in, correct_label],
	    )


	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()