Spaces:

Wuyouz
/

English-Writing-Teacher

Sleeping

liudongqing

The first version, only recongnize the image

56427b4 2 months ago

1.05 kB

	from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
	import torch
	from threading import Thread
	import gradio as gr
	from gradio import FileData
	import spaces
	model_id = "meta-llama/Llama-3.2-11B-Vision"

	model = MllamaForConditionalGeneration.from_pretrained(
	model_id,
	torch_dtype=torch.bfloat16,
	device_map="auto",
	)

	processor = AutoProcessor.from_pretrained(model)


	@spaces.GPU
	def score_it(input_img):
	image = input_img.convert("RGB").resize((224, 224))

	prompt = "<\|image\|><\|begin_of_text\|>extract the text in this picture"
	inputs = processor(image, prompt, return_tensors="pt").to(model.device)

	output = model.generate(**inputs, max_new_tokens=30)
	print(processor.decode(output[0]))


	demo = gr.ChatInterface(fn=score_it, title="Upload your English script and get the score",

	inputs=[gr.Image()],
	outputs=['text'],
	stop_btn="Stop Generation",
	)

	demo.launch(debug=True)