# NOTE: The following metadata was Hugging Face Hub page residue captured by a
# web scrape (author, commit message, commit hash, file size). It is preserved
# here as a comment so the file parses as valid Python:
#   liudongqing — "Switched to the cloned model" — commit a3172b5 — 1.05 kB
from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
import torch
from threading import Thread
import gradio as gr
from gradio import FileData
import spaces
# Llama 3.2 11B Vision checkpoint used for image-to-text extraction.
model_id = "alpindale/Llama-3.2-11B-Vision"

# Load the multimodal model in bfloat16 and shard it across available devices.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# BUG FIX: from_pretrained expects a model id/path string, not the model
# object itself — passing `model` here raises at startup.
processor = AutoProcessor.from_pretrained(model_id)
@spaces.GPU
def score_it(input_img):
    """Extract the text visible in an uploaded image.

    Args:
        input_img: A PIL image supplied by the Gradio Image component.

    Returns:
        The model's decoded output as a string.
    """
    # Normalize to RGB at a fixed size before handing it to the processor.
    image = input_img.convert("RGB").resize((224, 224))
    prompt = "<|image|><|begin_of_text|>extract the text in this picture"
    inputs = processor(image, prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=30)
    # BUG FIX: the original only print()ed the result and returned None, so
    # the Gradio UI displayed nothing. Return the decoded text instead.
    return processor.decode(output[0])
# BUG FIX: gr.ChatInterface wraps a chat-style fn(message, history) and does
# not accept `inputs=`/`outputs=` keyword arguments (it would raise a
# TypeError). score_it is a plain image -> text function, so gr.Interface is
# the correct wrapper. type="pil" ensures score_it receives a PIL image (it
# calls .convert/.resize). The ChatInterface-only `stop_btn` kwarg is dropped.
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    inputs=gr.Image(type="pil"),
    outputs="text",
)

# debug=True surfaces server-side errors in the console while developing.
demo.launch(debug=True)