Spaces:
Sleeping
Sleeping
File size: 1,046 Bytes
56427b4 5bb878a 56427b4 a3172b5 56427b4 5bb878a 56427b4 5bb878a 56427b4 5bb878a 56427b4 5bb878a 56427b4 5bb878a 56427b4 5bb878a 56427b4 5bb878a 56427b4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer
import torch
from threading import Thread
import gradio as gr
from gradio import FileData
import spaces
# Hugging Face Hub repository that provides both the model weights and the
# matching processor (tokenizer + image preprocessor) configuration.
model_id = "alpindale/Llama-3.2-11B-Vision"

# Load the vision-language model in bfloat16 and let `device_map="auto"`
# decide where the weights are placed.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# The processor must be loaded from the repository id, not from the
# instantiated model object: `from_pretrained` expects a name or path.
processor = AutoProcessor.from_pretrained(model_id)
@spaces.GPU
def score_it(input_img) -> str:
    """Run the vision model on an uploaded image and return its text output.

    Args:
        input_img: The uploaded image. It must expose PIL's ``convert`` and
            ``resize`` methods (i.e. the Gradio image input should be
            configured with ``type="pil"``).

    Returns:
        The text generated by the model for the fixed extraction prompt,
        without the prompt itself and without special tokens.

    Raises:
        gr.Error: If no image was provided.
    """
    if input_img is None:
        # Surface a readable message in the UI instead of an AttributeError.
        raise gr.Error("Please upload an image first.")

    image = input_img.convert("RGB").resize((224, 224))
    prompt = "<|image|><|begin_of_text|>extract the text in this picture"
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=30)

    # `generate` returns prompt tokens followed by the new tokens; keep only
    # the newly generated part so the prompt is not echoed back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    print(text)  # keep the result visible in the server log as before
    return text
# `gr.Interface` (not `gr.ChatInterface`) is the component that accepts
# `inputs`/`outputs` and calls `fn` with the input component's value.
# `ChatInterface` calls `fn(message, history)` and rejects `inputs`/`outputs`.
# The original `stop_btn` argument is omitted: it only matters for streaming
# (generator) functions, and `score_it` returns a single value.
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    # type="pil" so that `score_it` receives an object with `.convert()`;
    # the default would hand it a numpy array.
    inputs=[gr.Image(type="pil")],
    outputs=["text"],
)
demo.launch(debug=True)
|