# Page-capture residue (Hugging Face Spaces status banner): "Sleeping".
from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer | |
import torch | |
from threading import Thread | |
import gradio as gr | |
from gradio import FileData | |
import spaces | |
# Hub checkpoint id shared by the model and its processor.
model_id = "meta-llama/Llama-3.2-11B-Vision"

# Load the weights in bfloat16; device_map="auto" leaves device placement
# to the library.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# from_pretrained expects a checkpoint name or path, so pass the id string
# rather than the already-loaded model object.
processor = AutoProcessor.from_pretrained(model_id)
def score_it(input_img):
    """Run the vision model on an image and return the decoded generation.

    Args:
        input_img: Image object exposing ``convert`` (e.g. a PIL image),
            or ``None`` when no image was provided.

    Returns:
        The decoded model output as a string, or a short message asking
        for an image when ``input_img`` is ``None``.
    """
    # Guard the empty case so the call does not fail on None.convert(...).
    if input_img is None:
        return "Please upload an image."

    image = input_img.convert("RGB").resize((224, 224))
    prompt = "<|image|><|begin_of_text|>extract the text in this picture"
    inputs = processor(image, prompt, return_tensors="pt").to(model.device)
    output = model.generate(**inputs, max_new_tokens=30)
    text = processor.decode(output[0])

    print(text)  # keep the console log the function always produced
    # Return the text so the caller (e.g. a UI output component) receives
    # it; previously the function only printed and returned None.
    return text
# gr.Interface (not gr.ChatInterface) is the wrapper that accepts
# inputs=/outputs= and calls fn with the value of each input component;
# ChatInterface calls fn(message, history), which does not match score_it.
demo = gr.Interface(
    fn=score_it,
    title="Upload your English script and get the score",
    # type="pil" because score_it calls .convert()/.resize() on its argument.
    inputs=[gr.Image(type="pil")],
    outputs=["text"],
    # stop_btn is omitted: score_it returns one string and does not stream.
)
demo.launch(debug=True)