# Spaces: Runtime error
import logging

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
# Load the model and its processor once at import time so that every request
# handled by the app reuses the same objects.
MODEL_ID = "mynkchaudhry/Florence-2-FT-DocVQA"

# trust_remote_code=True lets transformers run the modelling code shipped in
# the checkpoint repository.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)
# NOTE(review): Florence-2 repositories normally ship a custom processor class
# as well, so the processor needs the same opt-in as the model; without it the
# load is expected to fail in a non-interactive environment. Confirm against
# the checkpoint repository.
processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
def generate_response(image: "Image.Image | None", question: "str | None") -> str:
    """Answer a question about an image using the module-level model.

    Args:
        image: PIL image to query; may be ``None`` when no image was supplied.
        question: Free-text question about the image.

    Returns:
        The first decoded generation, or a message starting with
        ``"Error processing image:"`` when an input is missing or inference
        raises.
    """
    # Validate up front so the user sees a clear message instead of a leaked
    # AttributeError from ``image.mode`` or an answer to an empty prompt.
    if image is None:
        return "Error processing image: no image was provided."
    if not question or not question.strip():
        return "Error processing image: no question was provided."

    try:
        if image.mode != "RGB":
            image = image.convert("RGB")

        # NOTE(review): the question is passed to the processor verbatim;
        # confirm whether this checkpoint expects a task-token prefix.
        inputs = processor(text=question, images=image, return_tensors="pt")

        # Prefer the GPU when one is available; model and inputs must live on
        # the same device before generation.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        inputs = {key: value.to(device) for key, value in inputs.items()}

        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_length=1024,
            num_beams=3,
            early_stopping=True,
        )
        # A single image/question pair was submitted, so take the first entry.
        return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # UI boundary: keep the app responsive by returning the message, but
        # record the full traceback so the failure can be diagnosed from logs.
        logging.getLogger(__name__).exception("generate_response failed")
        return f"Error processing image: {e}"
# Example images for demonstration (update paths as needed).
# Each row is [image path, question], in the same order as the interface inputs.
examples = [
    ["demo.png", "what is the address in the page?"],
    ["demo2.jpg", "what is the date in the page?"],
    ["demo.png", "what is the name in the page?"],
]
# Gradio interface: a PIL image and a free-text question go in, the text
# returned by generate_response comes out.
iface = gr.Interface(
    fn=generate_response,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
    outputs=gr.Textbox(label="Response"),
    examples=examples,
    title="Image to Text Extractor",
    description=(
        "Upload an image and provide a question. "
        "This tool will extract the relevant information from the image based on your question."
    ),
)

# Launch the interface
iface.launch()