Spaces:

mynkchaudhry
/

Image2text

Runtime error

App Files Files Community

Image2text / app.py

mynkchaudhry

upload all the files

5b7cddd 12 months ago

raw

history blame contribute delete

1.77 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoProcessor
	from PIL import Image
	import torch

	# Load the DocVQA checkpoint and its processor once, at import time.
	# Both loaders get trust_remote_code=True: the model card for Florence-2 passes
	# it to AutoProcessor as well as to the model, and leaving it off the processor
	# is presumed to make the load fail in a non-interactive environment.
	model = AutoModelForCausalLM.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", trust_remote_code=True)
	processor = AutoProcessor.from_pretrained("mynkchaudhry/Florence-2-FT-DocVQA", trust_remote_code=True)

	def generate_response(image, question):
	    """Answer a question about an image using the module-level model and processor.

	    Args:
	        image: PIL image to query (converted to RGB when it is in another
	            mode), or ``None`` when no image was supplied.
	        question: Text handed to the processor together with the image.

	    Returns:
	        The decoded text of the first generated sequence. When no image was
	        given, or when any processing step raises, a string beginning with
	        ``"Error processing image:"`` is returned instead of raising.
	    """
	    # The UI may call this with no image; say so plainly rather than letting
	    # the attribute access below fail with an opaque NoneType message.
	    if image is None:
	        return "Error processing image: no image was provided."

	    try:
	        if image.mode != "RGB":
	            image = image.convert("RGB")

	        inputs = processor(text=question, images=image, return_tensors="pt")

	        # Run on the GPU when one is available; model and inputs must share a device.
	        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        model.to(device)
	        inputs = {key: value.to(device) for key, value in inputs.items()}

	        generated_ids = model.generate(
	            input_ids=inputs["input_ids"],
	            pixel_values=inputs["pixel_values"],
	            max_length=1024,
	            num_beams=3,
	            early_stopping=True,
	        )

	        response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
	        return response
	    except Exception as e:
	        # UI boundary: hand the failure back as text so it appears in the
	        # response box instead of propagating out of the callback.
	        return f"Error processing image: {e}"

	# Sample rows offered in the UI: each row is [image path, question].
	# The paths are placeholders and should point at real files.
	examples = [
	    [image_path, prompt]
	    for image_path, prompt in (
	        ("demo.png", "what is the address in the page?"),
	        ("demo2.jpg", "what is the date in the page?"),
	        ("demo.png", "what is the name in the page?"),
	    )
	]

	# Assemble the UI: an image upload and a question box feed generate_response,
	# and its return value is shown in one text box.
	_image_input = gr.Image(type="pil")
	_question_input = gr.Textbox(label="Question")
	_response_output = gr.Textbox(label="Response")

	iface = gr.Interface(
	    title="Image to Text Extractor",
	    description="Upload an image and provide a question. This tool will extract the relevant information from the image based on your question.",
	    fn=generate_response,
	    inputs=[_image_input, _question_input],
	    outputs=_response_output,
	    examples=examples,
	)

	# Start serving the interface.
	iface.launch()