RT-Detr-ArabicLayoutAnalysisR

Running

App Files Files Community

RT-Detr-ArabicLayoutAnalysisR / app.py

omarelsayeed

Update app.py

fa50974 verified about 1 month ago

raw

history blame

6.43 kB

	from ultralytics import RTDETR
	import gradio as gr
	from huggingface_hub import snapshot_download
	from PIL import Image
	from PIL import Image, ImageDraw, ImageFont
	from surya.ordering import batch_ordering
	from surya.model.ordering.processor import load_processor
	from surya.model.ordering.model import load_model

	# Fetch the model repository snapshot from the Hugging Face Hub and build
	# the path of the RT-DETR checkpoint file stored inside it.
	model_dir = snapshot_download(repo_id="omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS") + "/rtdetr_1024_crops.pt"
	# Layout detector built from the downloaded weights.
	model = RTDETR(model=model_dir)
	# Reading-order model and its processor; both are handed to batch_ordering().
	order_model, processor = load_model(), load_processor()

	def detect_layout(img, conf_threshold, iou_threshold):
	    """Detect layout regions in a document image with the RT-DETR model.

	    Args:
	        img: Image to analyse; forwarded unchanged as ``source`` to
	            ``model.predict``.
	        conf_threshold: Forwarded as ``conf`` to ``model.predict``.
	        iou_threshold: Forwarded as ``iou`` to ``model.predict``.

	    Returns:
	        A ``(bboxes, classes)`` tuple. ``bboxes`` is the list form of
	        ``boxes.xyxy`` (one ``[x1, y1, x2, y2]`` list per detection) and
	        ``classes`` holds the matching class-name strings.

	    Raises:
	        KeyError: If the model reports a class index outside 0-9.
	    """
	    # Class index -> label name used by the rest of the pipeline.
	    mapping = {
	        0: 'CheckBox',
	        1: 'List',
	        2: 'P',
	        3: 'abandon',
	        4: 'figure',
	        5: 'gridless_table',
	        6: 'handwritten_signature',
	        7: 'qr_code',
	        8: 'table',
	        9: 'title',
	    }

	    # predict() returns one result per source; a single image is passed,
	    # so only the first result is used.
	    results = model.predict(
	        source=img,
	        conf=conf_threshold,
	        iou=iou_threshold,
	        show_labels=True,
	        show_conf=True,
	        imgsz=1024,
	        agnostic_nms=True,
	        max_det=34,
	        nms=True,
	    )[0]

	    bboxes = results.boxes.xyxy.cpu().tolist()
	    # ``cls`` is a tensor, so ``tolist()`` may yield floats such as 2.0;
	    # cast explicitly instead of relying on 2.0 hashing like 2.
	    classes = [mapping[int(class_id)] for class_id in results.boxes.cls.cpu().tolist()]
	    return bboxes, classes

	def get_orders(image_path, boxes):
	    """Predict a reading-order position for the given boxes.

	    Args:
	        image_path: Path of the page image, or an already opened PIL image.
	        boxes: List of ``[x1, y1, x2, y2]`` boxes located on that image.

	    Returns:
	        The ``position`` attribute of every entry in the first prediction's
	        ``bboxes``, as a list. An empty ``boxes`` list yields ``[]`` without
	        opening the image or running the model.
	    """
	    # Nothing to order: skip the image load and the model call entirely.
	    if not boxes:
	        return []

	    # Accept both a file path and a PIL image (Image.open() cannot read
	    # from an Image instance).
	    if isinstance(image_path, Image.Image):
	        image = image_path
	    else:
	        image = Image.open(image_path)

	    # The original body referenced an undefined ``bboxes`` name here; the
	    # parameter is called ``boxes``.
	    order_predictions = batch_ordering([image], [boxes], order_model, processor)
	    return [entry.position for entry in order_predictions[0].bboxes]

	def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
	    """Draw colour-coded, labelled bounding boxes on an image.

	    Args:
	        image_path: Path of the image to annotate, or a PIL image. When a
	            PIL image is given a copy is annotated, so the caller's object
	            is left untouched.
	        bboxes: Sequence of ``(x1, y1, x2, y2)`` boxes.
	        classes: Class name for each box; each must be a key of the colour
	            table below.
	        reading_order: Value shown after the class name in each label.

	    Returns:
	        The annotated PIL image.

	    Raises:
	        KeyError: If a class name has no entry in the colour table.
	        IndexError: If ``classes`` or ``reading_order`` is shorter than
	            ``bboxes``.
	    """
	    # Outline colour per class name.
	    class_colors = {
	        'CheckBox': 'orange',
	        'List': 'blue',
	        'P': 'green',
	        'abandon': 'purple',
	        'figure': 'cyan',
	        'gridless_table': 'yellow',
	        'handwritten_signature': 'magenta',
	        'qr_code': 'red',
	        'table': 'brown',
	        'title': 'pink',
	    }

	    # Accept a path or a PIL image (Image.open() cannot read an Image).
	    if isinstance(image_path, Image.Image):
	        image = image_path.copy()
	    else:
	        image = Image.open(image_path)
	    draw = ImageDraw.Draw(image)

	    # Prefer Arial; fall back to Pillow's built-in font when the font file
	    # cannot be loaded.
	    try:
	        font = ImageFont.truetype("arial.ttf", 20)
	        title_font = ImageFont.truetype("arial.ttf", 30)  # larger font for titles
	    except IOError:
	        font = ImageFont.load_default(size=30)
	        title_font = font  # no separate title size in the fallback

	    for i, (x1, y1, x2, y2) in enumerate(bboxes):
	        class_name = classes[i]
	        order = reading_order[i]
	        color = class_colors[class_name]

	        # Titles get a thicker outline and the larger font.
	        is_title = class_name == 'title'
	        box_thickness = 4 if is_title else 2
	        label_font = title_font if is_title else font

	        draw.rectangle([x1, y1, x2, y2], outline=color, width=box_thickness)

	        label = f"{class_name}-{order}"

	        # Measure the rendered label to know how far above the box it goes.
	        text_box = draw.textbbox((x1, y1 - 20), label, font=label_font)
	        label_height = text_box[3] - text_box[1]

	        # Place the label just above the box, but never above the top edge
	        # of the image, where it would be clipped.
	        draw.text((x1, max(0, y1 - label_height)), label, fill="black", font=label_font)

	    return image
	from PIL import Image, ImageDraw

	def is_inside(box1, box2):
	    """Tell whether ``box1`` lies within ``box2`` (shared edges count).

	    Both boxes are indexable as ``(x1, y1, x2, y2)``.
	    """
	    # The top-left corner of box1 must not precede that of box2 ...
	    corner_ok = box1[0] >= box2[0] and box1[1] >= box2[1]
	    if not corner_ok:
	        return False
	    # ... and its bottom-right corner must not extend past that of box2.
	    return box1[2] <= box2[2] and box1[3] <= box2[3]

	def is_overlap(box1, box2):
	    """Tell whether two ``(x1, y1, x2, y2)`` boxes share any interior area.

	    Boxes that merely touch along an edge or at a corner do not overlap.
	    """
	    left_a, top_a, right_a, bottom_a = box1
	    left_b, top_b, right_b, bottom_b = box2

	    # One box lies entirely to the left of the other.
	    if right_a <= left_b or right_b <= left_a:
	        return False
	    # Otherwise they overlap unless one lies entirely above the other.
	    return not (bottom_a <= top_b or bottom_b <= top_a)

	def remove_overlapping_and_inside_boxes(boxes, classes):
	    """Drop boxes that are nested in, or overlap with, another box.

	    Every unordered pair of boxes is examined once:

	    * if one box lies inside the other, the inner box is dropped;
	    * otherwise, if the two overlap, the box with the smaller area is
	      dropped;
	    * on a tie (identical boxes, or overlapping boxes of equal area) only
	      the later box is dropped, so one of the two always survives.
	      Scanning both orderings of each pair, as before, removed both.

	    Args:
	        boxes: List of ``[x1, y1, x2, y2]`` boxes. Modified in place.
	        classes: Class name per box, parallel to ``boxes``. Modified in
	            place.

	    Returns:
	        The same ``(boxes, classes)`` lists after removal.
	    """
	    areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in boxes]
	    to_remove = set()

	    for i, box1 in enumerate(boxes):
	        for j in range(i + 1, len(boxes)):
	            box2 = boxes[j]
	            if is_inside(box2, box1):
	                # Also covers identical boxes: keep the earlier one.
	                to_remove.add(j)
	            elif is_inside(box1, box2):
	                to_remove.add(i)
	            elif is_overlap(box1, box2):
	                # Drop the smaller box; on equal areas drop the later one.
	                to_remove.add(j if areas[j] <= areas[i] else i)

	    # Delete from the highest index down so earlier indices stay valid.
	    for idx in sorted(to_remove, reverse=True):
	        del boxes[idx]
	        del classes[idx]

	    return boxes, classes
	def full_predictions(IMAGE_PATH, conf_threshold=0.3, iou_threshold=0):
	    """Run the whole pipeline on one image and return the annotated result.

	    Steps: detect layout boxes, drop nested/overlapping boxes, predict the
	    reading order of the remaining boxes, then draw everything on the image.

	    Args:
	        IMAGE_PATH: The input image, passed on to the detection, ordering
	            and drawing steps.
	        conf_threshold: Detection confidence threshold. Defaults to the
	            previously hard-coded ``0.3``.
	        iou_threshold: Detection IoU threshold. Defaults to the previously
	            hard-coded ``0``.

	    Returns:
	        The image returned by ``draw_bboxes_on_image``.
	    """
	    # The two thresholds are optional so the three-input Gradio interface
	    # (image + two sliders) and single-argument callers both work.
	    bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
	    bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
	    orders = get_orders(IMAGE_PATH, bboxes)
	    return draw_bboxes_on_image(IMAGE_PATH, bboxes, classes, orders)

	# Gradio UI: one image plus two threshold sliders in, one annotated image out.
	iface = gr.Interface(
	    fn=full_predictions,
	    inputs=[
	        # "filepath" hands the pipeline a path on disk, which is what its
	        # Image.open(image_path) calls expect.
	        gr.Image(type="filepath", label="Upload Image"),
	        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
	        gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold"),
	    ],
	    outputs=gr.Image(type="pil", label="Result"),
	    title="Ultralytics Gradio",
	    # The previous text advertised a YOLO11n model; the app loads an
	    # RT-DETR document-layout checkpoint instead.
	    description="Upload a document image to detect its layout regions with an RT-DETR model and label them in reading order.",
	    # Each example row is [image, confidence threshold, IoU threshold].
	    examples=[
	        ["kashida.png", 0.2, 0.45],
	        ["image.jpg", 0.2, 0.45],
	        ["Screenshot 2024-11-06 130230.png", 0.25, 0.45],
	    ],
	    theme=gr.themes.Default(),
	)

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    iface.launch()