RT-Detr-ArabicLayoutAnalysisR

Running

File size: 6,429 Bytes

from ultralytics import RTDETR
import gradio as gr
from huggingface_hub import snapshot_download
from PIL import Image
from PIL import Image, ImageDraw, ImageFont
from surya.ordering import batch_ordering
from surya.model.ordering.processor import load_processor
from surya.model.ordering.model import load_model

# Download the model repository from the Hugging Face Hub and point at the
# RT-DETR weights file stored inside the downloaded snapshot.
model_dir = "{}/rtdetr_1024_crops.pt".format(
    snapshot_download("omarelsayeed/DETR-ARABIC-DOCUMENT-LAYOUT-ANALYSIS")
)
model = RTDETR(model_dir)

# Reading-order model and its processor; both are loaded once when the module
# is imported and shared by every request.
order_model = load_model()
processor = load_processor()

def detect_layout(img, conf_threshold, iou_threshold):
    """Detect layout regions in an image with the module-level RT-DETR model.

    Args:
        img: Image source forwarded unchanged to ``model.predict``.
        conf_threshold: Value passed to the predictor as ``conf``.
        iou_threshold: Value passed to the predictor as ``iou``.

    Returns:
        A ``(bboxes, classes)`` tuple. ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` coordinate lists and ``classes`` is the list of
        class names in the same order.

    Raises:
        KeyError: If the model reports a class id outside the known mapping.
    """
    results = model.predict(
        source=img,
        conf=conf_threshold,
        iou=iou_threshold,
        show_labels=True,
        show_conf=True,
        imgsz=1024,
        agnostic_nms=True,
        max_det=34,
        nms=True,
    )[0]  # a single image was submitted, so only the first result is used

    bboxes = results.boxes.xyxy.cpu().tolist()
    # ``tolist()`` typically yields class ids as floats (e.g. ``2.0``); cast
    # them so the name lookup uses the integer keys explicitly instead of
    # relying on float/int hash equality.
    class_ids = [int(class_id) for class_id in results.boxes.cls.cpu().tolist()]

    mapping = {
        0: 'CheckBox',
        1: 'List',
        2: 'P',
        3: 'abandon',
        4: 'figure',
        5: 'gridless_table',
        6: 'handwritten_signature',
        7: 'qr_code',
        8: 'table',
        9: 'title',
    }
    classes = [mapping[class_id] for class_id in class_ids]
    return bboxes, classes
    
def get_orders(image_path, boxes):
    """Return the predicted reading-order position for each box.

    Args:
        image_path: Either something ``PIL.Image.open`` accepts (a path or a
            file object) or an already loaded ``PIL.Image.Image``.
        boxes: List of ``[x1, y1, x2, y2]`` boxes located on that image.

    Returns:
        A list with the ``position`` attribute of every box in the first
        prediction returned by ``batch_ordering``; an empty list when
        ``boxes`` is empty.
    """
    # Nothing to order: skip the model call entirely.
    if not boxes:
        return []

    # The Gradio input is configured with ``type="pil"``, so the function may
    # receive an image object rather than a path; ``Image.open`` cannot read
    # an ``Image`` instance.
    if isinstance(image_path, Image.Image):
        image = image_path
    else:
        image = Image.open(image_path)

    # The original body referenced an undefined name (``bboxes``) here, which
    # raised NameError on every call; the parameter is ``boxes``.
    order_predictions = batch_ordering([image], [boxes], order_model, processor)
    return [ordered_box.position for ordered_box in order_predictions[0].bboxes]

def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
    """Draw colour-coded, labelled bounding boxes on an image.

    Each box is outlined in a colour chosen by its class name and labelled
    ``"<class>-<order>"``. Boxes of class ``'title'`` get a thicker outline
    and the larger font.

    Args:
        image_path: Either something ``PIL.Image.open`` accepts (a path or a
            file object) or an already loaded ``PIL.Image.Image``.
        bboxes: List of ``[x1, y1, x2, y2]`` boxes.
        classes: Class name for each box, index-aligned with ``bboxes``.
        reading_order: Order value for each box, index-aligned with ``bboxes``.

    Returns:
        The annotated ``PIL.Image.Image``. When an image object is passed in,
        a copy is annotated and the caller's object is left untouched.

    Raises:
        KeyError: If a class name has no entry in the colour map.
        IndexError: If ``classes`` or ``reading_order`` is shorter than
            ``bboxes``.
    """
    # Outline colour per class name.
    class_colors = {
        'CheckBox': 'orange',
        'List': 'blue',
        'P': 'green',
        'abandon': 'purple',
        'figure': 'cyan',
        'gridless_table': 'yellow',
        'handwritten_signature': 'magenta',
        'qr_code': 'red',
        'table': 'brown',
        'title': 'pink',
    }

    # The Gradio input is configured with ``type="pil"``, so an image object
    # may arrive instead of a path; ``Image.open`` cannot read an ``Image``
    # instance. Copy it so drawing never mutates the caller's image.
    if isinstance(image_path, Image.Image):
        image = image_path.copy()
    else:
        image = Image.open(image_path)

    draw = ImageDraw.Draw(image)

    # Prefer Arial; fall back to Pillow's built-in font when it is missing.
    try:
        font = ImageFont.truetype("arial.ttf", 20)
        title_font = ImageFont.truetype("arial.ttf", 30)  # larger font for titles
    except OSError:
        try:
            font = ImageFont.load_default(size=30)
        except TypeError:
            # Pillow versions whose load_default() does not accept ``size``.
            font = ImageFont.load_default()
        title_font = font  # no separate title size available in the fallback

    for index, (x1, y1, x2, y2) in enumerate(bboxes):
        class_name = classes[index]
        order = reading_order[index]
        color = class_colors[class_name]

        # Titles stand out with a thicker outline and the larger font.
        if class_name == 'title':
            box_thickness = 4
            label_font = title_font
        else:
            box_thickness = 2
            label_font = font

        draw.rectangle([x1, y1, x2, y2], outline=color, width=box_thickness)

        label = f"{class_name}-{order}"

        # Measure the rendered text so the label can sit just above the box.
        text_box = draw.textbbox((x1, y1 - 20), label, font=label_font)
        label_height = text_box[3] - text_box[1]

        # Clamp to the top edge so labels of boxes near the top of the page
        # are not drawn off-canvas.
        label_y = max(0, y1 - label_height)
        draw.text((x1, label_y), label, fill="black", font=label_font)

    return image
from PIL import Image, ImageDraw

def is_inside(box1, box2):
    """Return True when ``box1`` lies within ``box2`` (shared edges count).

    Both boxes are indexable as ``[x1, y1, x2, y2]``.
    """
    # The top-left corner of box1 must not lie before box2's top-left corner.
    top_left_ok = box2[0] <= box1[0] and box2[1] <= box1[1]
    if not top_left_ok:
        return False
    # The bottom-right corner of box1 must not lie past box2's bottom-right.
    return box1[2] <= box2[2] and box1[3] <= box2[3]

def is_overlap(box1, box2):
    """Return True when the two ``[x1, y1, x2, y2]`` boxes share interior area.

    Boxes that merely touch along an edge or at a corner do not overlap.
    """
    a_left, a_top, a_right, a_bottom = box1
    b_left, b_top, b_right, b_bottom = box2

    # Separated along the x axis: one box ends before the other begins.
    if a_right <= b_left or b_right <= a_left:
        return False
    # Separated along the y axis.
    if a_bottom <= b_top or b_bottom <= a_top:
        return False
    return True

def remove_overlapping_and_inside_boxes(boxes, classes):
    """Drop boxes that are contained in, or overlap with, a larger box.

    Every unordered pair of boxes is compared once:

    * if one box lies inside the other, the inner box is dropped;
    * otherwise, if the two boxes overlap, the one with the smaller area is
      dropped;
    * on a tie (exact duplicates, or overlapping boxes of equal area) the
      earlier box is kept and the later one dropped. Visiting each pair in
      both directions, as the previous implementation did, removed *both*
      boxes in these tie cases.

    A box that is itself dropped still takes part in the comparisons, so it
    can cause another, smaller box to be dropped as well.

    Args:
        boxes: List of ``[x1, y1, x2, y2]`` boxes. Modified in place.
        classes: Class label per box, index-aligned with ``boxes``. Modified
            in place.

    Returns:
        The same ``(boxes, classes)`` list objects after removal.
    """
    def _area(box):
        # Area of an [x1, y1, x2, y2] box.
        return (box[2] - box[0]) * (box[3] - box[1])

    to_remove = set()

    for i, box1 in enumerate(boxes):
        for j in range(i + 1, len(boxes)):
            box2 = boxes[j]
            first_in_second = is_inside(box1, box2)
            second_in_first = is_inside(box2, box1)

            if first_in_second and not second_in_first:
                # box1 is strictly the inner box.
                to_remove.add(i)
            elif second_in_first:
                # box2 is the inner box, or both are identical (keep the
                # earlier one).
                to_remove.add(j)
            elif is_overlap(box1, box2):
                # Partial overlap: drop the smaller box, the later on a tie.
                if _area(box1) < _area(box2):
                    to_remove.add(i)
                else:
                    to_remove.add(j)

    # Delete from the highest index down so earlier indices stay valid.
    for idx in sorted(to_remove, reverse=True):
        del boxes[idx]
        del classes[idx]

    return boxes, classes
def full_predictions(IMAGE_PATH, conf_threshold=0.3, iou_threshold=0):
    """Run the whole pipeline on one image and return the annotated result.

    Steps: detect layout regions, drop contained/overlapping boxes, predict
    the reading order of the remaining boxes, then draw everything.

    Args:
        IMAGE_PATH: The input image, handed unchanged to ``detect_layout``,
            ``get_orders`` and ``draw_bboxes_on_image``.
        conf_threshold: Confidence threshold forwarded to ``detect_layout``.
            Defaults to the previously hard-coded ``0.3``.
        iou_threshold: IoU threshold forwarded to ``detect_layout``. Defaults
            to the previously hard-coded ``0``.

    Returns:
        The annotated image produced by ``draw_bboxes_on_image``.
    """
    # The Gradio interface supplies three inputs (image and two sliders), so
    # the thresholds are accepted here instead of being fixed constants.
    bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
    bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
    orders = get_orders(IMAGE_PATH, bboxes)
    final_image = draw_bboxes_on_image(IMAGE_PATH, bboxes, classes, orders)
    return final_image

# Web UI: one image input plus two threshold sliders, one annotated image out.
# The title and description previously carried template text about a
# "YOLO11n" model, which does not match the RT-DETR + reading-order pipeline
# this app runs.
iface = gr.Interface(
    fn=full_predictions,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence threshold"),
        gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU threshold"),
    ],
    outputs=gr.Image(type="pil", label="Result"),
    title="Arabic Document Layout Analysis",
    description=(
        "Upload a document image to detect its layout regions with an "
        "RT-DETR model and label each region with its predicted reading order."
    ),
    # Each example row is [image file, confidence threshold, IoU threshold].
    examples=[
        ["kashida.png", 0.2, 0.45],
        ["image.jpg", 0.2, 0.45],
        ["Screenshot 2024-11-06 130230.png", 0.25, 0.45],
    ],
    theme=gr.themes.Default(),
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()