Spaces:

ivan-wald
/

document-layout-analysis

Sleeping

App Files Files Community

ivan-wald committed on Nov 5, 2024

Commit

80ceed3

verified ·

1 Parent(s): 8e5a190

Upload 7 files

Browse files

Files changed (7) hide show

Base-RCNN-FPN.yml +69 -0
README.md +12 -0
app.py +77 -0
cascade_dit_base.yml +20 -0
packages.txt +1 -0
publaynet_example.jpeg +0 -0
requirements.txt +10 -0

Base-RCNN-FPN.yml ADDED Viewed

	@@ -0,0 +1,69 @@

+MODEL:
+  MASK_ON: True
+  META_ARCHITECTURE: "GeneralizedRCNN"
+  PIXEL_MEAN: [123.675, 116.280, 103.530]  # cascade_dit_base.yml overrides this with 127.5 per channel
+  PIXEL_STD: [58.395, 57.120, 57.375]  # cascade_dit_base.yml overrides this with 127.5 per channel
+  BACKBONE:
+    NAME: "build_vit_fpn_backbone"
+  VIT:
+    OUT_FEATURES: ["layer3", "layer5", "layer7", "layer11"]
+    DROP_PATH: 0.1
+    IMG_SIZE: [224,224]
+    POS_TYPE: "abs"
+  FPN:
+    IN_FEATURES: ["layer3", "layer5", "layer7", "layer11"]  # same layer names as VIT.OUT_FEATURES above
+  ANCHOR_GENERATOR:
+    SIZES: [[32], [64], [128], [256], [512]]  # One anchor size for each input feature map
+    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (shared by all input feature maps)
+  RPN:
+    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
+    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
+    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
+    # Detectron1 uses 2000 proposals per-batch,
+    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
+    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
+    POST_NMS_TOPK_TRAIN: 1000  # cascade_dit_base.yml raises this to 2000
+    POST_NMS_TOPK_TEST: 1000
+  ROI_HEADS:
+    NAME: "StandardROIHeads"  # cascade_dit_base.yml replaces this with CascadeROIHeads
+    IN_FEATURES: ["p2", "p3", "p4", "p5"]
+    NUM_CLASSES: 5  # app.py labels five classes for this model: text, title, list, table, figure
+  ROI_BOX_HEAD:
+    NAME: "FastRCNNConvFCHead"
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+  ROI_MASK_HEAD:
+    NAME: "MaskRCNNConvUpsampleHead"
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+DATASETS:
+  TRAIN: ("publaynet_train",)
+  TEST: ("publaynet_val",)  # app.py uses the first entry as its MetadataCatalog key
+SOLVER:
+  LR_SCHEDULER_NAME: "WarmupCosineLR"
+  AMP:
+    ENABLED: True
+  OPTIMIZER: "ADAMW"
+  BACKBONE_MULTIPLIER: 1.0
+  CLIP_GRADIENTS:
+    ENABLED: True
+    CLIP_TYPE: "full_model"
+    CLIP_VALUE: 1.0
+    NORM_TYPE: 2.0
+  WARMUP_FACTOR: 0.01
+  BASE_LR: 0.0004
+  WEIGHT_DECAY: 0.05
+  IMS_PER_BATCH: 32  # cascade_dit_base.yml lowers this to 16
+INPUT:
+  CROP:
+    ENABLED: True
+    TYPE: "absolute_range"
+    SIZE: (384, 600)
+  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
+  FORMAT: "RGB"  # channel order the model is fed; app.py must hand the predictor images accordingly
+DATALOADER:
+  FILTER_EMPTY_ANNOTATIONS: False
+VERSION: 2
+AUG:
+  DETR: True
+SEED: 42

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: Dit Document Layout Analysis
+emoji: 👀
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 5.5.0
+app_file: app.py
+pinned: false
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import os
+os.system('git clone https://github.com/facebookresearch/detectron2.git')
+os.system('pip install -e detectron2')
+os.system("git clone https://github.com/microsoft/unilm.git")
+os.system("sed -i 's/from collections import Iterable/from collections.abc import Iterable/' unilm/dit/object_detection/ditod/table_evaluation/data_structure.py")
+os.system("curl -LJ -o publaynet_dit-b_cascade.pth 'https://layoutlm.blob.core.windows.net/dit/dit-fts/publaynet_dit-b_cascade.pth?sv=2022-11-02&ss=b&srt=o&sp=r&se=2033-06-08T16:48:15Z&st=2023-06-08T08:48:15Z&spr=https&sig=a9VXrihTzbWyVfaIDlIT1Z0FoR1073VB0RLQUMuudD4%3D'")
+import sys
+sys.path.append("unilm")
+sys.path.append("detectron2")
+import cv2
+from unilm.dit.object_detection.ditod import add_vit_config
+import torch
+from detectron2.config import CfgNode as CN
+from detectron2.config import get_cfg
+from detectron2.utils.visualizer import ColorMode, Visualizer
+from detectron2.data import MetadataCatalog
+from detectron2.engine import DefaultPredictor
+from huggingface_hub import hf_hub_download
+import gradio as gr
+# Step 1: instantiate config
+cfg = get_cfg()
+add_vit_config(cfg)
+cfg.merge_from_file("cascade_dit_base.yml")
+# Step 2: add model weights URL to config
+filepath = hf_hub_download(repo_id="Sebas6k/DiT_weights", filename="publaynet_dit-b_cascade.pth", repo_type="model")
+cfg.MODEL.WEIGHTS = filepath
+# Step 3: set device
+cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Step 4: define model
+predictor = DefaultPredictor(cfg)
+def analyze_image(img):
+    img = img.astype("float32")
+    md = MetadataCatalog.get(cfg.DATASETS.TEST[0])
+    if cfg.DATASETS.TEST[0]=='icdar2019_test':
+        md.set(thing_classes=["table"])
+    else:
+        md.set(thing_classes=["text","title","list","table","figure"])
+    output = predictor(img)["instances"]
+    v = Visualizer(img[:, :, ::-1],
+                    md,
+                    scale=1.0,
+                    instance_mode=ColorMode.SEGMENTATION)
+    result = v.draw_instance_predictions(output.to("cpu"))
+    result_image = result.get_image()[:, :, ::-1]
+    return result_image
+title = "Document Layout Analysis"
+description = "Demo"
+article = ""
+examples =[['publaynet_example.jpeg']]
+css = ".output-image, .input-image, .image-preview {height: 600px !important}"
+iface = gr.Interface(fn=analyze_image,
+                     inputs=gr.Image(type="numpy", label="document image"),
+                     outputs=gr.Image(type="numpy", label="annotated document"),
+                     title=title,
+                     description=description,
+                     examples=examples,
+                     article=article,
+                     css=css)
+iface.queue(5).launch()

cascade_dit_base.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+_BASE_: "Base-RCNN-FPN.yml"  # start from the base config; the keys below override it
+MODEL:
+  PIXEL_MEAN: [ 127.5, 127.5, 127.5 ]  # replaces the base file's per-channel means
+  PIXEL_STD: [ 127.5, 127.5, 127.5 ]  # replaces the base file's per-channel std values
+  WEIGHTS: "https://layoutlm.blob.core.windows.net/dit/dit-pts/dit-base-224-p16-500k-62d53a.pth"  # app.py overwrites cfg.MODEL.WEIGHTS after merging this file
+  VIT:
+    NAME: "dit_base_patch16"
+  ROI_HEADS:
+    NAME: CascadeROIHeads  # base file uses "StandardROIHeads"
+  ROI_BOX_HEAD:
+    CLS_AGNOSTIC_BBOX_REG: True
+  RPN:
+    POST_NMS_TOPK_TRAIN: 2000  # base file sets 1000
+SOLVER:
+  WARMUP_ITERS: 1000
+  IMS_PER_BATCH: 16  # base file sets 32
+  MAX_ITER: 60000
+  CHECKPOINT_PERIOD: 2000
+TEST:
+  EVAL_PERIOD: 2000

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python3-opencv

publaynet_example.jpeg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+pyyaml==5.1
+torch==1.11.0
+torchvision==0.12.0
+gradio
+numpy<2
+scipy
+shapely
+timm
+opencv-python