Spaces:

ChaseHan
/

Latex2Layout_PDF_Layout_Parsing

Running

App Files Files Community

ChaseHan committed 22 days ago

Commit

3ab79b5

verified ·

1 Parent(s): b1925a1

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -170

app.py CHANGED Viewed

@@ -4,206 +4,133 @@ import numpy as np
 import os
 import tempfile
 from ultralytics import YOLO
-import logging
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
 # Load the Latex2Layout model
-model_path = "latex2layout_object_detection_yolov8.pt"
 try:
-    if not os.path.exists(model_path):
-        raise FileNotFoundError(f"Model file not found: {model_path}")
     model = YOLO(model_path)
-    logger.info("Model loaded successfully")
 except Exception as e:
-    logger.error(f"Error loading model: {str(e)}")
-    raise
 def detect_and_visualize(image):
     """
-    Perform layout detection on the uploaded image using the Latex2Layout model and visualize the results.
     Args:
-        image: The uploaded image
     Returns:
-        annotated_image: Image with detection boxes
-        layout_annotations: Annotations in YOLO format
     """
     try:
-        if image is None:
-            return None, "Error: No image uploaded."
-        # Validate image format and dimensions
-        if not isinstance(image, np.ndarray):
-            return None, "Error: Invalid image format."
-        if image.size == 0:
-            return None, "Error: Empty image."
-        # Run detection using the Latex2Layout model
         results = model(image)
-        result = results[0]
-        # Create a copy of the image for visualization
-        annotated_image = image.copy()
-        layout_annotations = []
-        # Get image dimensions
-        img_height, img_width = image.shape[:2]
-        # Draw detection results
-        for box in result.boxes:
-            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
-            conf = float(box.conf[0])
-            cls_id = int(box.cls[0])
-            cls_name = result.names[cls_id]
-            # Generate a color for each class
-            color = tuple(np.random.randint(0, 255, 3).tolist())
-            # Draw bounding box and label
-            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
-            label = f'{cls_name} {conf:.2f}'
-            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
-            cv2.rectangle(annotated_image, (x1, y1-label_height-5), (x1+label_width, y1), color, -1)
-            cv2.putText(annotated_image, label, (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
-            # Convert to YOLO format (normalized)
-            x_center = (x1 + x2) / (2 * img_width)
-            y_center = (y1 + y2) / (2 * img_height)
-            width = (x2 - x1) / img_width
-            height = (y2 - y1) / img_height
-            layout_annotations.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
-        return annotated_image, "\n".join(layout_annotations)
     except Exception as e:
-        logger.error(f"Error in detect_and_visualize: {str(e)}")
-        return None, f"Error during detection: {str(e)}"
-def save_layout_annotations(layout_annotations_str):
     """
-    Save layout annotations to a temporary file and return the file path.
     Args:
-        layout_annotations_str: Annotations string in YOLO format
     Returns:
-        file_path: Path to the saved annotation file
     """
     try:
-        if not layout_annotations_str:
-            return None
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
-        with open(temp_file.name, "w") as f:
-            f.write(layout_annotations_str)
-        return temp_file.name
     except Exception as e:
-        logger.error(f"Error in save_layout_annotations: {str(e)}")
-        return None
-def load_example_image():
-    """
-    Load an example image for demonstration.
-    Returns:
-        image: The loaded example image or None if loading fails
-    """
-    try:
-        example_path = "example_image.jpg"
-        if not os.path.exists(example_path):
-            logger.error(f"Example image not found: {example_path}")
-            return None
-        return cv2.imread(example_path)
-    except Exception as e:
-        logger.error(f"Error loading example image: {str(e)}")
-        return None
-# Custom CSS for styling
-custom_css = """
-    .container { max-width: 1200px; margin: auto; }
-    .button-primary { background-color: #4CAF50; color: white; }
-    .button-secondary { background-color: #008CBA; color: white; }
-    .gr-image { border: 2px solid #ddd; border-radius: 5px; }
-    .gr-textbox { font-family: monospace; }
-"""
-# Create Gradio interface with enhanced styling
-with gr.Blocks(
-    title="Latex2Layout Detection",
-    theme=gr.themes.Default(),
-    css=custom_css
-) as demo:
-    # Header with instructions
-    gr.Markdown(
-        """
-        # Latex2Layout Layout Detection
-        Upload an image to detect layout elements using the **Latex2Layout** model. View the annotated image and download the results in YOLO format.
-        """
-    )
-    # Main layout with two columns
     with gr.Row():
-        # Input column
-        with gr.Column(scale=1):
-            input_image = gr.Image(
-                label="Upload Image",
-                type="numpy",
-                height=400,
-                elem_classes="gr-image"
-            )
-            detect_btn = gr.Button(
-                "Start Detection",
-                variant="primary",
-                elem_classes="button-primary"
-            )
-            gr.Markdown("**Tip**: Upload a clear image for optimal detection results.")
-        # Output column
-        with gr.Column(scale=1):
-            output_image = gr.Image(
-                label="Detection Results",
-                height=400,
-                elem_classes="gr-image"
-            )
-            layout_annotations = gr.Textbox(
-                label="Layout Annotations (YOLO Format)",
-                lines=10,
-                max_lines=15,
-                elem_classes="gr-textbox"
-            )
-            download_btn = gr.Button(
-                "Download Annotations",
-                variant="secondary",
-                elem_classes="button-secondary"
-            )
-            download_file = gr.File(
-                label="Download File",
-                interactive=False
-            )
-    # Example image button (optional)
-    with gr.Row():
-        gr.Button("Load Example Image").click(
-            fn=load_example_image,
-            outputs=input_image
-        )
-    # Event handlers
     detect_btn.click(
         fn=detect_and_visualize,
-        inputs=input_image,
-        outputs=[output_image, layout_annotations],
-        show_progress=True
     )
     download_btn.click(
-        fn=save_layout_annotations,
-        inputs=layout_annotations,
-        outputs=download_file
     )
 # Launch the application

 import os
 import tempfile
 from ultralytics import YOLO
+# Define the model path for Latex2Layout
+model_path = "latex2layout_object_detection_yolov8.pt"
+# Fail fast with a clear message when the weights file is missing,
+# instead of surfacing whatever error the loader would raise.
+if not os.path.exists(model_path):
+    raise FileNotFoundError(f"Model file not found at {model_path}")
 # Load the Latex2Layout model
 try:
     model = YOLO(model_path)
 except Exception as e:
+    # Chain the cause so the loader's own traceback stays attached.
+    raise RuntimeError(f"Failed to load Latex2Layout model: {e}") from e
 def detect_and_visualize(image):
     """
+    Run the Latex2Layout detector on an image and draw the detections.
+
     Args:
+        image: The uploaded image as a numpy array.
     Returns:
+        annotated_image: Copy of the input with boxes and labels drawn.
+        yolo_annotations: One "class x_center y_center width height" line
+            per detection, normalized by the image size, or
+            "No objects detected." when nothing was found.
+    Raises:
+        ValueError: If image is None, not a numpy array, or empty.
+        RuntimeError: If the model call fails.
     """
+    # Validate input image (an empty array would otherwise fail later with
+    # a less helpful error)
+    if image is None or not isinstance(image, np.ndarray) or image.size == 0:
+        raise ValueError("Invalid image input: Please upload a valid image.")
+    # NOTE(review): Gradio delivers RGB arrays, while Ultralytics documents
+    # numpy inputs as BGR - confirm whether the channels should be flipped
+    # before inference.
+    # Run object detection with error handling
     try:
         results = model(image)
     except Exception as e:
+        # Chain the cause so the original traceback stays visible.
+        raise RuntimeError(f"Error during Latex2Layout detection: {e}") from e
+    # Extract results from the first frame
+    result = results[0]
+    annotated_image = image.copy()
+    yolo_annotations = []
+    # Get image dimensions
+    img_height, img_width = image.shape[:2]
+    # One color per class, so every box of the same class is drawn alike
+    class_colors = {}
+    # Process each detected object
+    for box in result.boxes:
+        # Extract bounding box coordinates
+        x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
+        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
+        # Get confidence and class details
+        conf = float(box.conf[0])
+        cls_id = int(box.cls[0])
+        cls_name = result.names[cls_id]
+        # Pick the class color on first sight (upper bound 256 is exclusive,
+        # so channel values cover 0-255)
+        if cls_id not in class_colors:
+            class_colors[cls_id] = tuple(np.random.randint(0, 256, 3).tolist())
+        color = class_colors[cls_id]
+        # Draw bounding box on the image
+        cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)
+        # Create and draw label with confidence
+        label = f"{cls_name} {conf:.2f}"
+        (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+        label_top = y1 - label_height - 5
+        if label_top < 0:
+            # Box touches the top edge: draw the label just inside the box
+            # instead of above the image, where it would be invisible.
+            label_top = y1
+        label_bottom = label_top + label_height + 5
+        cv2.rectangle(annotated_image, (x1, label_top), (x1 + label_width, label_bottom), color, -1)
+        cv2.putText(annotated_image, label, (x1, label_bottom - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)
+        # Convert bounding box to YOLO format (normalized coordinates)
+        x_center = (x1 + x2) / (2 * img_width)
+        y_center = (y1 + y2) / (2 * img_height)
+        width = (x2 - x1) / img_width
+        height = (y2 - y1) / img_height
+        yolo_annotations.append(f"{cls_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")
+    # Combine annotations into a single string
+    yolo_annotations_str = "\n".join(yolo_annotations) if yolo_annotations else "No objects detected."
+    return annotated_image, yolo_annotations_str
+def save_yolo_annotations(yolo_annotations_str):
     """
+    Write the YOLO annotations to a temporary .txt file and return its path.
+
     Args:
+        yolo_annotations_str: Annotations string in YOLO format.
     Returns:
+        file_path: Path to the saved annotation file. The file is created
+            with delete=False, so it is not removed automatically.
+    Raises:
+        ValueError: If the string is empty or is the
+            "No objects detected." placeholder.
+        RuntimeError: If the file cannot be created or written.
     """
+    # Handle empty annotations
+    if not yolo_annotations_str or yolo_annotations_str == "No objects detected.":
+        raise ValueError("No annotations available to save.")
+    # Save annotations to a temporary file with error handling
     try:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".txt")
+        temp_file_path = temp_file.name
+        # Only the path is needed: close the handle NamedTemporaryFile
+        # opened, otherwise one descriptor leaks per call and the file is
+        # open twice while it is being written.
+        temp_file.close()
+        with open(temp_file_path, "w") as f:
+            f.write(yolo_annotations_str)
+        return temp_file_path
     except Exception as e:
+        # Chain the cause so the underlying OS error stays visible.
+        raise RuntimeError(f"Failed to save annotations: {e}") from e
+# Build the Gradio interface: a title, a short description, and one row
+# holding an input column and a results column.
+with gr.Blocks(title="Latex2Layout Object Detection Visualization") as demo:
+    gr.Markdown("# Latex2Layout Object Detection Visualization")
+    gr.Markdown("Upload an image to detect objects using the Latex2Layout model. View the results with bounding boxes and download annotations in YOLO format.")
     with gr.Row():
+        # First column: image upload and the button that starts detection
+        with gr.Column():
+            input_image = gr.Image(label="Upload Image", type="numpy")
+            detect_btn = gr.Button("Start Detection")
+        # Second column: annotated image, annotation text, and download controls
+        with gr.Column():
+            output_image = gr.Image(label="Detection Results")
+            yolo_annotations = gr.Textbox(label="YOLO Annotations", lines=10)
+            download_btn = gr.Button("Download YOLO Annotations")
+            download_file = gr.File(label="Download Annotations")
+    # Define button click events
+    # Detection: the uploaded image goes to detect_and_visualize; its two
+    # return values fill the result image and the annotations textbox.
     detect_btn.click(
         fn=detect_and_visualize,
+        inputs=[input_image],
+        outputs=[output_image, yolo_annotations]
     )
+    # Download: the textbox content goes to save_yolo_annotations; the
+    # returned file path is handed to the file component.
     download_btn.click(
+        fn=save_yolo_annotations,
+        inputs=[yolo_annotations],
+        outputs=[download_file]
     )
 # Launch the application