Spaces:

Norakneath
/

TestingYolo

Sleeping

App Files Community

Norakneath committed 19 days ago

Commit

4d73384

verified ·

1 Parent(s): 07e5b4d

Update app.py

Browse files

Files changed (1) hide show

app.py +100 -90

app.py CHANGED Viewed

@@ -2,117 +2,127 @@ import gradio as gr
 from ultralytics import YOLO
 from PIL import Image, ImageDraw, ImageFont
 import random
-# Load YOLO model (ensure the model file exists in the working directory)
 YOLO_MODEL_PATH = "best-Dense.pt"
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
-# Define a set of colors for different classes
 CLASS_COLORS = {}
 def get_class_color(class_id):
-    """Assign a random color to each class."""
     if class_id not in CLASS_COLORS:
         CLASS_COLORS[class_id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
     return CLASS_COLORS[class_id]
-# Class Names (Modify based on your dataset)
-CLASS_NAMES = {0: "Text"}  # Example labels
-def detect_text_lines(image):
-    """Detects text lines with two different confidence and IoU thresholds."""
-    image = Image.fromarray(image)
-    original_image = image.copy()
-    # Define thresholds for debugging
-    thresholds = [
-        {"conf": 0.6, "iou": 0.5},  # Default thresholds
-        {"conf": 0.4, "iou": 0.3},  # Lower thresholds for comparison
-    ]
-    results_list = []
-    for threshold in thresholds:
-        conf = threshold["conf"]
-        iou = threshold["iou"]
-        # Run YOLO text detection with specific thresholds
-        results = model.predict(image, conf=conf, iou=iou, device="cpu")
-        detected_boxes = results[0].boxes.xyxy.tolist() if hasattr(results[0].boxes, 'xyxy') else []
-        class_ids = results[0].boxes.cls.tolist() if hasattr(results[0].boxes, 'cls') else []
-        detected_boxes = [list(map(int, box)) for box in detected_boxes]
-        # Draw bounding boxes on the image
-        annotated_image = original_image.copy()
-        draw = ImageDraw.Draw(annotated_image)
-        try:
-            font = ImageFont.truetype("arial.ttf", 18)  # Load a font (ensure arial.ttf is available)
-        except:
-            font = ImageFont.load_default()  # Fallback in case font is missing
-        for idx, (x1, y1, x2, y2) in enumerate(detected_boxes):
-            class_id = int(class_ids[idx]) if idx < len(class_ids) else -1
-            color = get_class_color(class_id)
-            class_name = CLASS_NAMES.get(class_id, f"Class {class_id}")
-            # Draw bounding box
-            draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
-            # Draw label with background
-            text_size = draw.textbbox((0, 0), class_name, font=font)
-            text_width = text_size[2] - text_size[0]
-            text_height = text_size[3] - text_size[1]
-            # Draw filled rectangle behind text for better visibility
-            draw.rectangle([x1, y1 - text_height - 4, x1 + text_width + 6, y1], fill=color)
-            draw.text((x1 + 3, y1 - text_height - 2), class_name, fill="white", font=font)
-        total_objects = len(detected_boxes)
-        total_classes = len(set(class_ids))
-        results_list.append({
-            "image": annotated_image,
-            "objects": f"Total Objects Detected: {total_objects} (Conf={conf}, IoU={iou})",
-            "classes": f"Total Classes Detected: {total_classes} (Conf={conf}, IoU={iou})"
         })
-    return (
-        results_list[0]["image"], results_list[0]["objects"], results_list[0]["classes"],
-        results_list[1]["image"], results_list[1]["objects"], results_list[1]["classes"]
     )
-# Gradio UI
 with gr.Blocks() as iface:
     gr.Markdown("# 📜 Text Line Detection with YOLO")
-    gr.Markdown("## 📷 Upload an image to detect text lines")
     with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### 📤 Upload Image")
-            image_input = gr.Image(type="numpy", label="Upload an image")
-        with gr.Column(scale=2):
-            gr.Markdown("### 🖼 Annotated Images with Bounding Boxes")
-            output_annotated_1 = gr.Image(type="pil", label="Detection (Conf=0.6, IoU=0.5)")
-            output_annotated_2 = gr.Image(type="pil", label="Detection (Conf=0.4, IoU=0.3)")
-    gr.Markdown("### 🔢 Detection Results")
-    output_objects_1 = gr.Textbox(label="Total Objects Detected (Conf=0.6)", lines=1)
-    output_classes_1 = gr.Textbox(label="Total Classes Detected (Conf=0.6)", lines=1)
-    output_objects_2 = gr.Textbox(label="Total Objects Detected (Conf=0.4)", lines=1)
-    output_classes_2 = gr.Textbox(label="Total Classes Detected (Conf=0.4)", lines=1)
-    image_input.upload(
         detect_text_lines,
-        inputs=image_input,
-        outputs=[
-            output_annotated_1, output_objects_1, output_classes_1,
-            output_annotated_2, output_objects_2, output_classes_2
-        ]
     )
-# 🚀 Run the app locally
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

 from ultralytics import YOLO
 from PIL import Image, ImageDraw, ImageFont
 import random
+import numpy as np
+import os
+# Check if YOLO model exists
 YOLO_MODEL_PATH = "best-Dense.pt"
+if not os.path.exists(YOLO_MODEL_PATH):
+    raise FileNotFoundError(f"YOLO model file not found at {YOLO_MODEL_PATH}")
+# Load YOLO model
 model = YOLO(YOLO_MODEL_PATH, task='detect').to("cpu")
+# Define class colors and names
 CLASS_COLORS = {}
+CLASS_NAMES = {0: "Text"}
 def get_class_color(class_id):
+    """Assign consistent random colors to classes."""
     if class_id not in CLASS_COLORS:
         CLASS_COLORS[class_id] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
     return CLASS_COLORS[class_id]
+def safe_font_load():
+    """Safely load font with fallback."""
+    try:
+        return ImageFont.truetype("arial.ttf", 18)
+    except:
+        return ImageFont.load_default()
+def process_detection(image, conf, iou):
+    """Process detection with error handling."""
+    pil_image = Image.fromarray(image)
+    draw = ImageDraw.Draw(pil_image)
+    font = safe_font_load()
+    # Run model prediction
+    results = model.predict(pil_image, conf=conf, iou=iou, device="cpu")
+    # Handle empty results safely
+    detected_boxes = []
+    class_ids = []
+    if results[0].boxes is not None:
+        detected_boxes = results[0].boxes.xyxy.cpu().numpy().tolist()
+        class_ids = results[0].boxes.cls.cpu().numpy().astype(int).tolist()
+    # Draw bounding boxes and labels
+    for idx, (x1, y1, x2, y2) in enumerate(detected_boxes):
+        class_id = class_ids[idx] if idx < len(class_ids) else 0
+        color = get_class_color(class_id)
+        class_name = CLASS_NAMES.get(class_id, f"Class {class_id}")
+        # Draw rectangle
+        draw.rectangle([x1, y1, x2, y2], outline=color, width=2)
+        # Draw text label
+        text = f"{class_name}"
+        text_bbox = draw.textbbox((0, 0), text, font=font)
+        draw.rectangle(
+            [x1, y1 - (text_bbox[3] - text_bbox[1]) - 4, x1 + (text_bbox[2] - text_bbox[0]) + 6, y1],
+            fill=color
+        )
+        draw.text(
+            (x1 + 3, y1 - (text_bbox[3] - text_bbox[1]) - 2),
+            text,
+            fill="white",
+            font=font
+        )
+    return pil_image, len(detected_boxes), len(set(class_ids))
+def detect_text_lines(image):
+    """Main detection function with dual threshold handling."""
+    # Process with two different threshold sets
+    results = []
+    for thresholds in [(0.6, 0.5), (0.4, 0.3)]:
+        conf, iou = thresholds
+        annotated_img, obj_count, class_count = process_detection(
+            np.array(image), conf, iou
+        )
+        results.append({
+            "image": annotated_img,
+            "objects": f"Objects: {obj_count} (Conf={conf}, IoU={iou})",
+            "classes": f"Classes: {class_count} (Conf={conf}, IoU={iou})"
         })
+    return tuple(
+        item for sublist in [
+            (results[0]["image"], results[0]["objects"], results[0]["classes"],
+             results[1]["image"], results[1]["objects"], results[1]["classes"])
+        ] for item in sublist
     )
+# Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# 📜 Text Line Detection with YOLO")
     with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(type="numpy", label="Input Image")
+            submit_btn = gr.Button("Detect Text")
+        with gr.Column():
+            with gr.Tab("High Confidence"):
+                high_conf_img = gr.Image(type="pil", label="Detections (0.6 conf)")
+                high_conf_obj = gr.Textbox(label="Object Count")
+                high_conf_cls = gr.Textbox(label="Class Count")
+            with gr.Tab("Low Confidence"):
+                low_conf_img = gr.Image(type="pil", label="Detections (0.4 conf)")
+                low_conf_obj = gr.Textbox(label="Object Count")
+                low_conf_cls = gr.Textbox(label="Class Count")
+    submit_btn.click(
         detect_text_lines,
+        inputs=input_image,
+        outputs=[high_conf_img, high_conf_obj, high_conf_cls, low_conf_img, low_conf_obj, low_conf_cls]
     )
 if __name__ == "__main__":
+    iface.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        share=False
+    )