Spaces:

enusbaum
/

JunkWaxHero-Space

Sleeping

App Files Community

Eric P. Nusbaum committed 25 days ago

Commit

f61c335

1 Parent(s): b5e07e6

Update Space

Browse files

Files changed (2) hide show

app.py +106 -103
requirements.txt +5 -4

app.py CHANGED Viewed

@@ -1,149 +1,152 @@
 import os
 import numpy as np
 import onnxruntime
 from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
-# Define paths
 MODEL_PATH = os.path.join("onnx", "model.onnx")
 LABELS_PATH = os.path.join("onnx", "labels.txt")
 # Load labels
 with open(LABELS_PATH, "r") as f:
-    LABELS = [line.strip() for line in f.readlines()]
-# Initialize ONNX Runtime session
 class Model:
     def __init__(self, model_filepath):
-        # Initialize the InferenceSession
         self.session = onnxruntime.InferenceSession(model_filepath)
-        # Ensure the model has exactly one input
-        assert len(self.session.get_inputs()) == 1, "Model should have exactly one input."
-        # Extract input details
         self.input_shape = self.session.get_inputs()[0].shape[2:]  # (H, W)
         self.input_name = self.session.get_inputs()[0].name
-        self.input_type = {
-            'tensor(float)': np.float32,
-            'tensor(float16)': np.float16
-        }.get(self.session.get_inputs()[0].type, np.float32)
-        # Extract output names
-        self.output_names = [output.name for output in self.session.get_outputs()]
-        # Default preprocessing flags
         self.is_bgr = False
         self.is_range255 = False
-        # Retrieve metadata from the model
-        metadata_map = self.session.get_modelmeta().custom_metadata_map
-        for key, value in metadata_map.items():
-            if key == 'Image.BitmapPixelFormat' and value == 'Bgr8':
                 self.is_bgr = True
-            elif key == 'Image.NominalPixelRange' and value == 'NominalRange_0_255':
                 self.is_range255 = True
-    def predict(self, image):
         # Preprocess image
         image_resized = image.resize(self.input_shape)
         input_array = np.array(image_resized, dtype=np.float32)[np.newaxis, :, :, :]
         input_array = input_array.transpose((0, 3, 1, 2))  # (N, C, H, W)
         if self.is_bgr:
-            input_array = input_array[:, (2, 1, 0), :, :]  # Convert RGB to BGR
         if not self.is_range255:
             input_array = input_array / 255.0  # Normalize to [0,1]
-        # Prepare input tensor
-        input_tensor = input_array.astype(self.input_type)
         # Run inference
-        outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
-        # Process outputs
-        # Extract the first (and only) batch element
-        if len(outputs) >= 3:
-            boxes = outputs[0][0]    # shape: [num_detections, 4]
-            labels = outputs[1][0].astype(int)  # shape: [num_detections]
-            scores = outputs[2][0]   # shape: [num_detections]
-            return boxes, labels, scores
-        else:
-            raise ValueError("Unexpected number of outputs from the model.")
-# Load the model
-model = Model(MODEL_PATH)
-# Function to draw bounding boxes
-def draw_boxes(image, boxes, labels, scores, threshold=0.5):
-    draw = ImageDraw.Draw(image)
     try:
-        font = ImageFont.truetype("arial.ttf", 15)
     except IOError:
         font = ImageFont.load_default()
-    for box, label, score in zip(boxes, labels, scores):
-        if score < threshold:
-            continue
-        if len(box) != 4:
-            print(f"Invalid box format: {box}")
             continue
-        xmin, ymin, xmax, ymax = box
-        width, height = image.size
-        xmin = int(xmin * width)
-        ymin = int(ymin * height)
-        xmax = int(xmax * width)
-        ymax = int(ymax * height)
-        # Draw rectangle
-        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
-        # Draw label
-        label_text = f"{LABELS[label]}: {score:.2f}"
-        text_size = font.getsize(label_text)  # Updated line
-        draw.rectangle([(xmin, ymin - text_size[1]), (xmin + text_size[0], ymin)], fill="red")
-        draw.text((xmin, ymin - text_size[1]), label_text, fill="white", font=font)
     return image
-# Prediction function for Gradio
-def predict_image(input_image):
-    try:
-        boxes, labels, scores = model.predict(input_image)
-        output_image = input_image.copy()
-        output_image = draw_boxes(output_image, boxes, labels, scores, threshold=0.5)
-        return output_image
-    except Exception as e:
-        print(f"Error during prediction: {e}")
-        return input_image  # Return the original image if prediction fails
-# Define Gradio Interface
-def get_example_images():
-    examples_dir = "examples"
-    return [
-        os.path.join(examples_dir, img)
-        for img in os.listdir(examples_dir)
-        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
-    ]
-example_images = get_example_images()
-title = "JunkWaxHero: Object Detection for Junk Wax Baseball Cards"
-description = """
-Upload an image of a Junk Wax Baseball Card, and the model will identify the card by its set (1980-1999).
-"""
 iface = gr.Interface(
-    fn=predict_image,
     inputs=gr.Image(type="pil"),
-    outputs=gr.Image(type="pil"),
-    examples=example_images,
-    title=title,
-    description=description,
-    flagging_mode="never"  # Updated from allow_flagging="never"
 )
-# Launch the interface
 if __name__ == "__main__":
     iface.launch()

 import os
 import numpy as np
+import onnx
 import onnxruntime
 from PIL import Image, ImageDraw, ImageFont
 import gradio as gr
+# Constants
+PROB_THRESHOLD = 0.5  # Minimum probability to show results
 MODEL_PATH = os.path.join("onnx", "model.onnx")
 LABELS_PATH = os.path.join("onnx", "labels.txt")
 # Load labels
 with open(LABELS_PATH, "r") as f:
+    LABELS = f.read().strip().split("\n")
 class Model:
     def __init__(self, model_filepath):
         self.session = onnxruntime.InferenceSession(model_filepath)
+        assert len(self.session.get_inputs()) == 1
         self.input_shape = self.session.get_inputs()[0].shape[2:]  # (H, W)
         self.input_name = self.session.get_inputs()[0].name
+        self.input_type = {'tensor(float)': np.float32, 'tensor(float16)': np.float16}.get(
+            self.session.get_inputs()[0].type, np.float32
+        )
+        self.output_names = [o.name for o in self.session.get_outputs()]
         self.is_bgr = False
         self.is_range255 = False
+        onnx_model = onnx.load(model_filepath)
+        for metadata in onnx_model.metadata_props:
+            if metadata.key == 'Image.BitmapPixelFormat' and metadata.value == 'Bgr8':
                 self.is_bgr = True
+            elif metadata.key == 'Image.NominalPixelRange' and metadata.value == 'NominalRange_0_255':
                 self.is_range255 = True
+    def predict(self, image: Image.Image):
         # Preprocess image
         image_resized = image.resize(self.input_shape)
         input_array = np.array(image_resized, dtype=np.float32)[np.newaxis, :, :, :]
         input_array = input_array.transpose((0, 3, 1, 2))  # (N, C, H, W)
         if self.is_bgr:
+            input_array = input_array[:, (2, 1, 0), :, :]
         if not self.is_range255:
             input_array = input_array / 255.0  # Normalize to [0,1]
         # Run inference
+        outputs = self.session.run(self.output_names, {self.input_name: input_array.astype(self.input_type)})
+        return {name: outputs[i] for i, name in enumerate(self.output_names)}
+def draw_boxes(image: Image.Image, outputs: dict):
+    draw = ImageDraw.Draw(image, "RGBA")  # Use RGBA for transparency
+    # Dynamic font size based on image dimensions
+    image_width, image_height = image.size
+    font_size = max(20, image_width // 50)  # Increased minimum font size
     try:
+        # Attempt to load a truetype font; adjust the path if necessary
+        font = ImageFont.truetype("arial.ttf", size=font_size)
     except IOError:
+        # Fallback to default font if truetype font is not found
         font = ImageFont.load_default()
+    boxes = outputs.get('detected_boxes', [])
+    classes = outputs.get('detected_classes', [])
+    scores = outputs.get('detected_scores', [])
+    for box, cls, score in zip(boxes[0], classes[0], scores[0]):
+        if score < PROB_THRESHOLD:
             continue
+        label = LABELS[int(cls)]
+        # Assuming box format: [ymin, xmin, ymax, xmax] normalized [0,1]
+        ymin, xmin, ymax, xmax = box
+        left = xmin * image_width
+        right = xmax * image_width
+        top = ymin * image_height
+        bottom = ymax * image_height
+        # Draw bounding box
+        draw.rectangle([left, top, right, bottom], outline="red", width=3)
+        # Prepare label text
+        text = f"{label}: {score:.2f}"
+        # Calculate text size using textbbox
+        text_bbox = draw.textbbox((0, 0), text, font=font)
+        text_width = text_bbox[2] - text_bbox[0]
+        text_height = text_bbox[3] - text_bbox[1]
+        # Calculate label background position
+        # Ensure the label box does not go above the image
+        label_top = max(top - text_height - 10, 0)
+        label_left = left
+        # Draw semi-transparent rectangle behind text
+        draw.rectangle(
+            [label_left, label_top, label_left + text_width + 10, label_top + text_height + 10],
+            fill=(255, 0, 0, 160)  # Semi-transparent red
+        )
+        # Draw text
+        draw.text(
+            (label_left + 5, label_top + 5),
+            text,
+            fill="white",
+            font=font
+        )
     return image
+# Initialize model
+model = Model(MODEL_PATH)
+def detect_objects(image):
+    outputs = model.predict(image)
+    annotated_image = draw_boxes(image.copy(), outputs)
+    # Prepare detection summary
+    detections = []
+    boxes = outputs.get('detected_boxes', [])
+    classes = outputs.get('detected_classes', [])
+    scores = outputs.get('detected_scores', [])
+    for box, cls, score in zip(boxes[0], classes[0], scores[0]):
+        if score < PROB_THRESHOLD:
+            continue
+        label = LABELS[int(cls)]
+        detections.append(f"{label}: {score:.2f}")
+    detection_summary = "\n".join(detections) if detections else "No objects detected."
+    return annotated_image, detection_summary
+# Gradio Interface
 iface = gr.Interface(
+    fn=detect_objects,
     inputs=gr.Image(type="pil"),
+    outputs=[
+        gr.Image(type="pil", label="Detected Objects"),
+        gr.Textbox(label="Detections")
+    ],
+    title="Object Detection with ONNX Model",
+    description="Upload an image to detect objects using the ONNX model.",
+    examples=["examples/card1.jpg", "examples/card2.jpg", "examples/card3.jpg"],
+    theme="default",  # You can choose other themes if desired
+    allow_flagging="never"  # Disable flagging if not needed
+    # Removed 'layout' parameter
 )
 if __name__ == "__main__":
     iface.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
-gradio
-numpy
-onnxruntime
-pillow

+gradio==3.32.0
+onnx==1.14.0
+onnxruntime==1.15.1
+Pillow>=10.0.0
+numpy==1.25.0