Eric P. Nusbaum committed on
Commit 0b444ec · 1 Parent(s): f60a7c0

Update Space

Files changed (1): app.py (+98 -106)
app.py CHANGED
@@ -1,152 +1,144 @@

Before (app.py at parent f60a7c0; removed lines are marked "-"):

 import os
 import numpy as np
-import onnx
 import onnxruntime
 from PIL import Image, ImageDraw, ImageFont
 import gradio as gr

-# Constants
-PROB_THRESHOLD = 0.5  # Minimum probability to show results
 MODEL_PATH = os.path.join("onnx", "model.onnx")
 LABELS_PATH = os.path.join("onnx", "labels.txt")

 # Load labels
 with open(LABELS_PATH, "r") as f:
-    LABELS = f.read().strip().split("\n")

 class Model:
     def __init__(self, model_filepath):
         self.session = onnxruntime.InferenceSession(model_filepath)
-        assert len(self.session.get_inputs()) == 1
         self.input_shape = self.session.get_inputs()[0].shape[2:]  # (H, W)
         self.input_name = self.session.get_inputs()[0].name
-        self.input_type = {'tensor(float)': np.float32, 'tensor(float16)': np.float16}.get(
-            self.session.get_inputs()[0].type, np.float32
-        )
-        self.output_names = [o.name for o in self.session.get_outputs()]
-
         self.is_bgr = False
         self.is_range255 = False
-        onnx_model = onnx.load(model_filepath)
-        for metadata in onnx_model.metadata_props:
-            if metadata.key == 'Image.BitmapPixelFormat' and metadata.value == 'Bgr8':
                 self.is_bgr = True
-            elif metadata.key == 'Image.NominalPixelRange' and metadata.value == 'NominalRange_0_255':
                 self.is_range255 = True

-    def predict(self, image: Image.Image):
         # Preprocess image
         image_resized = image.resize(self.input_shape)
         input_array = np.array(image_resized, dtype=np.float32)[np.newaxis, :, :, :]
         input_array = input_array.transpose((0, 3, 1, 2))  # (N, C, H, W)
         if self.is_bgr:
-            input_array = input_array[:, (2, 1, 0), :, :]
         if not self.is_range255:
             input_array = input_array / 255.0  # Normalize to [0,1]
-
         # Run inference
-        outputs = self.session.run(self.output_names, {self.input_name: input_array.astype(self.input_type)})
-        return {name: outputs[i] for i, name in enumerate(self.output_names)}
-
-def draw_boxes(image: Image.Image, outputs: dict):
-    draw = ImageDraw.Draw(image, "RGBA")  # Use RGBA for transparency

-    # Dynamic font size based on image dimensions
-    image_width, image_height = image.size
-    font_size = max(20, image_width // 50)  # Increased minimum font size
     try:
-        # Attempt to load a truetype font; adjust the path if necessary
-        font = ImageFont.truetype("arial.ttf", size=font_size)
     except IOError:
-        # Fallback to default font if truetype font is not found
         font = ImageFont.load_default()

-    boxes = outputs.get('detected_boxes', [])
-    classes = outputs.get('detected_classes', [])
-    scores = outputs.get('detected_scores', [])
-
-    for box, cls, score in zip(boxes[0], classes[0], scores[0]):
-        if score < PROB_THRESHOLD:
             continue
-        label = LABELS[int(cls)]
-
-        # Assuming box format: [ymin, xmin, ymax, xmax] normalized [0,1]
-        ymin, xmin, ymax, xmax = box
-        left = xmin * image_width
-        right = xmax * image_width
-        top = ymin * image_height
-        bottom = ymax * image_height
-
-        # Draw bounding box
-        draw.rectangle([left, top, right, bottom], outline="red", width=3)
-
-        # Prepare label text
-        text = f"{label}: {score:.2f}"
-
-        # Calculate text size using textbbox
-        text_bbox = draw.textbbox((0, 0), text, font=font)
-        text_width = text_bbox[2] - text_bbox[0]
-        text_height = text_bbox[3] - text_bbox[1]
-
-        # Calculate label background position
-        # Ensure the label box does not go above the image
-        label_top = max(top - text_height - 10, 0)
-        label_left = left
-
-        # Draw semi-transparent rectangle behind text
-        draw.rectangle(
-            [label_left, label_top, label_left + text_width + 10, label_top + text_height + 10],
-            fill=(255, 0, 0, 160)  # Semi-transparent red
-        )
-
-        # Draw text
-        draw.text(
-            (label_left + 5, label_top + 5),
-            text,
-            fill="white",
-            font=font
-        )

     return image

-# Initialize model
-model = Model(MODEL_PATH)
-
-def detect_objects(image):
-    outputs = model.predict(image)
-    annotated_image = draw_boxes(image.copy(), outputs)
-
-    # Prepare detection summary
-    detections = []
-    boxes = outputs.get('detected_boxes', [])
-    classes = outputs.get('detected_classes', [])
-    scores = outputs.get('detected_scores', [])
-
-    for box, cls, score in zip(boxes[0], classes[0], scores[0]):
-        if score < PROB_THRESHOLD:
-            continue
-        label = LABELS[int(cls)]
-        detections.append(f"{label}: {score:.2f}")
-
-    detection_summary = "\n".join(detections) if detections else "No objects detected."
-
-    return annotated_image, detection_summary

-# Gradio Interface
 iface = gr.Interface(
-    fn=detect_objects,
     inputs=gr.Image(type="pil"),
-    outputs=[
-        gr.Image(type="pil", label="Detected Objects"),
-        gr.Textbox(label="Detections")
-    ],
-    title="Object Detection with ONNX Model",
-    description="Upload an image to detect objects using the ONNX model.",
-    examples=["examples/card1.jpg", "examples/card2.jpg", "examples/card3.jpg"],
-    theme="default",  # You can choose other themes if desired
-    allow_flagging="never"  # Disable flagging if not needed
-    # Removed 'layout' parameter
 )

 if __name__ == "__main__":
     iface.launch()
 
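The rewritten app.py below drops the separate onnx.load() pass and reads the pixel-format metadata straight from the ONNX Runtime session; it also assumes the session returns its outputs in the order [boxes, labels, scores]. Both assumptions can be checked against the actual onnx/model.onnx with a short inspection script along these lines (a minimal sketch using only onnxruntime; the model path matches the Space, and what gets printed depends entirely on the exported model):

import onnxruntime

# Inspect the same model the Space loads, without running the app.
session = onnxruntime.InferenceSession("onnx/model.onnx")

# Input name, shape, and element type drive the preprocessing in predict().
for inp in session.get_inputs():
    print("input :", inp.name, inp.shape, inp.type)

# The new predict() assumes outputs arrive as [boxes, labels, scores];
# the names and order reported here are the ground truth for that assumption.
for out in session.get_outputs():
    print("output:", out.name, out.shape, out.type)

# Custom metadata is where Image.BitmapPixelFormat / Image.NominalPixelRange live, if present.
for key, value in session.get_modelmeta().custom_metadata_map.items():
    print("meta  :", key, "=", value)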
After (app.py at 0b444ec; added lines are marked "+"):

 import os
 import numpy as np
 import onnxruntime
 from PIL import Image, ImageDraw, ImageFont
 import gradio as gr

+# Define paths
 MODEL_PATH = os.path.join("onnx", "model.onnx")
 LABELS_PATH = os.path.join("onnx", "labels.txt")

 # Load labels
 with open(LABELS_PATH, "r") as f:
+    LABELS = [line.strip() for line in f.readlines()]

+# Initialize ONNX Runtime session
 class Model:
     def __init__(self, model_filepath):
+        # Initialize the InferenceSession
         self.session = onnxruntime.InferenceSession(model_filepath)
+
+        # Ensure the model has exactly one input
+        assert len(self.session.get_inputs()) == 1, "Model should have exactly one input."
+
+        # Extract input details
         self.input_shape = self.session.get_inputs()[0].shape[2:]  # (H, W)
         self.input_name = self.session.get_inputs()[0].name
+        self.input_type = {
+            'tensor(float)': np.float32,
+            'tensor(float16)': np.float16
+        }.get(self.session.get_inputs()[0].type, np.float32)
+
+        # Extract output names
+        self.output_names = [output.name for output in self.session.get_outputs()]
+
+        # Default preprocessing flags
         self.is_bgr = False
         self.is_range255 = False
+
+        # Retrieve metadata from the model
+        metadata_map = self.session.get_modelmeta().custom_metadata_map
+        for key, value in metadata_map.items():
+            if key == 'Image.BitmapPixelFormat' and value == 'Bgr8':
                 self.is_bgr = True
+            elif key == 'Image.NominalPixelRange' and value == 'NominalRange_0_255':
                 self.is_range255 = True

+    def predict(self, image):
         # Preprocess image
         image_resized = image.resize(self.input_shape)
         input_array = np.array(image_resized, dtype=np.float32)[np.newaxis, :, :, :]
         input_array = input_array.transpose((0, 3, 1, 2))  # (N, C, H, W)
+
         if self.is_bgr:
+            input_array = input_array[:, (2, 1, 0), :, :]  # Convert RGB to BGR
+
         if not self.is_range255:
             input_array = input_array / 255.0  # Normalize to [0,1]
+
+        # Prepare input tensor
+        input_tensor = input_array.astype(self.input_type)
+
         # Run inference
+        outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
+
+        # Process outputs
+        # Assuming outputs are in the format: [boxes, labels, scores]
+        # Adjust based on your actual model's output format
+        if len(outputs) >= 3:
+            boxes = outputs[0]  # shape: [num_detections, 4]
+            labels = outputs[1].astype(int)  # shape: [num_detections]
+            scores = outputs[2]  # shape: [num_detections]
+            return boxes, labels, scores
+        else:
+            raise ValueError("Unexpected number of outputs from the model.")
+
+# Load the model
+model = Model(MODEL_PATH)

+# Function to draw bounding boxes
+def draw_boxes(image, boxes, labels, scores, threshold=0.5):
+    draw = ImageDraw.Draw(image)
     try:
+        font = ImageFont.truetype("arial.ttf", 15)
     except IOError:
         font = ImageFont.load_default()

+    for box, label, score in zip(boxes, labels, scores):
+        if score < threshold:
             continue
+        # Assuming box format is [xmin, ymin, xmax, ymax] normalized [0,1]
+        xmin, ymin, xmax, ymax = box
+        width, height = image.size
+        xmin = int(xmin * width)
+        ymin = int(ymin * height)
+        xmax = int(xmax * width)
+        ymax = int(ymax * height)
+
+        # Draw rectangle
+        draw.rectangle([(xmin, ymin), (xmax, ymax)], outline="red", width=2)
+
+        # Draw label
+        label_text = f"{LABELS[label]}: {score:.2f}"
+        text_size = draw.textsize(label_text, font=font)
+        draw.rectangle([(xmin, ymin - text_size[1]), (xmin + text_size[0], ymin)], fill="red")
+        draw.text((xmin, ymin - text_size[1]), label_text, fill="white", font=font)

     return image

+# Prediction function for Gradio
+def predict_image(input_image):
+    boxes, labels, scores = model.predict(input_image)
+    output_image = input_image.copy()
+    output_image = draw_boxes(output_image, boxes, labels, scores, threshold=0.5)
+    return output_image
+
+# Define Gradio Interface
+def get_example_images():
+    examples_dir = "examples"
+    return [
+        os.path.join(examples_dir, img)
+        for img in os.listdir(examples_dir)
+        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
+    ]
+
+example_images = get_example_images()
+
+title = "JunkWaxHero: Object Detection for Junk Wax Baseball Cards"
+description = """
+Upload an image of a Junk Wax Baseball Card, and the model will identify the card by its set (1980-1999).
+"""

 iface = gr.Interface(
+    fn=predict_image,
     inputs=gr.Image(type="pil"),
+    outputs=gr.Image(type="pil"),
+    examples=example_images,
+    title=title,
+    description=description,
+    allow_flagging="never"
 )

+# Launch the interface
 if __name__ == "__main__":
     iface.launch()
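One caveat on the new draw_boxes: ImageDraw.textsize() was deprecated in Pillow 9.2 and removed in Pillow 10, so on a current Pillow the label drawing will fail. A minimal sketch of the same measurement using textbbox(), available since Pillow 8.0 (illustrative only, not part of this commit; the helper name text_dimensions is hypothetical):

from PIL import Image, ImageDraw, ImageFont

def text_dimensions(draw, text, font):
    # textbbox() returns (left, top, right, bottom) for text anchored at (0, 0).
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top

# Quick self-contained check on a blank canvas.
canvas = Image.new("RGB", (300, 60), "white")
draw = ImageDraw.Draw(canvas)
font = ImageFont.load_default()
text_w, text_h = text_dimensions(draw, "label: 0.97", font)
print(text_w, text_h)

# Inside draw_boxes, the textsize() call could then become:
#   text_w, text_h = text_dimensions(draw, label_text, font)
#   draw.rectangle([(xmin, ymin - text_h), (xmin + text_w, ymin)], fill="red")
#   draw.text((xmin, ymin - text_h), label_text, fill="white", font=font)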
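Separately, a quick way to exercise the new pipeline locally without launching the Gradio UI, assuming app.py above is importable from the working directory and an image exists under examples/ (the filename below is only an example):

from PIL import Image

import app  # importing app.py builds the session and the Interface, but does not launch it

# Run one image through the new Model.predict() / draw_boxes() path.
image = Image.open("examples/card1.jpg").convert("RGB")
boxes, labels, scores = app.model.predict(image)
annotated = app.draw_boxes(image.copy(), boxes, labels, scores, threshold=0.5)
annotated.save("annotated.jpg")

# Print the detections that cleared the threshold.
for label, score in zip(labels, scores):
    if score >= 0.5:
        print(app.LABELS[int(label)], f"{score:.2f}")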