Jyothish CHANDRASENAN committed
Commit 6280a7e · 1 Parent(s): f3aea51

Update app.py

Files changed (1)
  app.py +150 -14
app.py CHANGED
@@ -1,28 +1,164 @@
-import torch
-
-import requests
-from PIL import Image
-from torchvision import transforms
 
 import gradio as gr
+import cv2
+import numpy as np
+import onnxruntime as ort
+import json
+import os
+import csv
+import shutil
+import matplotlib.pyplot as plt
+import copy
+
+# path to the model
+MODEL_PATH = "./model_keypoints.onnx"
+IMG_SIZE = (224, 224)
+
+
+MAX_VALUES_BY_DTYPE = {
+    np.dtype("uint8"): 255,
+    np.dtype("uint16"): 65535,
+    np.dtype("uint32"): 4294967295,
+    np.dtype("float32"): 1.0,
+}
+
+def to_float(img, max_value=None):
+    if max_value is None:
+        try:
+            max_value = MAX_VALUES_BY_DTYPE[img.dtype]
+        except KeyError:
+            raise RuntimeError(
+                "Can't infer the maximum value for dtype {}. You need to specify the maximum value manually by "
+                "passing the max_value argument".format(img.dtype)
+            )
+    return img.astype("float32") / max_value
+
+def preprocessor(img):
+    # Load image and preprocess
+    original_img = cv2.resize(img, IMG_SIZE)
+    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
+    preprocessed = to_float(original_img)
+    preprocessed = np.moveaxis(preprocessed, -1, 0)
+    preprocessed = np.expand_dims(preprocessed, axis=0)
+    preprocessed = np.asarray(preprocessed, dtype=np.float32)
+    return preprocessed, original_img
+
+def nms(bounding_boxes, confidence_score, threshold):
+    # If no bounding boxes, return empty list
+    if len(bounding_boxes) == 0:
+        return [], []
+
+    # Bounding boxes
+    boxes = np.array(bounding_boxes)
+
+    # coordinates of bounding boxes
+    start_x = boxes[:, 0]
+    start_y = boxes[:, 1]
+    end_x = boxes[:, 2]
+    end_y = boxes[:, 3]
+
+    # Confidence scores of bounding boxes
+    score = np.array(confidence_score)
+
+    # Picked bounding boxes
+    picked_boxes = []
+    picked_score = []
+    picked_boxes_idx = []
+
+    # Compute areas of bounding boxes
+    areas = (end_x - start_x + 1) * (end_y - start_y + 1)
+
+    # Sort by confidence score of bounding boxes
+    order = np.argsort(score)
+
+    # Iterate bounding boxes
+    while order.size > 0:
+        # The index of largest confidence score
+        index = order[-1]
+
+        # Pick the bounding box with largest confidence score
+        picked_boxes.append(bounding_boxes[index])
+        picked_boxes_idx.append(index)
+        picked_score.append(confidence_score[index])
+
+        # Compute coordinates of intersection-over-union (IoU)
+        x1 = np.maximum(start_x[index], start_x[order[:-1]])
+        x2 = np.minimum(end_x[index], end_x[order[:-1]])
+        y1 = np.maximum(start_y[index], start_y[order[:-1]])
+        y2 = np.minimum(end_y[index], end_y[order[:-1]])
+
+        # Compute areas of intersection-over-union
+        w = np.maximum(0.0, x2 - x1 + 1)
+        h = np.maximum(0.0, y2 - y1 + 1)
+        intersection = w * h
+
+        # Compute the ratio between intersection and union
+        ratio = intersection / (areas[index] + areas[order[:-1]] - intersection)
+
+        left = np.where(ratio < threshold)
+        order = order[left]
 
-# Download human-readable labels for ImageNet.
-response = requests.get("https://git.io/JJkYN")
-labels = response.text.split("\n")
+    return picked_boxes_idx, picked_boxes, picked_score
 
-model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).eval()
+def postprocessor(prediction, orig_img):
+    boxes = prediction[0]
+    scores = prediction[1]
+    keypoints = prediction[2]
+    high_scores_idxs = np.where(scores > 0.7)[0].tolist()
+    post_nms_idxs, best_boxes, best_scores = nms(boxes[high_scores_idxs], scores[high_scores_idxs], 0.3)
+    keypoints_picked = keypoints[high_scores_idxs]
+    keypoints = []
+    for kps in keypoints_picked[post_nms_idxs]:
+        keypoints.append([list(map(int, kp[:2])) for kp in kps])
+    img_copy = copy.deepcopy(orig_img)
+    result_image = None
+    text = ["A", "B"]
+    for bbox in best_boxes:
+        start_point = (int(bbox[0]), int(bbox[1]))
+        end_point = (int(bbox[2]), int(bbox[3]))
+        color = (0, 255, 0)
+        thickness = 1
+        #crop_img = img_copy[int(bbox[0]):int(bbox[2]), int(bbox[1]):int(bbox[3])]
+        result_image = cv2.rectangle(img_copy, start_point, end_point, color, thickness)
+    for ab in keypoints:
+        for idx, point in enumerate(ab):
+            x = int(point[0])
+            y = int(point[1])
+            color = (255, 0, 0)
+            thickness = 3
+            result_image = cv2.circle(result_image, (x, y), radius=0, color=color, thickness=thickness)
+            result_image = cv2.putText(result_image, text[idx], (x-10, y+10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA)
+    return result_image
+
 
+def infer(preprocessed):
+
+    ort_sess = ort.InferenceSession(MODEL_PATH, providers=["CUDAExecutionProvider"])
+    input_name = ort_sess.get_inputs()[0].name
+    boxes = ort_sess.get_outputs()[0].name
+    scores = ort_sess.get_outputs()[2].name
+    keypoints = ort_sess.get_outputs()[3].name
+    prediction = ort_sess.run([boxes, scores, keypoints], {input_name: preprocessed})
+    #prediction = prediction[0].squeeze()
+
+    return prediction
+
 
 def predict(inp):
-    inp = transforms.ToTensor()(inp).unsqueeze(0)
-    with torch.no_grad():
-        prediction = torch.nn.functional.softmax(model(inp)[0], dim=0)
-    confidences = {labels[i]: float(prediction[i]) for i in range(1000)}
+
+    raw_img = cv2.imread(inp)
+    # preprocess
+    preprocessed_img, orig_img = preprocessor(raw_img)
+    # infer
+    prediction = infer(preprocessed_img)
+    keypoints_map = postprocessor(prediction, orig_img)
+    cv2.imwrite("keypoints_map.png", keypoints_map[:, :, ::-1])
+
     return confidences
 
 
 gr.Interface(fn=predict,
              inputs=gr.Image(type="pil"),
-             outputs=gr.Label(num_top_classes=3),
+             outputs=gr.Image("keypoints_map.png"),
              css="footer {visibility: hidden} body}, .gradio-container {background-color: white}",
             examples=["001.png","002.png","003.png","004.png","005.png"]).launch(share=False)
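
A minimal way to sanity-check the keypoint pipeline this commit introduces, outside Gradio: the sketch below simply chains the preprocessor, infer and postprocessor functions from the new app.py, the same way predict() does. It assumes ./model_keypoints.onnx and an example image such as 001.png are present in the working directory (both names are taken from the diff), and that the three functions are already defined in the session, since importing app.py directly would also launch the interface.

# Smoke test for the keypoint pipeline added in this commit (sketch, not part of the repo).
# Assumes preprocessor(), infer() and postprocessor() from the new app.py are defined in
# this session, and that ./model_keypoints.onnx and ./001.png exist as in the diff.
import cv2

raw_img = cv2.imread("001.png")                      # BGR uint8 image, read the same way as predict()
preprocessed_img, orig_img = preprocessor(raw_img)   # (1, 3, 224, 224) float32 input + 224x224 RGB copy
prediction = infer(preprocessed_img)                 # ONNX Runtime outputs: boxes, scores, keypoints
keypoints_map = postprocessor(prediction, orig_img)  # boxes and "A"/"B" keypoints drawn on the RGB copy
cv2.imwrite("keypoints_map.png", keypoints_map[:, :, ::-1])  # flip RGB back to BGR before saving

One design note: infer() builds a new ort.InferenceSession on every call; for a hosted demo it may be cheaper to create the session once at module load and reuse it across requests.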