Jyothish CHANDRASENAN committed
Commit 6280a7e · 1 Parent(s): f3aea51

Update app.py

Files changed (1)
  app.py +150 -14
app.py CHANGED
@@ -1,28 +1,164 @@
-import torch
-
-import requests
-from PIL import Image
-from torchvision import transforms
 
 import gradio as gr
+import cv2
+import numpy as np
+import onnxruntime as ort
+import json
+import os
+import csv
+import shutil
+import matplotlib.pyplot as plt
+import copy
+
+# path to the model
+MODEL_PATH = "./model_keypoints.onnx"
+IMG_SIZE = (224, 224)
+
+
+MAX_VALUES_BY_DTYPE = {
+    np.dtype("uint8"): 255,
+    np.dtype("uint16"): 65535,
+    np.dtype("uint32"): 4294967295,
+    np.dtype("float32"): 1.0,
+}
+
+def to_float(img, max_value=None):
+    if max_value is None:
+        try:
+            max_value = MAX_VALUES_BY_DTYPE[img.dtype]
+        except KeyError:
+            raise RuntimeError(
+                "Can't infer the maximum value for dtype {}. You need to specify the maximum value manually by "
+                "passing the max_value argument".format(img.dtype)
+            )
+    return img.astype("float32") / max_value
+
+def preprocessor(img):
+    # Load image and preprocess
+    original_img = cv2.resize(img, IMG_SIZE)
+    original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
+    preprocessed = to_float(original_img)
+    preprocessed = np.moveaxis(preprocessed, -1, 0)
+    preprocessed = np.expand_dims(preprocessed, axis=0)
+    preprocessed = np.asarray(preprocessed, dtype=np.float32)
+    return preprocessed, original_img
+
+def nms(bounding_boxes, confidence_score, threshold):
+    # If no bounding boxes, return empty list
+    if len(bounding_boxes) == 0:
+        return [], []
+
+    # Bounding boxes
+    boxes = np.array(bounding_boxes)
+
+    # coordinates of bounding boxes
+    start_x = boxes[:, 0]
+    start_y = boxes[:, 1]
+    end_x = boxes[:, 2]
+    end_y = boxes[:, 3]
+
+    # Confidence scores of bounding boxes
+    score = np.array(confidence_score)
+
+    # Picked bounding boxes
+    picked_boxes = []
+    picked_score = []
+    picked_boxes_idx = []
+
+    # Compute areas of bounding boxes
+    areas = (end_x - start_x + 1) * (end_y - start_y + 1)
+
+    # Sort by confidence score of bounding boxes
+    order = np.argsort(score)
+
+    # Iterate bounding boxes
+    while order.size > 0:
+        # The index of largest confidence score
+        index = order[-1]
+
+        # Pick the bounding box with largest confidence score
+        picked_boxes.append(bounding_boxes[index])
+        picked_boxes_idx.append(index)
+        picked_score.append(confidence_score[index])
+
+        # Compute coordinates of intersection-over-union (IoU)
+        x1 = np.maximum(start_x[index], start_x[order[:-1]])
+        x2 = np.minimum(end_x[index], end_x[order[:-1]])
+        y1 = np.maximum(start_y[index], start_y[order[:-1]])
+        y2 = np.minimum(end_y[index], end_y[order[:-1]])
+
+        # Compute areas of intersection-over-union
+        w = np.maximum(0.0, x2 - x1 + 1)
+        h = np.maximum(0.0, y2 - y1 + 1)
+        intersection = w * h
+
+        # Compute the ratio between intersection and union
+        ratio = intersection / (areas[index] + areas[order[:-1]] - intersection)
+
+        left = np.where(ratio < threshold)
+        order = order[left]
 
-# Download human-readable labels for ImageNet.
-response = requests.get("https://git.io/JJkYN")
-labels = response.text.split("\n")
+    return picked_boxes_idx, picked_boxes, picked_score
 
-model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18', pretrained=True).eval()
+def postprocessor(prediction, orig_img):
+    boxes = prediction[0]
+    scores = prediction[1]
+    keypoints = prediction[2]
+    high_scores_idxs = np.where(scores > 0.7)[0].tolist()
+    post_nms_idxs, best_boxes, best_scores = nms(boxes[high_scores_idxs], scores[high_scores_idxs], 0.3)
+    keypoints_picked = keypoints[high_scores_idxs]
+    keypoints = []
+    for kps in keypoints_picked[post_nms_idxs]:
+        keypoints.append([list(map(int, kp[:2])) for kp in kps])
+    img_copy = copy.deepcopy(orig_img)
+    result_image = None
+    text = ["A", "B"]
+    for bbox in best_boxes:
+        start_point = (int(bbox[0]), int(bbox[1]))
+        end_point = (int(bbox[2]), int(bbox[3]))
+        color = (0, 255, 0)
+        thickness = 1
+        #crop_img = img_copy[int(bbox[0]):int(bbox[2]), int(bbox[1]):int(bbox[3])]
+        result_image = cv2.rectangle(img_copy, start_point, end_point, color, thickness)
+    for ab in keypoints:
+        for idx, point in enumerate(ab):
+            x = int(point[0])
+            y = int(point[1])
+            color = (255, 0, 0)
+            thickness = 3
+            result_image = cv2.circle(result_image, (x, y), radius=0, color=color, thickness=thickness)
+            result_image = cv2.putText(result_image, text[idx], (x-10, y+10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 1, cv2.LINE_AA)
+    return result_image
+
 
+def infer(preprocessed):
+
+    ort_sess = ort.InferenceSession(MODEL_PATH, providers=["CUDAExecutionProvider"])
+    input_name = ort_sess.get_inputs()[0].name
+    boxes = ort_sess.get_outputs()[0].name
+    scores = ort_sess.get_outputs()[2].name
+    keypoints = ort_sess.get_outputs()[3].name
+    prediction = ort_sess.run([boxes, scores, keypoints], {input_name: preprocessed})
+    #prediction = prediction[0].squeeze()
+
+    return prediction
+
 
 def predict(inp):
-    inp = transforms.ToTensor()(inp).unsqueeze(0)
-    with torch.no_grad():
-        prediction = torch.nn.functional.softmax(model(inp)[0], dim=0)
-    confidences = {labels[i]: float(prediction[i]) for i in range(1000)}
+
+    raw_img = cv2.imread(inp)
+    # preprocess
+    preprocessed_img, orig_img = preprocessor(raw_img)
+    # infer
+    prediction = infer(preprocessed_img)
+    keypoints_map = postprocessor(prediction, orig_img)
+    cv2.imwrite("keypoints_map.png", keypoints_map[:, :, ::-1])
+
     return confidences
 
 
 gr.Interface(fn=predict,
              inputs=gr.Image(type="pil"),
-             outputs=gr.Label(num_top_classes=3),
+             outputs=gr.Image("keypoints_map.png"),
              css="footer {visibility: hidden} body}, .gradio-container {background-color: white}",
             examples=["001.png","002.png","003.png","004.png","005.png"]).launch(share=False)
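
A minimal way to sanity-check the keypoint pipeline this commit introduces, outside Gradio: the sketch below simply chains the preprocessor, infer and postprocessor functions from the new app.py, the same way predict() does. It assumes ./model_keypoints.onnx and an example image such as 001.png are present in the working directory (both names are taken from the diff), and that the three functions are already defined in the session, since importing app.py directly would also launch the interface.

# Smoke test for the keypoint pipeline added in this commit (sketch, not part of the repo).
# Assumes preprocessor(), infer() and postprocessor() from the new app.py are defined in
# this session, and that ./model_keypoints.onnx and ./001.png exist as in the diff.
import cv2

raw_img = cv2.imread("001.png")                      # BGR uint8 image, read the same way as predict()
preprocessed_img, orig_img = preprocessor(raw_img)   # (1, 3, 224, 224) float32 input + 224x224 RGB copy
prediction = infer(preprocessed_img)                 # ONNX Runtime outputs: boxes, scores, keypoints
keypoints_map = postprocessor(prediction, orig_img)  # boxes and "A"/"B" keypoints drawn on the RGB copy
cv2.imwrite("keypoints_map.png", keypoints_map[:, :, ::-1])  # flip RGB back to BGR before saving

One design note: infer() builds a new ort.InferenceSession on every call; for a hosted demo it may be cheaper to create the session once at module load and reuse it across requests.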