Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -11,12 +11,79 @@ model = torch.jit.load(TORCHSCRIPT_PATH)
|
|
11 |
|
12 |
with open(LABELS_PATH, "r") as f:
|
13 |
idx2Label = json.load(f)["idx2Label"]
|
14 |
-
|
15 |
img_transforms = transforms.ToTensor()
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def predict(img, conf_thresh=0.4):
|
18 |
img_input = [img_transforms(img)]
|
19 |
_, pred = model(img_input)
|
|
|
20 |
out_img = img.copy()
|
21 |
draw = ImageDraw.Draw(out_img)
|
22 |
font = ImageFont.truetype("res/Tuffy_Bold.ttf", 25)
|
@@ -37,7 +104,7 @@ def predict(img, conf_thresh=0.4):
|
|
37 |
draw.text((x1, y1), text, font=font, fill="black")
|
38 |
|
39 |
return out_img
|
40 |
-
|
41 |
example_imgs = [
|
42 |
["res/example.jpg", 0.4],
|
43 |
["res/screenlane-snapchat-profile.jpg", 0.4],
|
|
|
11 |
|
12 |
with open(LABELS_PATH, "r") as f:
|
13 |
idx2Label = json.load(f)["idx2Label"]
|
14 |
+
|
15 |
img_transforms = transforms.ToTensor()
|
16 |
+
|
17 |
+
# inter_class_nms and iou functions implemented by GPT
def inter_class_nms(boxes, scores, iou_threshold=0.5):
    """Class-agnostic (inter-class) non-maximum suppression.

    Each box is scored by its best class probability; boxes are then kept
    greedily in descending score order, and every remaining candidate whose
    IoU with a kept box reaches ``iou_threshold`` is suppressed regardless
    of its class.

    Args:
    - boxes (Tensor[N, 4] or array-like): boxes in (x1, y1, x2, y2) corners
    - scores (Tensor[N, C]): per-class scores for each box
    - iou_threshold (float): overlap at/above which a lower-scored box is dropped

    Returns:
    - dict with 'boxes' (list of Tensor[4]) and 'scores' (list of scalar
      Tensor) for the surviving detections, highest score first.
    """
    boxes = torch.as_tensor(boxes)
    scores = torch.as_tensor(scores)

    # Reduce per-class scores to the single best class per box. The class
    # index is intentionally discarded: inter-class NMS ignores the class,
    # and callers read only 'boxes'/'scores' from the result.
    # (Fix: the old code reused the name `scores` here, then indexed the
    # now-1-D tensor with scores.shape[1] / scores[:, c], which raises.)
    best_scores, _ = scores.max(dim=1)

    # Keep track of final boxes and scores
    final_boxes = []
    final_scores = []

    # Indices of all boxes sorted by score (highest first)
    sorted_indices = torch.argsort(best_scores, descending=True)

    while len(sorted_indices) > 0:
        # Take the box with the highest remaining score and keep it.
        highest_index = sorted_indices[0]
        highest_box = boxes[highest_index]
        final_boxes.append(highest_box)
        final_scores.append(best_scores[highest_index])

        # Remove the kept box from the candidate list.
        sorted_indices = sorted_indices[1:]
        if len(sorted_indices) == 0:
            break

        # IoU of every remaining candidate against the kept box.
        # (Fix: iou() expects 2-D inputs, so the single box is unsqueezed
        # to [1, 4] and the [M, 1] result squeezed back to a 1-D mask.)
        ious = iou(boxes[sorted_indices], highest_box.unsqueeze(0)).squeeze(1)

        # Keep only candidates that overlap less than the threshold.
        sorted_indices = sorted_indices[ious < iou_threshold]

    return {'boxes': final_boxes, 'scores': final_scores}


def iou(boxes1, boxes2):
    """
    Compute the Intersection over Union (IoU) of two sets of boxes.

    Boxes are (x1, y1, x2, y2) corner coordinates.

    Args:
    - boxes1 (Tensor[N, 4]): ground truth boxes
    - boxes2 (Tensor[M, 4]): predicted boxes

    Returns:
    - iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values
      for every element in boxes1 and boxes2
    """

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Pairwise intersection rectangle: top-left is the max of the two
    # top-lefts, bottom-right the min of the two bottom-rights.
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Negative extents mean no overlap; clamp them to zero.
    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    iou = inter / (area1[:, None] + area2 - inter)

    return iou
|
82 |
+
|
83 |
def predict(img, conf_thresh=0.4):
|
84 |
img_input = [img_transforms(img)]
|
85 |
_, pred = model(img_input)
|
86 |
+
pred = inter_class_nms(pred['boxes'], pred['scores'])
|
87 |
out_img = img.copy()
|
88 |
draw = ImageDraw.Draw(out_img)
|
89 |
font = ImageFont.truetype("res/Tuffy_Bold.ttf", 25)
|
|
|
104 |
draw.text((x1, y1), text, font=font, fill="black")
|
105 |
|
106 |
return out_img
|
107 |
+
|
108 |
example_imgs = [
|
109 |
["res/example.jpg", 0.4],
|
110 |
["res/screenlane-snapchat-profile.jpg", 0.4],
|