Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import torch
|
|
2 |
import gradio as gr
|
3 |
import json
|
4 |
from torchvision import transforms
|
|
|
5 |
from PIL import Image, ImageDraw, ImageFont
|
6 |
|
7 |
TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
|
@@ -14,71 +15,17 @@ with open(LABELS_PATH, "r") as f:
|
|
14 |
|
15 |
img_transforms = transforms.ToTensor()
|
16 |
|
17 |
-
# inter_class_nms
|
18 |
def inter_class_nms(boxes, scores, iou_threshold=0.5):
|
19 |
-
#
|
20 |
-
|
21 |
-
scores, class_indices = scores.max(dim=1)
|
22 |
|
23 |
-
#
|
24 |
-
|
25 |
-
|
26 |
-
final_class_indices = []
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
class_scores = scores[:, class_index]
|
31 |
-
class_boxes = boxes
|
32 |
-
|
33 |
-
# Indices of boxes sorted by score (highest first)
|
34 |
-
sorted_indices = torch.argsort(class_scores, descending=True)
|
35 |
-
|
36 |
-
while len(sorted_indices) > 0:
|
37 |
-
# Take the box with the highest score
|
38 |
-
highest_index = sorted_indices[0]
|
39 |
-
highest_box = class_boxes[highest_index]
|
40 |
-
|
41 |
-
# Add the highest box and score to the final list
|
42 |
-
final_boxes.append(highest_box)
|
43 |
-
final_scores.append(class_scores[highest_index])
|
44 |
-
final_class_indices.append(class_index)
|
45 |
-
|
46 |
-
# Remove the highest box from the list
|
47 |
-
sorted_indices = sorted_indices[1:]
|
48 |
-
|
49 |
-
# Compute IoU of the highest box with the rest
|
50 |
-
ious = iou(class_boxes[sorted_indices], highest_box)
|
51 |
-
|
52 |
-
# Keep only boxes with IoU less than the threshold
|
53 |
-
sorted_indices = sorted_indices[ious < iou_threshold]
|
54 |
-
|
55 |
-
return {'boxes': final_boxes, 'scores': final_scores}
|
56 |
-
|
57 |
-
|
58 |
-
def iou(boxes1, boxes2):
    """
    Compute the pairwise Intersection over Union (IoU) of two sets of boxes.

    Args:
    - boxes1 (Tensor[N, 4]): ground truth boxes, as (x1, y1, x2, y2)
    - boxes2 (Tensor[M, 4]): predicted boxes, as (x1, y1, x2, y2)

    Returns:
    - iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2
    """
    # Per-box areas: width * height.
    areas_a = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    areas_b = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Broadcast boxes1 against boxes2 to get every pairing: the
    # intersection's top-left is the elementwise max of the two top-lefts,
    # its bottom-right the elementwise min of the two bottom-rights.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])      # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Clamp at zero so disjoint boxes contribute no intersection area.
    extent = (bottom_right - top_left).clamp(min=0)               # [N,M,2]
    intersection = extent[:, :, 0] * extent[:, :, 1]              # [N,M]

    # IoU = |A ∩ B| / |A ∪ B|, with the union expanded by inclusion-exclusion.
    union = areas_a[:, None] + areas_b - intersection
    return intersection / union
|
82 |
|
83 |
def predict(img, conf_thresh=0.4):
|
84 |
img_input = [img_transforms(img)]
|
|
|
2 |
import gradio as gr
|
3 |
import json
|
4 |
from torchvision import transforms
|
5 |
+
from torchvision.ops import nms
|
6 |
from PIL import Image, ImageDraw, ImageFont
|
7 |
|
8 |
TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
|
|
|
15 |
|
16 |
img_transforms = transforms.ToTensor()
|
17 |
|
18 |
+
# inter_class_nms implemented by GPT
def inter_class_nms(boxes, scores, iou_threshold=0.5):
    """Suppress overlapping detections across all classes at once.

    Args:
    - boxes (Tensor[N, 4]): detection boxes in (x1, y1, x2, y2) form
    - scores (Tensor[N, num_classes] or Tensor[N]): per-class confidence
      scores; a 2-D tensor is reduced to each box's best class score
    - iou_threshold (float): boxes overlapping a kept box by more than
      this IoU are discarded

    Returns:
    - dict with 'boxes' (Tensor[K, 4]) and 'scores' (Tensor[K]) of the
      surviving detections
    """
    # The previous per-class implementation received a (N, num_classes)
    # score matrix and reduced it with scores.max(dim=1); torchvision's
    # nms requires a single score per box, so collapse 2-D scores to the
    # best class score before suppression. 1-D scores pass through as-is.
    if scores.dim() == 2:
        scores, _ = scores.max(dim=1)

    # Perform non-maximum suppression across all classes jointly.
    keep = nms(boxes, scores, iou_threshold)

    # Filter boxes and scores down to the kept indices.
    new_boxes = boxes[keep]
    new_scores = scores[keep]

    # Return the result in a dictionary.
    return {'boxes': new_boxes, 'scores': new_scores}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
def predict(img, conf_thresh=0.4):
|
31 |
img_input = [img_transforms(img)]
|