Browse files
@@ -2,6 +2,7 @@ import torch
2 |
import gradio as gr
3 |
import json
4 |
from torchvision import transforms
5 |
from PIL import Image, ImageDraw, ImageFont
6 |
7 |
TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
@@ -14,71 +15,17 @@ with open(LABELS_PATH, "r") as f:
14 |
15 |
img_transforms = transforms.ToTensor()
16 |
17 |
# inter_class_nms
18 |
def inter_class_nms(boxes, scores, iou_threshold=0.5):
19 |
20 |
21 |
scores, class_indices = scores.max(dim=1)
22 |
23 |
24 |
25 |
26 |
final_class_indices = []
27 |
28 |
29 |
30 |
class_scores = scores[:, class_index]
31 |
class_boxes = boxes
32 |
33 |
# Indices of boxes sorted by score (highest first)
34 |
sorted_indices = torch.argsort(class_scores, descending=True)
35 |
36 |
while len(sorted_indices) > 0:
37 |
# Take the box with the highest score
38 |
highest_index = sorted_indices[0]
39 |
highest_box = class_boxes[highest_index]
40 |
41 |
# Add the highest box and score to the final list
42 |
43 |
44 |
45 |
46 |
# Remove the highest box from the list
47 |
sorted_indices = sorted_indices[1:]
48 |
49 |
# Compute IoU of the highest box with the rest
50 |
ious = iou(class_boxes[sorted_indices], highest_box)
51 |
52 |
# Keep only boxes with IoU less than the threshold
53 |
sorted_indices = sorted_indices[ious < iou_threshold]
54 |
55 |
return {'boxes': final_boxes, 'scores': final_scores}
56 |
57 |
58 |
def iou(boxes1, boxes2):
    """Compute the pairwise Intersection over Union (IoU) of two sets of boxes.

    Boxes are read as corner coordinates ``(x1, y1, x2, y2)``: columns 0-1 are
    the top-left corner and columns 2-3 the bottom-right corner.

    Args:
        boxes1 (Tensor[N, 4] or Tensor[4]): ground truth boxes.
        boxes2 (Tensor[M, 4] or Tensor[4]): predicted boxes.

    Returns:
        Tensor[N, M]: the NxM matrix containing the pairwise IoU values for
        every element in boxes1 and boxes2. A single box passed as a 1-D
        tensor is treated as a set of one box, so the result is still 2-D.
        Pairs whose union area is zero (e.g. two zero-area boxes) yield 0
        instead of NaN.
    """
    # Promote a lone box of shape [4] to [1, 4] so the column indexing below
    # works; callers that pick one box out of a batch get a 1-D tensor.
    if boxes1.dim() == 1:
        boxes1 = boxes1.unsqueeze(0)
    if boxes2.dim() == 1:
        boxes2 = boxes2.unsqueeze(0)

    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    # Corners of the intersection rectangle for every (boxes1, boxes2) pair.
    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Non-overlapping pairs produce negative extents; clamp them to zero.
    wh = (rb - lt).clamp(min=0)  # [N,M,2]
    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]

    union = area1[:, None] + area2 - inter  # [N,M]

    # For well-formed boxes a zero union implies a zero intersection, so
    # dividing by 1 there gives IoU 0 rather than the 0/0 NaN.
    safe_union = torch.where(union != 0, union, torch.ones_like(union))
    return inter / safe_union
82 |
83 |
def predict(img, conf_thresh=0.4):
84 |
img_input = [img_transforms(img)]
2 |
import gradio as gr
3 |
import json
4 |
from torchvision import transforms
5 |
from torchvision.ops import nms
6 |
from PIL import Image, ImageDraw, ImageFont
7 |
8 |
TORCHSCRIPT_PATH = "res/screenrecognition-web350k-vins.torchscript"
15 |
16 |
img_transforms = transforms.ToTensor()
17 |
18 |
# inter_class_nms implemented by GPT
19 |
def inter_class_nms(boxes, scores, iou_threshold=0.5):
    """Suppress overlapping boxes with a single NMS pass over all of them.

    The boxes are handed to ``nms`` together, without any per-class grouping,
    and the surviving indices are used to select from both inputs.

    Args:
        boxes: box tensor forwarded unchanged to ``nms``.
            NOTE(review): presumably Tensor[N, 4] in (x1, y1, x2, y2) form, as
            torchvision's ``nms`` expects - confirm against the caller.
        scores: score tensor forwarded unchanged to ``nms``.
            NOTE(review): presumably one score per box (Tensor[N]) - confirm.
        iou_threshold: overlap threshold forwarded to ``nms``.

    Returns:
        dict: ``'boxes'`` and ``'scores'`` holding the entries of the inputs
        selected by the indices ``nms`` returned.
    """
    survivors = nms(boxes, scores, iou_threshold)
    return {'boxes': boxes[survivors], 'scores': scores[survivors]}
29 |
30 |
def predict(img, conf_thresh=0.4):
31 |
img_input = [img_transforms(img)]