liuyizhang committed on
Commit
59d64d3
1 Parent(s): 1de504b

update app.py

Browse files
Grounding_DINO/groundingdino/transforms.py DELETED
@@ -1,311 +0,0 @@
1
- # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2
- """
3
- Transforms and data augmentation for both image + bbox.
4
- """
5
- import os
6
- import random
7
-
8
- import PIL
9
- import torch
10
- import torchvision.transforms as T
11
- import torchvision.transforms.functional as F
12
-
13
- from .util.box_ops import box_xyxy_to_cxcywh
14
- from .util.misc import interpolate
15
-
16
-
17
- def crop(image, target, region):
18
- cropped_image = F.crop(image, *region)
19
-
20
- target = target.copy()
21
- i, j, h, w = region
22
-
23
- # should we do something wrt the original size?
24
- target["size"] = torch.tensor([h, w])
25
-
26
- fields = ["labels", "area", "iscrowd", "positive_map"]
27
-
28
- if "boxes" in target:
29
- boxes = target["boxes"]
30
- max_size = torch.as_tensor([w, h], dtype=torch.float32)
31
- cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
32
- cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
33
- cropped_boxes = cropped_boxes.clamp(min=0)
34
- area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
35
- target["boxes"] = cropped_boxes.reshape(-1, 4)
36
- target["area"] = area
37
- fields.append("boxes")
38
-
39
- if "masks" in target:
40
- # FIXME should we update the area here if there are no boxes?
41
- target["masks"] = target["masks"][:, i : i + h, j : j + w]
42
- fields.append("masks")
43
-
44
- # remove elements for which the boxes or masks that have zero area
45
- if "boxes" in target or "masks" in target:
46
- # favor boxes selection when defining which elements to keep
47
- # this is compatible with previous implementation
48
- if "boxes" in target:
49
- cropped_boxes = target["boxes"].reshape(-1, 2, 2)
50
- keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
51
- else:
52
- keep = target["masks"].flatten(1).any(1)
53
-
54
- for field in fields:
55
- if field in target:
56
- target[field] = target[field][keep]
57
-
58
- if os.environ.get("IPDB_SHILONG_DEBUG", None) == "INFO":
59
- # for debug and visualization only.
60
- if "strings_positive" in target:
61
- target["strings_positive"] = [
62
- _i for _i, _j in zip(target["strings_positive"], keep) if _j
63
- ]
64
-
65
- return cropped_image, target
66
-
67
-
68
- def hflip(image, target):
69
- flipped_image = F.hflip(image)
70
-
71
- w, h = image.size
72
-
73
- target = target.copy()
74
- if "boxes" in target:
75
- boxes = target["boxes"]
76
- boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor(
77
- [w, 0, w, 0]
78
- )
79
- target["boxes"] = boxes
80
-
81
- if "masks" in target:
82
- target["masks"] = target["masks"].flip(-1)
83
-
84
- return flipped_image, target
85
-
86
-
87
- def resize(image, target, size, max_size=None):
88
- # size can be min_size (scalar) or (w, h) tuple
89
-
90
- def get_size_with_aspect_ratio(image_size, size, max_size=None):
91
- w, h = image_size
92
- if max_size is not None:
93
- min_original_size = float(min((w, h)))
94
- max_original_size = float(max((w, h)))
95
- if max_original_size / min_original_size * size > max_size:
96
- size = int(round(max_size * min_original_size / max_original_size))
97
-
98
- if (w <= h and w == size) or (h <= w and h == size):
99
- return (h, w)
100
-
101
- if w < h:
102
- ow = size
103
- oh = int(size * h / w)
104
- else:
105
- oh = size
106
- ow = int(size * w / h)
107
-
108
- return (oh, ow)
109
-
110
- def get_size(image_size, size, max_size=None):
111
- if isinstance(size, (list, tuple)):
112
- return size[::-1]
113
- else:
114
- return get_size_with_aspect_ratio(image_size, size, max_size)
115
-
116
- size = get_size(image.size, size, max_size)
117
- rescaled_image = F.resize(image, size)
118
-
119
- if target is None:
120
- return rescaled_image, None
121
-
122
- ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size))
123
- ratio_width, ratio_height = ratios
124
-
125
- target = target.copy()
126
- if "boxes" in target:
127
- boxes = target["boxes"]
128
- scaled_boxes = boxes * torch.as_tensor(
129
- [ratio_width, ratio_height, ratio_width, ratio_height]
130
- )
131
- target["boxes"] = scaled_boxes
132
-
133
- if "area" in target:
134
- area = target["area"]
135
- scaled_area = area * (ratio_width * ratio_height)
136
- target["area"] = scaled_area
137
-
138
- h, w = size
139
- target["size"] = torch.tensor([h, w])
140
-
141
- if "masks" in target:
142
- target["masks"] = (
143
- interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0] > 0.5
144
- )
145
-
146
- return rescaled_image, target
147
-
148
-
149
- def pad(image, target, padding):
150
- # assumes that we only pad on the bottom right corners
151
- padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
152
- if target is None:
153
- return padded_image, None
154
- target = target.copy()
155
- # should we do something wrt the original size?
156
- target["size"] = torch.tensor(padded_image.size[::-1])
157
- if "masks" in target:
158
- target["masks"] = torch.nn.functional.pad(target["masks"], (0, padding[0], 0, padding[1]))
159
- return padded_image, target
160
-
161
-
162
- class ResizeDebug(object):
163
- def __init__(self, size):
164
- self.size = size
165
-
166
- def __call__(self, img, target):
167
- return resize(img, target, self.size)
168
-
169
-
170
- class RandomCrop(object):
171
- def __init__(self, size):
172
- self.size = size
173
-
174
- def __call__(self, img, target):
175
- region = T.RandomCrop.get_params(img, self.size)
176
- return crop(img, target, region)
177
-
178
-
179
- class RandomSizeCrop(object):
180
- def __init__(self, min_size: int, max_size: int, respect_boxes: bool = False):
181
- # respect_boxes: True to keep all boxes
182
- # False to tolerate boxes being filtered out
183
- self.min_size = min_size
184
- self.max_size = max_size
185
- self.respect_boxes = respect_boxes
186
-
187
- def __call__(self, img: PIL.Image.Image, target: dict):
188
- init_boxes = len(target["boxes"])
189
- max_patience = 10
190
- for i in range(max_patience):
191
- w = random.randint(self.min_size, min(img.width, self.max_size))
192
- h = random.randint(self.min_size, min(img.height, self.max_size))
193
- region = T.RandomCrop.get_params(img, [h, w])
194
- result_img, result_target = crop(img, target, region)
195
- if (
196
- not self.respect_boxes
197
- or len(result_target["boxes"]) == init_boxes
198
- or i == max_patience - 1
199
- ):
200
- return result_img, result_target
201
- return result_img, result_target
202
-
203
-
204
- class CenterCrop(object):
205
- def __init__(self, size):
206
- self.size = size
207
-
208
- def __call__(self, img, target):
209
- image_width, image_height = img.size
210
- crop_height, crop_width = self.size
211
- crop_top = int(round((image_height - crop_height) / 2.0))
212
- crop_left = int(round((image_width - crop_width) / 2.0))
213
- return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
214
-
215
-
216
- class RandomHorizontalFlip(object):
217
- def __init__(self, p=0.5):
218
- self.p = p
219
-
220
- def __call__(self, img, target):
221
- if random.random() < self.p:
222
- return hflip(img, target)
223
- return img, target
224
-
225
-
226
- class RandomResize(object):
227
- def __init__(self, sizes, max_size=None):
228
- assert isinstance(sizes, (list, tuple))
229
- self.sizes = sizes
230
- self.max_size = max_size
231
-
232
- def __call__(self, img, target=None):
233
- size = random.choice(self.sizes)
234
- return resize(img, target, size, self.max_size)
235
-
236
-
237
- class RandomPad(object):
238
- def __init__(self, max_pad):
239
- self.max_pad = max_pad
240
-
241
- def __call__(self, img, target):
242
- pad_x = random.randint(0, self.max_pad)
243
- pad_y = random.randint(0, self.max_pad)
244
- return pad(img, target, (pad_x, pad_y))
245
-
246
-
247
- class RandomSelect(object):
248
- """
249
- Randomly selects between transforms1 and transforms2,
250
- with probability p for transforms1 and (1 - p) for transforms2
251
- """
252
-
253
- def __init__(self, transforms1, transforms2, p=0.5):
254
- self.transforms1 = transforms1
255
- self.transforms2 = transforms2
256
- self.p = p
257
-
258
- def __call__(self, img, target):
259
- if random.random() < self.p:
260
- return self.transforms1(img, target)
261
- return self.transforms2(img, target)
262
-
263
-
264
- class ToTensor(object):
265
- def __call__(self, img, target):
266
- return F.to_tensor(img), target
267
-
268
-
269
- class RandomErasing(object):
270
- def __init__(self, *args, **kwargs):
271
- self.eraser = T.RandomErasing(*args, **kwargs)
272
-
273
- def __call__(self, img, target):
274
- return self.eraser(img), target
275
-
276
-
277
- class Normalize(object):
278
- def __init__(self, mean, std):
279
- self.mean = mean
280
- self.std = std
281
-
282
- def __call__(self, image, target=None):
283
- image = F.normalize(image, mean=self.mean, std=self.std)
284
- if target is None:
285
- return image, None
286
- target = target.copy()
287
- h, w = image.shape[-2:]
288
- if "boxes" in target:
289
- boxes = target["boxes"]
290
- boxes = box_xyxy_to_cxcywh(boxes)
291
- boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
292
- target["boxes"] = boxes
293
- return image, target
294
-
295
-
296
- class Compose(object):
297
- def __init__(self, transforms):
298
- self.transforms = transforms
299
-
300
- def __call__(self, image, target):
301
- for t in self.transforms:
302
- image, target = t(image, target)
303
- return image, target
304
-
305
- def __repr__(self):
306
- format_string = self.__class__.__name__ + "("
307
- for t in self.transforms:
308
- format_string += "\n"
309
- format_string += " {0}".format(t)
310
- format_string += "\n)"
311
- return format_string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -3,8 +3,8 @@ import subprocess
3
 
4
  result = subprocess.run(['pip', 'install', '-e', 'segment_anything'], check=True)
5
  print(f'liuyz_install segment_anything result = {result}')
6
- result = subprocess.run(['pip', 'install', '-e', 'GroundingDINO'], check=True)
7
- print(f'liuyz_install GroundingDINO result = {result}')
8
 
9
  result = subprocess.run(['pip', 'list'], check=True)
10
  print(f'liuyz_pip list result = {result}')
 
3
 
4
  result = subprocess.run(['pip', 'install', '-e', 'segment_anything'], check=True)
5
  print(f'liuyz_install segment_anything result = {result}')
6
+ result = subprocess.run(['pip', 'install', '-e', 'Grounding_DINO'], check=True)
7
+ print(f'liuyz_install Grounding_DINO result = {result}')
8
 
9
  result = subprocess.run(['pip', 'list'], check=True)
10
  print(f'liuyz_pip list result = {result}')