dbogdollumich committed on
Commit
a5fe9bc
·
verified ·
1 Parent(s): 410d70c

Outdated code, bboxes were scaled incorrectly

Browse files
Files changed (1) hide show
  1. README.md +17 -14
README.md CHANGED
@@ -51,23 +51,12 @@ inputs = processor(text=texts, images=image, return_tensors="pt")
51
  with torch.no_grad():
52
  outputs = model(**inputs)
53
 
54
- # Note: boxes need to be visualized on the padded, unnormalized image
55
- # hence we'll set the target image sizes (height, width) based on that
56
 
57
- def get_preprocessed_image(pixel_values):
58
- pixel_values = pixel_values.squeeze().numpy()
59
- unnormalized_image = (pixel_values * np.array(OPENAI_CLIP_STD)[:, None, None]) + np.array(OPENAI_CLIP_MEAN)[:, None, None]
60
- unnormalized_image = (unnormalized_image * 255).astype(np.uint8)
61
- unnormalized_image = np.moveaxis(unnormalized_image, 0, -1)
62
- unnormalized_image = Image.fromarray(unnormalized_image)
63
- return unnormalized_image
64
-
65
- unnormalized_image = get_preprocessed_image(inputs.pixel_values)
66
-
67
- target_sizes = torch.Tensor([unnormalized_image.size[::-1]])
68
  # Convert outputs (bounding boxes and class logits) to final bounding boxes and scores
69
  results = processor.post_process_object_detection(
70
- outputs=outputs, threshold=0.2, target_sizes=target_sizes
71
  )
72
 
73
  i = 0 # Retrieve predictions for the first image for the corresponding text queries
@@ -77,6 +66,20 @@ boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["l
77
  for box, score, label in zip(boxes, scores, labels):
78
  box = [round(i, 2) for i in box.tolist()]
79
  print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  ```
81
 
82
 
 
51
  with torch.no_grad():
52
  outputs = model(**inputs)
53
 
54
+ # Get original image size
55
+ original_size = torch.Tensor([image.size[::-1]])
56
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Convert outputs (bounding boxes and class logits) to final bounding boxes and scores
58
  results = processor.post_process_object_detection(
59
+ outputs=outputs, threshold=0.2, target_sizes=original_size
60
  )
61
 
62
  i = 0 # Retrieve predictions for the first image for the corresponding text queries
 
66
  for box, score, label in zip(boxes, scores, labels):
67
  box = [round(i, 2) for i in box.tolist()]
68
  print(f"Detected {text[label]} with confidence {round(score.item(), 3)} at location {box}")
69
+
70
+ # Draw each box on the image
71
+ draw = ImageDraw.Draw(image)
72
+
73
+ for box, score, label in zip(boxes, scores, labels):
74
+ box = [round(i, 2) for i in box.tolist()]
75
+ draw.rectangle(box, outline="red", width=2)
76
+ draw.text(
77
+ (box[0], box[1]),
78
+ f"{text[label]}: {round(score.item(), 3)}",
79
+ fill="red",
80
+ )
81
+
82
+ image.show()
83
  ```
84
 
85