# coding=utf-8 # Copyright 2024 The Google Research Authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Visualization functions.""" import os import cv2 import matplotlib.pyplot as plt import numpy as np from PIL import Image import torch # pylint: disable=g-importing-member from utils.utils import normalize _VIS_HEIGHT = 512 _VIS_WIDTH = 512 def show_cam_on_image(img, mask): if img.shape[1] != mask.shape[1]: mask = cv2.resize(mask, (img.shape[1], img.shape[0])) heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET) heatmap = cv2.cvtColor(heatmap, cv2.COLOR_BGR2RGB) heatmap = np.float32(heatmap) / 255 cam = heatmap + np.float32(img) cam = cam / np.max(cam) cam = np.uint8(255 * cam) return cam def save_img(array, img_name): numpy_array = array.astype(np.uint8) image = Image.fromarray(numpy_array, mode="RGB") image.save(f"{img_name}.png") def viz_attn(img, attn_map, prefix="vis_results/clipcam_img", img_name="cam"): """Visualize attention map.""" num_masks = 1 if len(attn_map.shape) == 3: num_masks = attn_map.shape[0] attn_map = attn_map.float().squeeze(1).detach().cpu().numpy() attn_map = normalize(attn_map) img = normalize(img) if num_masks == 1: vis = show_cam_on_image(img, attn_map) if not os.path.exists(prefix): os.makedirs(prefix) save_img(vis, os.path.join(prefix, f"{img_name}")) return vis for i in range(num_masks): vis = show_cam_on_image(img, attn_map[i]) if not os.path.exists(prefix): os.makedirs(prefix) save_img(vis, os.path.join(prefix, f"{img_name}_{i}")) def vis_mask(mask, gt_mask, img, output_dir, fname): """Visualize mask.""" mask_img = torch.zeros((_VIS_WIDTH, _VIS_HEIGHT)) mask_img[mask[0]] = 1 # print(gt_mask.shape, img.size()) # Assume img and gt_mask are also torch.Tensor with size (512, 512) img = img[0].permute(1, 2, 0).numpy() gt_mask_img = torch.zeros((_VIS_WIDTH, _VIS_HEIGHT)) gt_mask_img[gt_mask[0]] = 1 _, axs = plt.subplots( 1, 3, figsize=(15, 5) ) # change the figsize if necessary axs[0].imshow(img) # if image is grayscale, otherwise remove cmap argument axs[0].axis("off") axs[0].set_title("Original Image") axs[1].imshow( mask_img.numpy(), cmap="jet", alpha=0.5 ) # using alpha for transparency axs[1].axis("off") axs[1].set_title("Mask") axs[2].imshow( gt_mask_img.numpy(), cmap="jet", alpha=0.5 ) # using alpha for transparency axs[2].axis("off") axs[2].set_title("Ground Truth Mask") plt.savefig( os.path.join(output_dir, f"{fname}.jpg"), bbox_inches="tight", dpi=300, pad_inches=0.0, )