Spaces:

Pusheen
/

LoCo

Sleeping

App Files Files Community

Pusheen commited on Feb 5, 2024

Commit

7cb3306

verified ·

1 Parent(s): 127d448

Update utils.py

Browse files

Files changed (1) hide show

utils.py +131 -0

utils.py CHANGED Viewed

@@ -1,5 +1,136 @@
 import torch
 import math
 def compute_ca_loss(attn_maps_mid, attn_maps_up, bboxes, object_positions):
     loss = 0
     object_number = len(bboxes)

 import torch
+from torch import nn
 import math
+from PIL import Image, ImageDraw, ImageFont
+import logging
+import os
+import pandas as pd
+import csv
+import pickle
+import numpy as np
+from torch.nn import BCELoss
+from torch.nn import functional as F
+import math
+import numbers
+from typing import List
+def get_all_attention_64(attn_maps_down, attn_maps_mid , attn_maps_up, res = 16):
+    result  = []
+    for attn_map_integrated in attn_maps_up:
+        if attn_map_integrated == []: continue
+        attn_map = attn_map_integrated.squeeze(0)
+        # print(attn_map.shape)
+        b, i, j = attn_map.shape
+        H = W = int(math.sqrt(i))
+        # print(H)
+        if H == res:
+            item = attn_map.reshape(-1, res, res, attn_map.shape[-1] )
+            item = item.permute(0, 3, 1, 2)
+            item = F.interpolate(item, 64, mode='bilinear').permute(0, 2, 3, 1)
+            result.append(item)
+    for attn_map_integrated in attn_maps_mid:
+        attn_map =  attn_map_integrated.squeeze(0)
+        b, i, j = attn_map.shape
+        H = W = int(math.sqrt(i))
+        # print(H)
+        if (H==8):
+            item = attn_map.reshape(-1, 8, 8, attn_map.shape[-1] )
+            item = item.permute(0, 3, 1, 2)
+            item = F.interpolate(item, 64, mode='bilinear').permute(0, 2, 3, 1)
+            result.append(item)
+    for attn_map_integrated in attn_maps_down:
+        if attn_map_integrated == []: continue
+        attn_map = attn_map_integrated.squeeze(0)
+        if attn_map == []: continue
+        b, i, j = attn_map.shape
+        H = W = int(math.sqrt(i))
+        if H == res:
+            item = attn_map.reshape(-1, res, res, attn_map.shape[-1] )
+            item = item.permute(0, 3, 1, 2)
+            item = F.interpolate(item, 64, mode='bilinear').permute(0, 2, 3, 1)
+            result.append(item)
+    # print('RES LENGTH', len(result))
+    # for maps in result:
+    #     print(maps.shape)
+    result = torch.cat(result, dim=0)
+    result = result.sum(0) / result.shape[0]
+    return result
+def compute_loco_v2(attn_maps_down, attn_maps_mid, attn_maps_up, bboxes, object_positions, smooth_attn=True, topk = 0.8):
+    loss = 0.
+    pad_loss = 0.
+    total_fg_map = torch.zeros(size=(64, 64)).cuda()
+    alpha = 0.2
+    beta = 0.8
+    object_number = len(bboxes)
+    if object_number == 0:
+        return torch.tensor(0).float().cuda() if torch.cuda.is_available() else torch.tensor(0).float()
+    attn16 = get_all_attention_64(attn_maps_down[-1]+ attn_maps_down[-2], attn_maps_mid, attn_maps_up[0]+attn_maps_up[1], 16)
+    all_attn = [attn16]
+    max_loss = 0
+    for attn_map in all_attn:
+        sum_in = 0.
+        sum_out = 0.
+        i, j, k = attn_map.shape
+        H = W = i
+        for obj_idx in range(object_number):
+            obj_loss = 0
+            mask = torch.zeros(size=(H, W)).cuda() if torch.cuda.is_available() else torch.zeros(size=(H, W))
+            for obj_box in bboxes[obj_idx]:
+                x_min, y_min, x_max, y_max = int(obj_box[0] * W), \
+                    int(obj_box[1] * H), int(obj_box[2] * W), int(obj_box[3] * H)
+                mask[y_min: y_max, x_min: x_max] = 1
+                total_fg_map[y_min: y_max, x_min: x_max] = 1
+            for obj_position in [object_positions[obj_idx]]:
+                ca_map_obj = attn_map[:, :, obj_position].sum(-1)
+                ca_map_obj = ca_map_obj.reshape(H, W)
+                norm_ca_map_obj = ca_map_obj / ca_map_obj.max()
+                norm_ca_map_obj = norm_ca_map_obj.reshape(H, W)
+                sum_in += (norm_ca_map_obj * mask).sum()
+                sum_out += (norm_ca_map_obj * (1 - mask)).sum()
+            loss += (obj_loss/len(object_positions[obj_idx]))
+        sot_map = attn_map[:, :, 0].reshape(H, W)
+        eot_map = attn_map[:, :, -1].reshape(H, W)
+        norm_sot_map = (1 - sot_map) / (1 - sot_map).max()
+        norm_eot_map = eot_map / eot_map.max()
+        pad_map = beta * norm_sot_map + (1 - beta) * norm_eot_map
+    total_fg_mask = total_fg_map
+    fg_map = pad_map * total_fg_mask
+    bce_loss = F.binary_cross_entropy(torch.sigmoid(pad_map.to(torch.float16).reshape(-1)), fg_map.to(torch.float16).reshape(-1))
+    pad_loss += bce_loss
+    loss += (1 - sum_in / (sum_in + sum_out)) ** 2
+    return loss + alpha * pad_loss
 def compute_ca_loss(attn_maps_mid, attn_maps_up, bboxes, object_positions):
     loss = 0
     object_number = len(bboxes)