taesiri committed
Commit d97d4d2 · 1 Parent(s): dfc8148
Files changed (1)
  1. app.py +3 -4
app.py CHANGED
@@ -113,7 +113,6 @@ def visualize_attention(
     model = model.to('cuda')
     attention_maps = process_image(image, model, extractor)
 
-    # FIXME handle wider range of models that may not have num_prefix_tokens attr
     num_prefix_tokens = getattr(model, 'num_prefix_tokens', 1) # Default to 1 class token if not specified
 
     # Convert PIL Image to numpy array
@@ -142,7 +141,6 @@ def visualize_attention(
     raise ValueError(f"Invalid head fusion method: {head_fusion}")
 
     # Use the first token's attention (usually the class token)
-    # FIXME handle different prefix token scenarios
     attn_map = attn_map[0]
 
     # Reshape the attention map to 2D
@@ -152,7 +150,7 @@ def visualize_attention(
     # Interpolate to match image size
     attn_map = attn_map.unsqueeze(0).unsqueeze(0)
     attn_map = F.interpolate(attn_map, size=(image_np.shape[0], image_np.shape[1]), mode='bilinear', align_corners=False)
-    attn_map = attn_map.squeeze().cpu().numpy()
+    attn_map = attn_map.squeeze().detach().cpu().numpy() # Detach before converting to numpy
 
     # Normalize attention map
     attn_map = (attn_map - attn_map.min()) / (attn_map.max() - attn_map.min())
@@ -228,4 +226,5 @@ iface = gr.Interface(
     description="Upload an image and select a timm model to visualize its attention maps."
 )
 
-iface.launch()
+# Launch the interface with share=True to create a public link
+iface.launch(share=True)