update data and model

- app.py +8 -3
- backbone.py +8 -9
app.py
CHANGED
@@ -533,6 +533,9 @@ def make_dataset_images_section(open=False):
     labels = np.array(dataset['label'])
     unique_labels = np.unique(labels)
     valid_classes = [i for i in classes if i in unique_labels]
+    invalid_classes = [i for i in classes if i not in unique_labels]
+    if len(invalid_classes) > 0:
+        gr.Warning(f"Classes {invalid_classes} not found in the dataset.")
     if len(valid_classes) == 0:
         gr.Error(f"Classes {classes} not found in the dataset.")
         return None
@@ -580,11 +583,13 @@ def make_parameters_section():
         layer_dict = LAYER_DICT
         if model_name in layer_dict:
             value = layer_dict[model_name]
-            return gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True)
+            return (gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True),
+                    gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Backbone: Layer type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?"))
         else:
             value = 12
-            return gr.
-
+            return (gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Backbone: Layer type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?"),
+                    gr.Slider(1, value, step=1, label="Backbone: Layer index", value=value, elem_id="layer", visible=True))
+    model_dropdown.change(fn=change_layer_slider, inputs=model_dropdown, outputs=[layer_slider, node_type_dropdown])

     with gr.Accordion("➡️ Click to expand: more parameters", open=False):
         affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="NCUT: Affinity focal gamma", value=0.5, elem_id="affinity_focal_gamma", info="decrease for shaper segmentation")
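The first app.py hunk softens class filtering: requested classes that are missing from the dataset now trigger a non-fatal gr.Warning toast, while gr.Error is reserved for the case where no valid class remains. Below is a minimal sketch of that flow; the helper name filter_valid_classes is hypothetical (the real code is inline in make_dataset_images_section), and gr.Error is raised here, which is how Gradio surfaces it when the function runs inside an event handler.

```python
import numpy as np
import gradio as gr

def filter_valid_classes(labels, classes):
    # labels: per-image class ids from the dataset; classes: ids requested in the UI
    unique_labels = np.unique(np.array(labels))
    valid_classes = [i for i in classes if i in unique_labels]
    invalid_classes = [i for i in classes if i not in unique_labels]
    if len(invalid_classes) > 0:
        # non-fatal: tell the user which requested classes were dropped, then keep going
        gr.Warning(f"Classes {invalid_classes} not found in the dataset.")
    if len(valid_classes) == 0:
        # fatal: nothing left to build the image grid from
        raise gr.Error(f"Classes {classes} not found in the dataset.")
    return valid_classes
```

Called from a Gradio event handler, gr.Warning shows a toast without stopping the event, while a raised gr.Error aborts it and displays the message.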
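The second hunk makes the model dropdown drive two controls at once: change_layer_slider now returns both the layer slider and a node-type dropdown, and model_dropdown.change lists both as outputs. A minimal sketch of that wiring, assuming a LAYER_DICT trimmed to a single entry for illustration; both branches here return the components in the same order as the outputs list.

```python
import gradio as gr

# hypothetical stand-in for the LAYER_DICT defined in backbone.py
LAYER_DICT = {"CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)": 12}

def change_layer_slider(model_name):
    # pick the maximum layer index for the selected backbone, defaulting to 12
    value = LAYER_DICT.get(model_name, 12)
    layer = gr.Slider(1, value, step=1, label="Backbone: Layer index",
                      value=value, elem_id="layer", visible=True)
    node_type = gr.Dropdown(
        ["attn: attention output", "mlp: mlp output", "block: sum of residual"],
        label="Backbone: Layer type", value="block: sum of residual",
        elem_id="node_type", info="which feature to take from each layer?")
    # returned order must match the outputs=[layer_slider, node_type_dropdown] wiring
    return layer, node_type

with gr.Blocks() as demo:
    model_dropdown = gr.Dropdown(list(LAYER_DICT.keys()), label="Backbone")
    layer_slider = gr.Slider(1, 12, step=1, label="Backbone: Layer index")
    node_type_dropdown = gr.Dropdown(
        ["attn: attention output", "mlp: mlp output", "block: sum of residual"],
        label="Backbone: Layer type", value="block: sum of residual")
    model_dropdown.change(fn=change_layer_slider, inputs=model_dropdown,
                          outputs=[layer_slider, node_type_dropdown])
```

Returning fresh component instances from the handler is how the diff updates the existing slider and dropdown in place.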
backbone.py
CHANGED
@@ -1,17 +1,14 @@
+# Author: Huzheng Yang
+# %%
 from typing import Optional, Tuple
 from einops import rearrange
 import requests
 import torch
 import torch.nn.functional as F
 import timm
-from PIL import Image
 from torch import nn
 import numpy as np
 import os
-import time
-
-import gradio as gr
-
 from functools import partial

 MODEL_DICT = {}
@@ -613,7 +610,7 @@ class EVA02(nn.Module):
         super().__init__(**kwargs)

         model = timm.create_model(
-            '
+            'eva02_base_patch14_448.mim_in22k_ft_in1k',
             pretrained=True,
             num_classes=0,  # remove classifier nn.Linear
         )
@@ -660,9 +657,9 @@ class EVA02(nn.Module):
             'block': block_outputs
         }

-MODEL_DICT["
-LAYER_DICT["
-RES_DICT["
+MODEL_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = partial(EVA02)
+LAYER_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = 12
+RES_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = (448, 448)

 class CLIPConvnext(nn.Module):
     def __init__(self):
@@ -862,6 +859,8 @@ def extract_features(images, model, model_name, node_type, layer, batch_size=8):
         inp = inp.cuda()
         out = model(inp)  # {'attn': [B, H, W, C], 'mlp': [B, H, W, C], 'block': [B, H, W, C]}
         out = out[node_type]
+        if out is None:
+            raise ValueError(f"Node type {node_type} not found in model {model_name}")
         out = out[layer]
         # normalize
         out = F.normalize(out, dim=-1)
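The backbone.py hunks pin the EVA02 checkpoint name and register it in the three lookup tables the UI reads from: constructor, layer count, and input resolution. Below is a condensed sketch of that registration, assuming a trimmed-down EVA02 wrapper; the real class also hooks per-layer attn/mlp/block outputs, so the forward shown here is only a placeholder.

```python
from functools import partial

import timm
import torch
from torch import nn

MODEL_DICT, LAYER_DICT, RES_DICT = {}, {}, {}

class EVA02(nn.Module):
    def __init__(self):
        super().__init__()
        # EVA02-base, patch 14, fine-tuned at 448x448; weights come from the timm hub
        self.model = timm.create_model(
            'eva02_base_patch14_448.mim_in22k_ft_in1k',
            pretrained=True,
            num_classes=0,  # remove classifier nn.Linear
        )

    @torch.no_grad()
    def forward(self, x):
        # placeholder: the real wrapper returns {'attn': ..., 'mlp': ..., 'block': ...} per layer
        return self.model.forward_features(x)

# register the backbone: constructor, number of layers the slider can index, input resolution
MODEL_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = partial(EVA02)
LAYER_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = 12
RES_DICT["CLIP(eva02_base_patch14_448.mim_in22k_ft_in1k)"] = (448, 448)
```

Keeping the three dicts keyed by the same display name is what lets the layer slider and resolution follow automatically when a new backbone is added.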
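The last hunk guards the node-type lookup in extract_features: a backbone that does not expose the requested feature stream now fails with a clear error instead of an opaque indexing crash further down. A minimal sketch of the guarded slice; select_layer_features is a hypothetical stand-in for the inline code in the batch loop.

```python
import torch
import torch.nn.functional as F

def select_layer_features(out, node_type, layer, model_name):
    # out maps 'attn' / 'mlp' / 'block' to stacks of per-layer [B, H, W, C] features
    feats = out[node_type]
    if feats is None:
        # some backbones only expose a subset of node types; fail loudly and early
        raise ValueError(f"Node type {node_type} not found in model {model_name}")
    feats = feats[layer]
    # unit-normalize channel vectors before they feed the NCUT affinity
    return F.normalize(feats, dim=-1)
```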