ncut-pytorch / app.py
huzey's picture
update ui
9cd819b
raw
history blame
8.57 kB
import spaces
import gradio as gr
import torch
from PIL import Image
import numpy as np
import time
import gradio as gr
from backbone import extract_features
from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
def compute_ncut(
    features,
    num_eig=100,
    num_sample_ncut=10000,
    affinity_focal_gamma=0.3,
    knn_ncut=10,
    knn_tsne=10,
    embedding_method="UMAP",
    num_sample_tsne=300,
    perplexity=150,
    n_neighbors=150,
    min_dist=0.1,
):
    """Run NCUT on ``features`` and colour the eigenvectors via a 3-D embedding.

    The features are flattened to rows of ``features.shape[-1]`` values and
    passed to ``NCUT(...).fit_transform``; the resulting eigenvectors are then
    handed to ``rgb_from_umap_3d`` or ``rgb_from_tsne_3d``.  The wall-clock
    time of each stage is printed.

    Args:
        features: Object with ``shape`` and ``reshape``; needs three leading
            dimensions plus a trailing feature dimension (presumably a torch
            tensor of (batch, height, width, channels) -- TODO confirm against
            ``extract_features``).
        num_eig: Forwarded to ``NCUT`` as ``num_eig``.
        num_sample_ncut: Forwarded to ``NCUT`` as ``num_sample``.
        affinity_focal_gamma: Forwarded to ``NCUT`` unchanged.
        knn_ncut: Forwarded to ``NCUT`` as ``knn``.
        knn_tsne: Forwarded to ``rgb_from_tsne_3d`` as ``knn`` (t-SNE only).
        embedding_method: Either ``"UMAP"`` or ``"t-SNE"``.
        num_sample_tsne: Forwarded to ``rgb_from_tsne_3d`` as ``num_sample``
            (t-SNE only).
        perplexity: Forwarded to ``rgb_from_tsne_3d`` (t-SNE only).
        n_neighbors: Forwarded to ``rgb_from_umap_3d`` (UMAP only).
        min_dist: Forwarded to ``rgb_from_umap_3d`` (UMAP only).

    Returns:
        The colours reshaped to ``features.shape[:3] + (3,)``.

    Raises:
        ValueError: If ``embedding_method`` is not one of the two supported
            names.  Note that this is raised only after the NCUT stage ran.
    """
    # Both stages run on the same device; prefer the GPU when one is visible.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # --- Stage 1: spectral decomposition -----------------------------------
    start = time.time()
    solver = NCUT(
        num_eig=num_eig,
        num_sample=num_sample_ncut,
        device=device,
        affinity_focal_gamma=affinity_focal_gamma,
        knn=knn_ncut,
    )
    flat_features = features.reshape(-1, features.shape[-1])
    eigvecs, _eigvals = solver.fit_transform(flat_features)
    print(f"NCUT time: {time.time() - start:.2f}s")

    # --- Stage 2: eigenvectors -> RGB ---------------------------------------
    start = time.time()
    if embedding_method not in ("UMAP", "t-SNE"):
        raise ValueError(f"Embedding method {embedding_method} not supported.")

    if embedding_method == "UMAP":
        _, rgb = rgb_from_umap_3d(
            eigvecs,
            n_neighbors=n_neighbors,
            min_dist=min_dist,
            device=device,
        )
        print(f"UMAP time: {time.time() - start:.2f}s")
    else:
        _, rgb = rgb_from_tsne_3d(
            eigvecs,
            num_sample=num_sample_tsne,
            perplexity=perplexity,
            knn=knn_tsne,
            device=device,
        )
        print(f"t-SNE time: {time.time() - start:.2f}s")

    # Restore the three leading dimensions of the input, with 3 colour channels.
    output_shape = features.shape[:3] + (3,)
    return rgb.reshape(output_shape)
def dont_use_too_much_green(image_rgb):
    """Permute colour channels so the brightest central channel comes first.

    The mean of every channel is measured over the central window of the
    images (30%..70% of axis 1 and of axis 2, i.e. the middle 40% along each)
    across the whole batch.  Channels are then reordered by descending mean,
    so the channel that dominates the centre ends up in the red slot and the
    weakest in the blue slot.

    Args:
        image_rgb: 4-D torch tensor whose last axis holds the colour channels
            (presumably (batch, height, width, 3) -- TODO confirm).

    Returns:
        A tensor of the same shape with the last axis permuted.
    """
    extent_1, extent_2 = image_rgb.shape[1], image_rgb.shape[2]
    x1, x2 = int(extent_1 * 0.3), int(extent_1 * 0.7)
    y1, y2 = int(extent_2 * 0.3), int(extent_2 * 0.7)

    center = image_rgb[:, x1:x2, y1:y2]
    if center.numel() == 0:
        # For very small inputs the central window is empty; averaging it
        # would give NaN and an arbitrary channel order, so use every pixel.
        center = image_rgb

    channel_means = center.mean((0, 1, 2))
    channel_order = channel_means.argsort(descending=True)
    return image_rgb[:, :, :, channel_order]
def to_pil_images(images):
    """Turn an iterable of image tensors into 256x256 PIL images.

    Every item is multiplied by 255, moved to the CPU, converted to a
    ``uint8`` NumPy array and resized with nearest-neighbour resampling.

    Args:
        images: Iterable of tensors supporting ``* 255`` and ``.cpu().numpy()``
            (presumably values in [0, 1] -- TODO confirm).

    Returns:
        A list with one PIL image per input item, in the same order.
    """
    thumbnails = []
    for image in images:
        pixels = (image * 255).cpu().numpy().astype(np.uint8)
        picture = Image.fromarray(pixels)
        thumbnails.append(picture.resize((256, 256), Image.NEAREST))
    return thumbnails
# Full-size example inputs loaded into the input gallery by the "Load" button.
default_images = [
    './images/image_0.jpg',
    './images/image_1.jpg',
    './images/image_2.jpg',
    './images/image_3.jpg',
    './images/image_5.jpg',
]
# Pre-rendered outputs shown next to the example inputs.
default_outputs = [
    './images/ncut_0.jpg',
    './images/ncut_1.jpg',
    './images/ncut_2.jpg',
    './images/ncut_3.jpg',
    './images/ncut_5.jpg',
]
# Small variants of the example inputs.
downscaled_images = [
    './images/image_0_small.jpg',
    './images/image_1_small.jpg',
    './images/image_2_small.jpg',
    './images/image_3_small.jpg',
    './images/image_5_small.jpg',
]
# Small variants of the pre-rendered outputs.
downscaled_outputs = [
    './images/ncut_0_small.jpg',
    './images/ncut_1_small.jpg',
    './images/ncut_2_small.jpg',
    './images/ncut_3_small.jpg',
    './images/ncut_5_small.jpg',
]
# Preview set: the first three small inputs followed by their three outputs.
example_items = [*downscaled_images[:3], *downscaled_outputs[:3]]
@spaces.GPU(duration=30)
def main_fn(
    images,
    model_name="SAM(sam_vit_b)",
    layer=-1,
    num_eig=100,
    node_type="block",
    affinity_focal_gamma=0.3,
    num_sample_ncut=10000,
    knn_ncut=10,
    embedding_method="UMAP",
    num_sample_tsne=1000,
    knn_tsne=10,
    perplexity=500,
    n_neighbors=500,
    min_dist=0.1,
):
    """Submit handler: extract features, run NCUT and return coloured images.

    Args:
        images: Gallery value; each item is indexed with ``[0]`` to drop its
            label.  May be empty or ``None`` when nothing has been selected.
        model_name: Backbone name forwarded to ``extract_features``.
        layer: Layer index forwarded to ``extract_features``.
        num_eig: Number of eigenvectors, forwarded to ``compute_ncut``.
        node_type: Either a bare node type or a ``"type: description"`` label;
            only the text before the first ``:`` is used.
        affinity_focal_gamma: Forwarded to ``compute_ncut``.
        num_sample_ncut: Forwarded to ``compute_ncut``.
        knn_ncut: Forwarded to ``compute_ncut``.
        embedding_method: ``"UMAP"`` or ``"t-SNE"``, forwarded to
            ``compute_ncut``.
        num_sample_tsne: Sample count for the embedding stage; also the upper
            bound (exclusive) for ``perplexity`` and ``n_neighbors``.
        knn_tsne: Forwarded to ``compute_ncut``.
        perplexity: t-SNE perplexity; clamped to ``num_sample_tsne - 1``.
        n_neighbors: UMAP neighbour count; clamped to ``num_sample_tsne - 1``.
        min_dist: Forwarded to ``compute_ncut``.

    Returns:
        A pair ``(output_images, example_gallery_items)``.  With no input the
        result is ``([], example_items)``; otherwise it is the list of PIL
        images and an empty list for the example gallery.
    """
    # `not images` covers both an empty list and None; `len(None)` would raise.
    if not images:
        return [], example_items

    if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
        gr.Warning("Perplexity/n_neighbors must be less than the number of samples.\n" f"Setting to {num_sample_tsne-1}.")
        # Clamp each value on its own so a valid setting is not overwritten
        # just because the other one was too large.
        upper_bound = num_sample_tsne - 1
        perplexity = min(perplexity, upper_bound)
        n_neighbors = min(n_neighbors, upper_bound)

    # Dropdown labels look like "block: sum of residual"; keep only the key.
    node_type = node_type.split(":")[0].strip()

    images = [image[0] for image in images]  # remove the label

    start = time.time()
    features = extract_features(
        images, model_name=model_name, node_type=node_type, layer=layer
    )
    print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")

    rgb = compute_ncut(
        features,
        num_eig=num_eig,
        num_sample_ncut=num_sample_ncut,
        affinity_focal_gamma=affinity_focal_gamma,
        knn_ncut=knn_ncut,
        knn_tsne=knn_tsne,
        num_sample_tsne=num_sample_tsne,
        embedding_method=embedding_method,
        perplexity=perplexity,
        n_neighbors=n_neighbors,
        min_dist=min_dist,
    )
    rgb = dont_use_too_much_green(rgb)
    return to_pil_images(rgb), []
# Gradio UI: inputs and examples on the left, outputs and parameters on the right.
with gr.Blocks() as demo:
    with gr.Row():
        # ---- Left column: image selection and examples ----
        with gr.Column(scale=5, min_width=200):
            gr.Markdown('### Input Images')
            input_gallery = gr.Gallery(
                value=[],
                label="Select images",
                show_label=False,
                elem_id="images",
                columns=[3],
                rows=[1],
                object_fit="contain",
                height="auto",
                type="pil",
                show_share_button=False,
            )
            submit_button = gr.Button("🔴Submit", elem_id="submit_button")
            clear_images_button = gr.Button("🗑️Clear Images")
            gr.Markdown('### Load Examples 👇')
            load_images_button = gr.Button("Load", elem_id="load-images-button")
            example_gallery = gr.Gallery(
                value=example_items,
                label="Example Set A",
                show_label=False,
                columns=[3],
                rows=[2],
                object_fit="scale-down",
                height="200px",
                show_share_button=False,
            )

        # ---- Right column: results and parameters ----
        with gr.Column(scale=5, min_width=200):
            gr.Markdown('### Output Images')
            output_gallery = gr.Gallery(
                value=[],
                label="NCUT Embedding",
                show_label=False,
                elem_id="ncut",
                columns=[3],
                rows=[1],
                object_fit="contain",
                height="auto",
            )
            model_dropdown = gr.Dropdown(
                [
                    "SAM(sam_vit_b)",
                    "MobileSAM",
                    "DiNO(dinov2_vitb14_reg)",
                    "CLIP(openai/clip-vit-base-patch16)",
                ],
                label="Model",
                value="SAM(sam_vit_b)",
                elem_id="model_name",
            )
            layer_slider = gr.Slider(
                0, 11, step=1, label="Layer", value=11, elem_id="layer"
            )
            num_eig_slider = gr.Slider(
                1,
                1000,
                step=1,
                label="Number of eigenvectors",
                value=100,
                elem_id="num_eig",
                info='increase for more clusters',
            )
            affinity_focal_gamma_slider = gr.Slider(
                0.01,
                1,
                step=0.01,
                label="Affinity focal gamma",
                value=0.3,
                elem_id="affinity_focal_gamma",
                info="decrease for shaper NCUT",
            )

            # Less frequently changed knobs stay collapsed by default.
            with gr.Accordion("Additional Parameters", open=False):
                node_type_dropdown = gr.Dropdown(
                    [
                        "attn: attention output",
                        "mlp: mlp output",
                        "block: sum of residual",
                    ],
                    label="Node type",
                    value="block: sum of residual",
                    elem_id="node_type",
                    info="which feature to take from each layer?",
                )
                num_sample_ncut_slider = gr.Slider(
                    100,
                    50000,
                    step=100,
                    label="num_sample (NCUT)",
                    value=10000,
                    elem_id="num_sample_ncut",
                    info="Nyström approximation",
                )
                knn_ncut_slider = gr.Slider(
                    1,
                    100,
                    step=1,
                    label="KNN (NCUT)",
                    value=10,
                    elem_id="knn_ncut",
                    info="Nyström approximation",
                )
                embedding_method_dropdown = gr.Dropdown(
                    ["t-SNE", "UMAP"],
                    label="Embedding method",
                    value="t-SNE",
                    elem_id="embedding_method",
                )
                num_sample_tsne_slider = gr.Slider(
                    100,
                    1000,
                    step=100,
                    label="num_sample (t-SNE/UMAP)",
                    value=300,
                    elem_id="num_sample_tsne",
                    info="Nyström approximation",
                )
                knn_tsne_slider = gr.Slider(
                    1,
                    100,
                    step=1,
                    label="KNN (t-SNE/UMAP)",
                    value=10,
                    elem_id="knn_tsne",
                    info="Nyström approximation",
                )
                perplexity_slider = gr.Slider(
                    10,
                    500,
                    step=10,
                    label="Perplexity (t-SNE)",
                    value=150,
                    elem_id="perplexity",
                )
                n_neighbors_slider = gr.Slider(
                    10,
                    500,
                    step=10,
                    label="n_neighbors (UMAP)",
                    value=150,
                    elem_id="n_neighbors",
                )
                min_dist_slider = gr.Slider(
                    0.1,
                    1,
                    step=0.1,
                    label="min_dist (UMAP)",
                    value=0.1,
                    elem_id="min_dist",
                )

    # Fills the input and output galleries with the bundled examples and
    # empties the example preview gallery.
    def load_default_images():
        return default_images, default_outputs, []

    # Empties both the input and the output gallery.
    def empty_input_and_output():
        return [], []

    # ---- Event wiring ----
    load_images_button.click(
        load_default_images,
        outputs=[input_gallery, output_gallery, example_gallery],
    )
    clear_images_button.click(
        empty_input_and_output,
        outputs=[input_gallery, output_gallery],
    )
    # The input order below must match the positional parameters of main_fn.
    submit_button.click(
        main_fn,
        inputs=[
            input_gallery,
            model_dropdown,
            layer_slider,
            num_eig_slider,
            node_type_dropdown,
            affinity_focal_gamma_slider,
            num_sample_ncut_slider,
            knn_ncut_slider,
            embedding_method_dropdown,
            num_sample_tsne_slider,
            knn_tsne_slider,
            perplexity_slider,
            n_neighbors_slider,
            min_dist_slider,
        ],
        outputs=[output_gallery, example_gallery],
    )

demo.launch()