import spaces

import time

import numpy as np
import torch
from PIL import Image

import gradio as gr

from backbone import extract_features
from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
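
# Pipeline: extract backbone features from the input images, compute NCUT
# eigenvectors, embed the eigenvectors to 3D with t-SNE or UMAP, and map
# the 3D embedding to RGB, giving one pseudo-color image per input image.
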
def compute_ncut(
features,
num_eig=100,
num_sample_ncut=10000,
affinity_focal_gamma=0.3,
knn_ncut=10,
knn_tsne=10,
embedding_method="UMAP",
num_sample_tsne=300,
perplexity=150,
n_neighbors=150,
min_dist=0.1,
):
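    """Run NCUT on the flattened features, then embed the eigenvectors to 3D
    and convert them to RGB. Returns a tensor shaped like the input feature
    grid with 3 color channels."""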
    device = "cuda" if torch.cuda.is_available() else "cpu"

    start = time.time()
    eigvecs, eigvals = NCUT(
        num_eig=num_eig,
        num_sample=num_sample_ncut,
        device=device,
        affinity_focal_gamma=affinity_focal_gamma,
        knn=knn_ncut,
    ).fit_transform(features.reshape(-1, features.shape[-1]))
print(f"NCUT time: {time.time() - start:.2f}s")
start = time.time()
if embedding_method == "UMAP":
X_3d, rgb = rgb_from_umap_3d(
eigvecs,
n_neighbors=n_neighbors,
min_dist=min_dist,
device="cuda" if torch.cuda.is_available() else "cpu",
)
print(f"UMAP time: {time.time() - start:.2f}s")
elif embedding_method == "t-SNE":
X_3d, rgb = rgb_from_tsne_3d(
eigvecs,
num_sample=num_sample_tsne,
perplexity=perplexity,
knn=knn_tsne,
device="cuda" if torch.cuda.is_available() else "cpu",
)
print(f"t-SNE time: {time.time() - start:.2f}s")
else:
raise ValueError(f"Embedding method {embedding_method} not supported.")
rgb = rgb.reshape(features.shape[:3] + (3,))
return rgb


def dont_use_too_much_green(image_rgb):
    # Permute the color channels so that the central 40% crop of the batch
    # (rows 30%-70%, cols 30%-70%) is red-leading: the channel with the
    # largest mean in that crop becomes R and the smallest becomes B.
    h1, h2 = int(image_rgb.shape[1] * 0.3), int(image_rgb.shape[1] * 0.7)
    w1, w2 = int(image_rgb.shape[2] * 0.3), int(image_rgb.shape[2] * 0.7)
    channel_means = image_rgb[:, h1:h2, w1:w2].mean((0, 1, 2))
    sorted_indices = channel_means.argsort(descending=True)
    return image_rgb[:, :, :, sorted_indices]


def to_pil_images(images):
    # Scale each float tensor to 8-bit and upsample to 256x256; NEAREST keeps
    # the segment boundaries crisp instead of blurring them.
    return [
        Image.fromarray((image * 255).cpu().numpy().astype(np.uint8)).resize((256, 256), Image.NEAREST)
        for image in images
    ]


default_images = ['./images/image_0.jpg', './images/image_1.jpg', './images/image_2.jpg', './images/image_3.jpg', './images/image_5.jpg']
default_outputs = ['./images/ncut_0.jpg', './images/ncut_1.jpg', './images/ncut_2.jpg', './images/ncut_3.jpg', './images/ncut_5.jpg']
downscaled_images = ['./images/image_0_small.jpg', './images/image_1_small.jpg', './images/image_2_small.jpg', './images/image_3_small.jpg', './images/image_5_small.jpg']
downscaled_outputs = ['./images/ncut_0_small.jpg', './images/ncut_1_small.jpg', './images/ncut_2_small.jpg', './images/ncut_3_small.jpg', './images/ncut_5_small.jpg']
example_items = downscaled_images[:3] + downscaled_outputs[:3]


def main_fn(
images,
model_name="SAM(sam_vit_b)",
layer=-1,
num_eig=100,
node_type="block",
affinity_focal_gamma=0.3,
num_sample_ncut=10000,
knn_ncut=10,
embedding_method="UMAP",
num_sample_tsne=1000,
knn_tsne=10,
perplexity=500,
n_neighbors=500,
min_dist=0.1,
):
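    """End-to-end run: feature extraction -> NCUT -> RGB -> PIL images.
    Returns the output images and an empty list that clears the example
    gallery."""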
    if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
        gr.Warning(
            "Perplexity/n_neighbors must be less than the number of samples. "
            f"Setting both to {num_sample_tsne - 1}."
        )
        perplexity = num_sample_tsne - 1
        n_neighbors = num_sample_tsne - 1
    node_type = node_type.split(":")[0].strip()  # e.g. "block: sum of residual" -> "block"
    images = [image[0] for image in images]  # gallery items are (image, caption) pairs; keep the image
start = time.time()
features = extract_features(
images, model_name=model_name, node_type=node_type, layer=layer
)
print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")
rgb = compute_ncut(
features,
num_eig=num_eig,
num_sample_ncut=num_sample_ncut,
affinity_focal_gamma=affinity_focal_gamma,
knn_ncut=knn_ncut,
knn_tsne=knn_tsne,
num_sample_tsne=num_sample_tsne,
embedding_method=embedding_method,
perplexity=perplexity,
n_neighbors=n_neighbors,
min_dist=min_dist,
)
rgb = dont_use_too_much_green(rgb)
return to_pil_images(rgb), []
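

# ZeroGPU wrappers: @spaces.GPU(duration=...) requests a GPU slot capped at
# that many seconds, so run_fn below picks the smallest slot that should fit
# the job instead of always paying for the longest one.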
@spaces.GPU(duration=10)
def quick_run(*args, **kwargs):
    return main_fn(*args, **kwargs)


@spaces.GPU(duration=30)
def long_run(*args, **kwargs):
    return main_fn(*args, **kwargs)


@spaces.GPU(duration=120)
def super_duper_long_run(*args, **kwargs):
    return main_fn(*args, **kwargs)


def run_fn(
images,
model_name="SAM(sam_vit_b)",
layer=-1,
num_eig=100,
node_type="block",
affinity_focal_gamma=0.3,
num_sample_ncut=10000,
knn_ncut=10,
embedding_method="UMAP",
num_sample_tsne=1000,
knn_tsne=10,
perplexity=500,
n_neighbors=500,
min_dist=0.1,
):
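    """Validate the inputs and dispatch main_fn to a ZeroGPU wrapper sized
    for the expected runtime."""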
if images is None:
return [], example_items
kwargs = {
"images": images,
"model_name": model_name,
"layer": layer,
"num_eig": num_eig,
"node_type": node_type,
"affinity_focal_gamma": affinity_focal_gamma,
"num_sample_ncut": num_sample_ncut,
"knn_ncut": knn_ncut,
"embedding_method": embedding_method,
"num_sample_tsne": num_sample_tsne,
"knn_tsne": knn_tsne,
"perplexity": perplexity,
"n_neighbors": n_neighbors,
"min_dist": min_dist,
}
    # Route to a GPU slot that matches the expected runtime: many images,
    # UMAP, or heavy t-SNE settings need a longer allocation. Note that
    # `images` is already in kwargs, so it must not also be passed
    # positionally (that would raise a duplicate-argument TypeError).
    num_images = len(images)
    if num_images > 100:
        return super_duper_long_run(**kwargs)
    if num_images > 20:
        return long_run(**kwargs)
    if embedding_method == "UMAP":
        return long_run(**kwargs)
    if perplexity >= 250 or num_sample_tsne >= 500:
        return long_run(**kwargs)
    return quick_run(**kwargs)
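

# Gradio UI: input gallery and examples on the left, output gallery and
# model/NCUT parameters on the right.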
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=5, min_width=200):
gr.Markdown('### Input Images')
input_gallery = gr.Gallery(value=[], label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
submit_button = gr.Button("🔴RUN", elem_id="submit_button")
clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
gr.Markdown('### Load Examples 👇')
load_images_button = gr.Button("Load", elem_id="load-images-button")
example_gallery = gr.Gallery(value=example_items, label="Example Set A", show_label=False, columns=[3], rows=[2], object_fit="scale-down", height="200px", show_share_button=False)
with gr.Column(scale=5, min_width=200):
gr.Markdown('### Output Images')
output_gallery = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut", columns=[3], rows=[1], object_fit="contain", height="auto")
model_dropdown = gr.Dropdown(["SAM(sam_vit_b)", "MobileSAM", "DiNO(dinov2_vitb14_reg)", "CLIP(openai/clip-vit-base-patch16)"], label="Model", value="SAM(sam_vit_b)", elem_id="model_name")
layer_slider = gr.Slider(0, 11, step=1, label="Layer", value=11, elem_id="layer")
num_eig_slider = gr.Slider(1, 1000, step=1, label="Number of eigenvectors", value=100, elem_id="num_eig", info='increase for more clusters')
            affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for sharper NCUT")
with gr.Accordion("Additional Parameters", open=False):
node_type_dropdown = gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Node type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?")
num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="num_sample (NCUT)", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
knn_ncut_slider = gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="Nyström approximation")
embedding_method_dropdown = gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="t-SNE", elem_id="embedding_method")
num_sample_tsne_slider = gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
knn_tsne_slider = gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="Nyström approximation")
perplexity_slider = gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity")
n_neighbors_slider = gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors")
min_dist_slider = gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist")

    def load_default_images():
        return default_images, default_outputs, []

    def empty_input_and_output():
        return [], [], example_items

load_images_button.click(load_default_images, outputs=[input_gallery, output_gallery, example_gallery])
clear_images_button.click(empty_input_and_output, outputs=[input_gallery, output_gallery, example_gallery])
submit_button.click(
        run_fn,
inputs=[
input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
perplexity_slider, n_neighbors_slider, min_dist_slider
],
outputs=[output_gallery, example_gallery]
)

demo.launch()