Spaces:

huzey
/

ncut-pytorch

Running on Zero

App Files Files Community

ncut-pytorch / app.py

huzey

update ui

9cd819b 3 months ago

raw

history blame

8.57 kB

	import spaces
	import gradio as gr

	import torch
	from PIL import Image
	import numpy as np
	import time

	import gradio as gr

	from backbone import extract_features
	from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d


	def compute_ncut(
	    features,
	    num_eig=100,
	    num_sample_ncut=10000,
	    affinity_focal_gamma=0.3,
	    knn_ncut=10,
	    knn_tsne=10,
	    embedding_method="UMAP",
	    num_sample_tsne=300,
	    perplexity=150,
	    n_neighbors=150,
	    min_dist=0.1,
	):
	    """Run NCUT on the features and colour the eigenvectors with a 3-D embedding.

	    The features are flattened to rows of ``features.shape[-1]`` values,
	    passed through ``NCUT(...).fit_transform`` and the resulting
	    eigenvectors are mapped to RGB by ``rgb_from_umap_3d`` or
	    ``rgb_from_tsne_3d``. Timings of both stages are printed.

	    Args:
	        features: Array-like with ``reshape``/``shape``; the output is
	            reshaped to ``features.shape[:3] + (3,)``.
	        num_eig: Forwarded to ``NCUT`` as ``num_eig``.
	        num_sample_ncut: Forwarded to ``NCUT`` as ``num_sample``.
	        affinity_focal_gamma: Forwarded to ``NCUT``.
	        knn_ncut: Forwarded to ``NCUT`` as ``knn``.
	        knn_tsne: Forwarded to ``rgb_from_tsne_3d`` as ``knn``.
	        embedding_method: ``"UMAP"`` or ``"t-SNE"``.
	        num_sample_tsne: Forwarded to ``rgb_from_tsne_3d`` as ``num_sample``.
	        perplexity: Forwarded to ``rgb_from_tsne_3d``.
	        n_neighbors: Forwarded to ``rgb_from_umap_3d``.
	        min_dist: Forwarded to ``rgb_from_umap_3d``.

	    Returns:
	        The RGB values reshaped to ``features.shape[:3] + (3,)``.

	    Raises:
	        ValueError: If ``embedding_method`` is neither ``"UMAP"`` nor
	            ``"t-SNE"`` (raised after the NCUT stage has already run).
	    """
	    # Both stages run on the same device; pick it once.
	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    tic = time.time()
	    ncut_model = NCUT(
	        num_eig=num_eig,
	        num_sample=num_sample_ncut,
	        device=device,
	        affinity_focal_gamma=affinity_focal_gamma,
	        knn=knn_ncut,
	    )
	    flat_features = features.reshape(-1, features.shape[-1])
	    eigvecs, _eigvals = ncut_model.fit_transform(flat_features)
	    print(f"NCUT time: {time.time() - tic:.2f}s")

	    tic = time.time()
	    if embedding_method == "t-SNE":
	        _, rgb = rgb_from_tsne_3d(
	            eigvecs,
	            num_sample=num_sample_tsne,
	            perplexity=perplexity,
	            knn=knn_tsne,
	            device=device,
	        )
	        print(f"t-SNE time: {time.time() - tic:.2f}s")
	    elif embedding_method == "UMAP":
	        _, rgb = rgb_from_umap_3d(
	            eigvecs,
	            n_neighbors=n_neighbors,
	            min_dist=min_dist,
	            device=device,
	        )
	        print(f"UMAP time: {time.time() - tic:.2f}s")
	    else:
	        raise ValueError(f"Embedding method {embedding_method} not supported.")

	    # Restore the leading three axes of the input and append the RGB axis.
	    output_shape = features.shape[:3] + (3,)
	    return rgb.reshape(output_shape)


	def dont_use_too_much_green(image_rgb):
	    """Permute the colour channels so the one strongest in the centre comes first.

	    The per-channel mean is taken over the central crop (30%-70% of
	    axis 1 and axis 2) across axis 0, and the last axis is reordered by
	    descending mean, so the dominant central channel is shown as red.

	    Args:
	        image_rgb: 4-D tensor indexed as ``[batch, axis1, axis2, channel]``.

	    Returns:
	        A tensor of the same shape with its last axis permuted.
	    """
	    # make sure the foveal (central) 40% of the image is red leading
	    x1, x2 = int(image_rgb.shape[1] * 0.3), int(image_rgb.shape[1] * 0.7)
	    y1, y2 = int(image_rgb.shape[2] * 0.3), int(image_rgb.shape[2] * 0.7)
	    # With a side of length 1 both bounds truncate to 0, which would give an
	    # empty crop, a NaN mean and an arbitrary channel order. Keep at least
	    # one row/column in the crop.
	    x2 = max(x2, x1 + 1)
	    y2 = max(y2, y1 + 1)
	    channel_means = image_rgb[:, x1:x2, y1:y2].mean((0, 1, 2))
	    channel_order = channel_means.argsort(descending=True)
	    return image_rgb[:, :, :, channel_order]


	def to_pil_images(images):
	    """Convert an iterable of image tensors into 256x256 PIL images.

	    Each item is multiplied by 255, moved to the CPU, converted to a
	    ``uint8`` NumPy array and resized with nearest-neighbour sampling.
	    """
	    pil_images = []
	    for tensor in images:
	        pixels = (tensor * 255).cpu().numpy().astype(np.uint8)
	        thumbnail = Image.fromarray(pixels).resize((256, 256), Image.NEAREST)
	        pil_images.append(thumbnail)
	    return pil_images

	# Full-size example inputs and their precomputed NCUT outputs.
	default_images = [
	    './images/image_0.jpg',
	    './images/image_1.jpg',
	    './images/image_2.jpg',
	    './images/image_3.jpg',
	    './images/image_5.jpg',
	]
	default_outputs = [
	    './images/ncut_0.jpg',
	    './images/ncut_1.jpg',
	    './images/ncut_2.jpg',
	    './images/ncut_3.jpg',
	    './images/ncut_5.jpg',
	]

	# Downscaled copies of the same files, used for the example preview gallery.
	downscaled_images = [
	    './images/image_0_small.jpg',
	    './images/image_1_small.jpg',
	    './images/image_2_small.jpg',
	    './images/image_3_small.jpg',
	    './images/image_5_small.jpg',
	]
	downscaled_outputs = [
	    './images/ncut_0_small.jpg',
	    './images/ncut_1_small.jpg',
	    './images/ncut_2_small.jpg',
	    './images/ncut_3_small.jpg',
	    './images/ncut_5_small.jpg',
	]

	# Preview: the first three inputs followed by their three outputs.
	example_items = [*downscaled_images[:3], *downscaled_outputs[:3]]

	@spaces.GPU(duration=30)
	def main_fn(
	    images,
	    model_name="SAM(sam_vit_b)",
	    layer=-1,
	    num_eig=100,
	    node_type="block",
	    affinity_focal_gamma=0.3,
	    num_sample_ncut=10000,
	    knn_ncut=10,
	    embedding_method="UMAP",
	    num_sample_tsne=1000,
	    knn_tsne=10,
	    perplexity=500,
	    n_neighbors=500,
	    min_dist=0.1,
	):
	    """Gradio submit handler: extract features, run NCUT and return coloured images.

	    Args:
	        images: Gallery items; the first element of each item is used as
	            the image and the rest (the label) is dropped. May be empty
	            or ``None``.
	        model_name: Forwarded to ``extract_features``.
	        layer: Forwarded to ``extract_features``.
	        num_eig: Forwarded to ``compute_ncut``.
	        node_type: Dropdown text; only the part before the first ``":"``
	            is forwarded to ``extract_features``.
	        affinity_focal_gamma: Forwarded to ``compute_ncut``.
	        num_sample_ncut: Forwarded to ``compute_ncut``.
	        knn_ncut: Forwarded to ``compute_ncut``.
	        embedding_method: Forwarded to ``compute_ncut``.
	        num_sample_tsne: Forwarded to ``compute_ncut``; also the upper
	            bound (exclusive) for ``perplexity`` and ``n_neighbors``.
	        knn_tsne: Forwarded to ``compute_ncut``.
	        perplexity: Clamped to ``num_sample_tsne - 1`` if too large.
	        n_neighbors: Clamped to ``num_sample_tsne - 1`` if too large.
	        min_dist: Forwarded to ``compute_ncut``.

	    Returns:
	        A pair ``(output_images, example_gallery_items)``. With no input
	        images this is ``([], example_items)``; otherwise the PIL outputs
	        and an empty list.
	    """
	    # `not images` also covers None, which `len()` would reject with a
	    # TypeError (an empty gallery may be delivered as None).
	    if not images:
	        return [], example_items

	    if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
	        gr.Warning("Perplexity/n_neighbors must be less than the number of samples.\n" f"Setting to {num_sample_tsne-1}.")
	        # Clamp each value on its own so an in-range setting is never raised
	        # just because the other one was out of range.
	        upper_bound = num_sample_tsne - 1
	        perplexity = min(perplexity, upper_bound)
	        n_neighbors = min(n_neighbors, upper_bound)

	    # Dropdown entries look like "block: sum of residual"; keep the key only.
	    node_type = node_type.split(":")[0].strip()

	    images = [image[0] for image in images]  # remove the label

	    start = time.time()
	    features = extract_features(
	        images, model_name=model_name, node_type=node_type, layer=layer
	    )
	    print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")

	    rgb = compute_ncut(
	        features,
	        num_eig=num_eig,
	        num_sample_ncut=num_sample_ncut,
	        affinity_focal_gamma=affinity_focal_gamma,
	        knn_ncut=knn_ncut,
	        knn_tsne=knn_tsne,
	        num_sample_tsne=num_sample_tsne,
	        embedding_method=embedding_method,
	        perplexity=perplexity,
	        n_neighbors=n_neighbors,
	        min_dist=min_dist,
	    )
	    rgb = dont_use_too_much_green(rgb)
	    return to_pil_images(rgb), []


	# Gradio UI: input column (left), output + parameters column (right), then
	# the event wiring.
	# NOTE(review): nesting was reconstructed from a whitespace-stripped source;
	# confirm that the parameter widgets and the Accordion belong inside the
	# output column.
	with gr.Blocks() as demo:

	    with gr.Row():
	        with gr.Column(scale=5, min_width=200):
	            gr.Markdown('### Input Images')
	            input_gallery = gr.Gallery(value=[], label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
	            submit_button = gr.Button("🔴Submit", elem_id="submit_button")
	            clear_images_button = gr.Button("🗑️Clear Images")

	            gr.Markdown('### Load Examples 👇')
	            load_images_button = gr.Button("Load", elem_id="load-images-button")
	            example_gallery = gr.Gallery(value=example_items, label="Example Set A", show_label=False, columns=[3], rows=[2], object_fit="scale-down", height="200px", show_share_button=False)

	        with gr.Column(scale=5, min_width=200):
	            gr.Markdown('### Output Images')
	            output_gallery = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut", columns=[3], rows=[1], object_fit="contain", height="auto")
	            model_dropdown = gr.Dropdown(["SAM(sam_vit_b)", "MobileSAM", "DiNO(dinov2_vitb14_reg)", "CLIP(openai/clip-vit-base-patch16)"], label="Model", value="SAM(sam_vit_b)", elem_id="model_name")
	            layer_slider = gr.Slider(0, 11, step=1, label="Layer", value=11, elem_id="layer")
	            num_eig_slider = gr.Slider(1, 1000, step=1, label="Number of eigenvectors", value=100, elem_id="num_eig", info='increase for more clusters')
	            affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for sharper NCUT")

	            with gr.Accordion("Additional Parameters", open=False):
	                node_type_dropdown = gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Node type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?")
	                num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="num_sample (NCUT)", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
	                knn_ncut_slider = gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="Nyström approximation")
	                embedding_method_dropdown = gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="t-SNE", elem_id="embedding_method")
	                num_sample_tsne_slider = gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
	                knn_tsne_slider = gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="Nyström approximation")
	                perplexity_slider = gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity")
	                n_neighbors_slider = gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors")
	                min_dist_slider = gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist")

	    def load_default_images():
	        # Fill input and output galleries with the bundled examples and
	        # empty the example preview gallery.
	        return default_images, default_outputs, []

	    def empty_input_and_output():
	        # Clear both the input and the output gallery.
	        return [], []

	    load_images_button.click(load_default_images, outputs=[input_gallery, output_gallery, example_gallery])
	    clear_images_button.click(empty_input_and_output, outputs=[input_gallery, output_gallery])
	    # The order of `inputs` must match the positional parameters of main_fn.
	    submit_button.click(
	        main_fn,
	        inputs=[
	            input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
	            affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
	            embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
	            perplexity_slider, n_neighbors_slider, min_dist_slider
	        ],
	        outputs=[output_gallery, example_gallery]
	    )


	demo.launch()