Spaces:

huzey
/

ncut-pytorch

Running on Zero

App Files Files Community

ncut-pytorch / app.py

huzey

optimize gpu allocation

8c6fc00 3 months ago

raw

history blame

10.1 kB

	import spaces
	import gradio as gr

	import torch
	from PIL import Image
	import numpy as np
	import time

	import gradio as gr

	from backbone import extract_features
	from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d


	def compute_ncut(
	    features,
	    num_eig=100,
	    num_sample_ncut=10000,
	    affinity_focal_gamma=0.3,
	    knn_ncut=10,
	    knn_tsne=10,
	    embedding_method="UMAP",
	    num_sample_tsne=300,
	    perplexity=150,
	    n_neighbors=150,
	    min_dist=0.1,
	):
	    """Compute NCUT eigenvectors of ``features`` and map them to RGB colours.

	    Args:
	        features: Feature array exposing ``shape`` and ``reshape``. It is
	            flattened to ``(-1, features.shape[-1])`` before NCUT and the
	            colours are reshaped back to ``features.shape[:3] + (3,)``
	            (presumably ``(batch, height, width, dim)`` -- confirm against
	            ``extract_features``).
	        num_eig: Forwarded to ``NCUT(num_eig=...)``.
	        num_sample_ncut: Forwarded to ``NCUT(num_sample=...)``.
	        affinity_focal_gamma: Forwarded to ``NCUT``.
	        knn_ncut: Forwarded to ``NCUT(knn=...)``.
	        knn_tsne: Forwarded to ``rgb_from_tsne_3d(knn=...)``; not used for
	            UMAP.
	        embedding_method: Either ``"UMAP"`` or ``"t-SNE"``.
	        num_sample_tsne: Forwarded to ``rgb_from_tsne_3d(num_sample=...)``;
	            not used for UMAP.
	        perplexity: Forwarded to ``rgb_from_tsne_3d``; not used for UMAP.
	        n_neighbors: Forwarded to ``rgb_from_umap_3d``; not used for t-SNE.
	        min_dist: Forwarded to ``rgb_from_umap_3d``; not used for t-SNE.

	    Returns:
	        The RGB output of the embedding helper, reshaped to
	        ``features.shape[:3] + (3,)``.

	    Raises:
	        ValueError: If ``embedding_method`` is neither ``"UMAP"`` nor
	            ``"t-SNE"``.
	    """
	    # Fail fast: reject an unknown method before paying for the NCUT solve.
	    if embedding_method not in ("UMAP", "t-SNE"):
	        raise ValueError(f"Embedding method {embedding_method} not supported.")

	    # Resolve the device once; every stage below uses the same one.
	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    start = time.time()
	    eigvecs, _eigvals = NCUT(
	        num_eig=num_eig,
	        num_sample=num_sample_ncut,
	        device=device,
	        affinity_focal_gamma=affinity_focal_gamma,
	        knn=knn_ncut,
	    ).fit_transform(features.reshape(-1, features.shape[-1]))
	    print(f"NCUT time: {time.time() - start:.2f}s")

	    start = time.time()
	    if embedding_method == "UMAP":
	        # NOTE(review): the UI labels num_sample/KNN as "t-SNE/UMAP", but
	        # only the t-SNE branch receives them -- confirm this is intended.
	        _, rgb = rgb_from_umap_3d(
	            eigvecs,
	            n_neighbors=n_neighbors,
	            min_dist=min_dist,
	            device=device,
	        )
	        print(f"UMAP time: {time.time() - start:.2f}s")
	    else:
	        _, rgb = rgb_from_tsne_3d(
	            eigvecs,
	            num_sample=num_sample_tsne,
	            perplexity=perplexity,
	            knn=knn_tsne,
	            device=device,
	        )
	        print(f"t-SNE time: {time.time() - start:.2f}s")

	    # One RGB triple per feature vector, restored to the leading layout.
	    return rgb.reshape(features.shape[:3] + (3,))


	def dont_use_too_much_green(image_rgb):
	    """Reorder colour channels so the centre of the images is red-leading.

	    The per-channel mean is taken over the central window (30% to 70% of
	    dimensions 1 and 2, across all of dimension 0). Channels are then
	    permuted in descending order of that mean, so the strongest channel
	    lands at index 0 and the weakest at index 2.

	    Args:
	        image_rgb: 4-D tensor whose last dimension holds the colour channels.

	    Returns:
	        ``image_rgb`` with its last dimension permuted.
	    """
	    rows, cols = image_rgb.shape[1], image_rgb.shape[2]
	    row_window = slice(int(rows * 0.3), int(rows * 0.7))
	    col_window = slice(int(cols * 0.3), int(cols * 0.7))

	    # Mean of each channel over batch and the central window only.
	    channel_means = image_rgb[:, row_window, col_window].mean((0, 1, 2))
	    channel_order = channel_means.argsort(descending=True)
	    return image_rgb[:, :, :, channel_order]


	def to_pil_images(images):
	    """Convert an iterable of image tensors into 256x256 PIL images.

	    Each image is scaled by 255, moved to CPU, cast to ``uint8`` and resized
	    with nearest-neighbour sampling.

	    Args:
	        images: Iterable of tensors supporting ``* 255`` and ``.cpu().numpy()``
	            (presumably values in [0, 1] -- confirm against the caller).

	    Returns:
	        A list with one ``PIL.Image`` per input image.
	    """
	    pil_images = []
	    for image in images:
	        pixels = (image * 255).cpu().numpy().astype(np.uint8)
	        thumbnail = Image.fromarray(pixels).resize((256, 256), Image.NEAREST)
	        pil_images.append(thumbnail)
	    return pil_images

	# Indices used in the bundled example file names (index 4 is not listed).
	_EXAMPLE_IDS = (0, 1, 2, 3, 5)

	# Paths the "Load" button places into the input and output galleries.
	default_images = [f"./images/image_{i}.jpg" for i in _EXAMPLE_IDS]
	default_outputs = [f"./images/ncut_{i}.jpg" for i in _EXAMPLE_IDS]

	# "_small" variants of the same files.
	downscaled_images = [f"./images/image_{i}_small.jpg" for i in _EXAMPLE_IDS]
	downscaled_outputs = [f"./images/ncut_{i}_small.jpg" for i in _EXAMPLE_IDS]

	# Items shown in the example gallery: three inputs followed by their outputs.
	example_items = [*downscaled_images[:3], *downscaled_outputs[:3]]

	def main_fn(
	    images,
	    model_name="SAM(sam_vit_b)",
	    layer=-1,
	    num_eig=100,
	    node_type="block",
	    affinity_focal_gamma=0.3,
	    num_sample_ncut=10000,
	    knn_ncut=10,
	    embedding_method="UMAP",
	    num_sample_tsne=1000,
	    knn_tsne=10,
	    perplexity=500,
	    n_neighbors=500,
	    min_dist=0.1,
	):
	    """Extract backbone features for ``images`` and render their NCUT colours.

	    Args:
	        images: Sequence of gallery items; element ``[0]`` of each item is
	            the image and the rest (the label) is discarded.
	        model_name: Forwarded to ``extract_features``.
	        layer: Forwarded to ``extract_features``.
	        num_eig: Forwarded to ``compute_ncut``.
	        node_type: Node type, optionally followed by ``": description"``;
	            only the text before the first ``":"`` is used.
	        affinity_focal_gamma: Forwarded to ``compute_ncut``.
	        num_sample_ncut: Forwarded to ``compute_ncut``.
	        knn_ncut: Forwarded to ``compute_ncut``.
	        embedding_method: Forwarded to ``compute_ncut``.
	        num_sample_tsne: Forwarded to ``compute_ncut``; also the exclusive
	            upper bound for ``perplexity`` and ``n_neighbors``.
	        knn_tsne: Forwarded to ``compute_ncut``.
	        perplexity: Clamped to ``num_sample_tsne - 1`` if too large.
	        n_neighbors: Clamped to ``num_sample_tsne - 1`` if too large.
	        min_dist: Forwarded to ``compute_ncut``.

	    Returns:
	        A pair ``(output_images, example_gallery_items)``: the list of PIL
	        images and an empty list. With no input images it returns
	        ``([], example_items)`` instead.
	    """
	    # Same result run_fn gives for missing input: nothing to embed, so keep
	    # the examples visible instead of failing while iterating ``images``.
	    if not images:
	        return [], example_items

	    if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
	        gr.Warning("Perplexity/n_neighbors must be less than the number of samples.\n" f"Setting to {num_sample_tsne-1}.")
	        # Clamp each value on its own so an in-range setting is never raised
	        # just because the other one was out of range.
	        upper_bound = num_sample_tsne - 1
	        perplexity = min(perplexity, upper_bound)
	        n_neighbors = min(n_neighbors, upper_bound)

	    # Dropdown entries look like "block: sum of residual"; keep the key only.
	    node_type = node_type.split(":")[0].strip()

	    images = [image[0] for image in images]  # remove the label

	    start = time.time()
	    features = extract_features(
	        images, model_name=model_name, node_type=node_type, layer=layer
	    )
	    print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")

	    rgb = compute_ncut(
	        features,
	        num_eig=num_eig,
	        num_sample_ncut=num_sample_ncut,
	        affinity_focal_gamma=affinity_focal_gamma,
	        knn_ncut=knn_ncut,
	        knn_tsne=knn_tsne,
	        num_sample_tsne=num_sample_tsne,
	        embedding_method=embedding_method,
	        perplexity=perplexity,
	        n_neighbors=n_neighbors,
	        min_dist=min_dist,
	    )
	    rgb = dont_use_too_much_green(rgb)
	    return to_pil_images(rgb), []

	@spaces.GPU(duration=10)
	def quick_run(*args, **kwargs):
	    """Run ``main_fn`` under a ``spaces.GPU(duration=10)`` allocation.

	    All positional and keyword arguments are forwarded unchanged, so the
	    wrapper accepts exactly what ``main_fn`` accepts.

	    Returns:
	        Whatever ``main_fn`` returns.
	    """
	    return main_fn(*args, **kwargs)

	@spaces.GPU(duration=30)
	def long_run(*args, **kwargs):
	    """Run ``main_fn`` under a ``spaces.GPU(duration=30)`` allocation.

	    All positional and keyword arguments are forwarded unchanged, so the
	    wrapper accepts exactly what ``main_fn`` accepts.

	    Returns:
	        Whatever ``main_fn`` returns.
	    """
	    return main_fn(*args, **kwargs)

	@spaces.GPU(duration=120)
	def super_duper_long_run(*args, **kwargs):
	    """Run ``main_fn`` under a ``spaces.GPU(duration=120)`` allocation.

	    All positional and keyword arguments are forwarded unchanged, so the
	    wrapper accepts exactly what ``main_fn`` accepts.

	    Returns:
	        Whatever ``main_fn`` returns.
	    """
	    return main_fn(*args, **kwargs)

	def run_fn(
	    images,
	    model_name="SAM(sam_vit_b)",
	    layer=-1,
	    num_eig=100,
	    node_type="block",
	    affinity_focal_gamma=0.3,
	    num_sample_ncut=10000,
	    knn_ncut=10,
	    embedding_method="UMAP",
	    num_sample_tsne=1000,
	    knn_tsne=10,
	    perplexity=500,
	    n_neighbors=500,
	    min_dist=0.1,
	):
	    """Choose a GPU-duration tier for the request and run ``main_fn`` in it.

	    Tier selection:
	        * more than 100 images -> ``super_duper_long_run``
	        * more than 20 images, UMAP embedding, ``perplexity >= 250`` or
	          ``num_sample_tsne >= 500`` -> ``long_run``
	        * otherwise -> ``quick_run``

	    The parameters are those of ``main_fn`` and are forwarded unchanged.

	    Returns:
	        ``([], example_items)`` when there are no input images, otherwise
	        the result of ``main_fn``.
	    """
	    if not images:
	        return [], example_items

	    # Forward positionally, in main_fn's parameter order. Passing ``images``
	    # both positionally and by keyword would raise TypeError.
	    call_args = (
	        images,
	        model_name,
	        layer,
	        num_eig,
	        node_type,
	        affinity_focal_gamma,
	        num_sample_ncut,
	        knn_ncut,
	        embedding_method,
	        num_sample_tsne,
	        knn_tsne,
	        perplexity,
	        n_neighbors,
	        min_dist,
	    )

	    num_images = len(images)
	    if num_images > 100:
	        runner = super_duper_long_run
	    elif (
	        num_images > 20
	        or embedding_method == "UMAP"
	        or perplexity >= 250
	        or num_sample_tsne >= 500
	    ):
	        runner = long_run
	    else:
	        runner = quick_run
	    return runner(*call_args)

	# Build the Gradio UI: input/example galleries, output gallery, model and
	# NCUT/embedding controls, and the button handlers that connect them.
	with gr.Blocks() as demo:

	with gr.Row():
	with gr.Column(scale=5, min_width=200):
	# Input side: images to process plus the run/clear buttons.
	gr.Markdown('### Input Images')
	input_gallery = gr.Gallery(value=[], label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
	submit_button = gr.Button("🔴RUN", elem_id="submit_button")
	clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')

	# Example gallery starts out showing example_items.
	gr.Markdown('### Load Examples 👇')
	load_images_button = gr.Button("Load", elem_id="load-images-button")
	example_gallery = gr.Gallery(value=example_items, label="Example Set A", show_label=False, columns=[3], rows=[2], object_fit="scale-down", height="200px", show_share_button=False)

	with gr.Column(scale=5, min_width=200):
	# Output side: result gallery followed by the main parameters.
	gr.Markdown('### Output Images')
	output_gallery = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut", columns=[3], rows=[1], object_fit="contain", height="auto")
	model_dropdown = gr.Dropdown(["SAM(sam_vit_b)", "MobileSAM", "DiNO(dinov2_vitb14_reg)", "CLIP(openai/clip-vit-base-patch16)"], label="Model", value="SAM(sam_vit_b)", elem_id="model_name")
	# The slider sends 0..11 (default 11); main_fn's own default of -1 is never sent from the UI.
	layer_slider = gr.Slider(0, 11, step=1, label="Layer", value=11, elem_id="layer")
	num_eig_slider = gr.Slider(1, 1000, step=1, label="Number of eigenvectors", value=100, elem_id="num_eig", info='increase for more clusters')
	# NOTE(review): "shaper" in the info text below looks like a typo for "sharper".
	affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for shaper NCUT")

	# The UI defaults below (t-SNE, 300 samples, perplexity/n_neighbors 150)
	# differ from main_fn's keyword defaults; the UI values win because every
	# input is passed explicitly on submit.
	with gr.Accordion("Additional Parameters", open=False):
	# main_fn keeps only the text before ":" of the selected node type.
	node_type_dropdown = gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Node type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?")
	num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="num_sample (NCUT)", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
	knn_ncut_slider = gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="Nyström approximation")
	embedding_method_dropdown = gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="t-SNE", elem_id="embedding_method")
	num_sample_tsne_slider = gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
	knn_tsne_slider = gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="Nyström approximation")
	perplexity_slider = gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity")
	n_neighbors_slider = gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors")
	min_dist_slider = gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist")

	# "Load": fill the input and output galleries with the default example
	# paths and empty the example gallery.
	def load_default_images():
	return default_images, default_outputs, []

	# "Clear": empty the input and output galleries and show example_items again.
	def empty_input_and_output():
	return [], [], example_items

	load_images_button.click(load_default_images, outputs=[input_gallery, output_gallery, example_gallery])
	clear_images_button.click(empty_input_and_output, outputs=[input_gallery, output_gallery, example_gallery])
	# "RUN": the inputs are listed in the same order as main_fn's parameters.
	# NOTE(review): the handler is main_fn, not run_fn, so the quick_run /
	# long_run / super_duper_long_run wrappers are not reached from this button.
	submit_button.click(
	main_fn,
	inputs=[
	input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
	affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
	embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
	perplexity_slider, n_neighbors_slider, min_dist_slider
	],
	outputs=[output_gallery, example_gallery]
	)


	# Launch is called unconditionally (there is no __main__ guard here).
	demo.launch()