# ncut-pytorch / app.py
import spaces
import gradio as gr
import torch
from PIL import Image
import numpy as np
import time
from draft_gradio_backbone import extract_features
from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
def compute_ncut(
features,
num_eig=100,
num_sample_ncut=10000,
affinity_focal_gamma=0.3,
knn_ncut=10,
knn_tsne=10,
embedding_method="UMAP",
num_sample_tsne=300,
perplexity=150,
n_neighbors=150,
min_dist=0.1,
):
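    """Nyström NCUT on flattened features, then 3D spectral coloring.

    Returns an RGB tensor shaped features.shape[:3] + (3,) with values in
    [0, 1], and a string of timing logs.
    """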
logging_str = ""
start = time.time()
eigvecs, eigvals = NCUT(
num_eig=num_eig,
num_sample=num_sample_ncut,
device="cuda" if torch.cuda.is_available() else "cpu",
affinity_focal_gamma=affinity_focal_gamma,
knn=knn_ncut,
).fit_transform(features.reshape(-1, features.shape[-1]))
# print(f"NCUT time: {time.time() - start:.2f}s")
logging_str += f"NCUT time: {time.time() - start:.2f}s\n"
start = time.time()
if embedding_method == "UMAP":
X_3d, rgb = rgb_from_umap_3d(
eigvecs,
n_neighbors=n_neighbors,
min_dist=min_dist,
device="cuda" if torch.cuda.is_available() else "cpu",
)
# print(f"UMAP time: {time.time() - start:.2f}s")
logging_str += f"UMAP time: {time.time() - start:.2f}s\n"
elif embedding_method == "t-SNE":
X_3d, rgb = rgb_from_tsne_3d(
eigvecs,
num_sample=num_sample_tsne,
perplexity=perplexity,
knn=knn_tsne,
device="cuda" if torch.cuda.is_available() else "cpu",
)
# print(f"t-SNE time: {time.time() - start:.2f}s")
logging_str += f"t-SNE time: {time.time() - start:.2f}s\n"
else:
raise ValueError(f"Embedding method {embedding_method} not supported.")
rgb = rgb.reshape(features.shape[:3] + (3,))
return rgb, logging_str
def dont_use_too_much_green(image_rgb):
    # reorder color channels so the central 40% crop of the image is red-leading
    # (the channel with the highest mean there becomes red)
x1, x2 = int(image_rgb.shape[1] * 0.3), int(image_rgb.shape[1] * 0.7)
y1, y2 = int(image_rgb.shape[2] * 0.3), int(image_rgb.shape[2] * 0.7)
sum_values = image_rgb[:, x1:x2, y1:y2].mean((0, 1, 2))
sorted_indices = sum_values.argsort(descending=True)
image_rgb = image_rgb[:, :, :, sorted_indices]
return image_rgb
def to_pil_images(images):
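    # scale [0, 1] floats to uint8; NEAREST upsampling keeps segment colors crisp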
return [
Image.fromarray((image * 255).cpu().numpy().astype(np.uint8)).resize((256, 256), Image.NEAREST)
for image in images
]
default_images = ['./images/image_0.jpg', './images/image_1.jpg', './images/image_2.jpg', './images/image_3.jpg', './images/image_5.jpg']
default_outputs = ['./images/ncut_0.jpg', './images/ncut_1.jpg', './images/ncut_2.jpg', './images/ncut_3.jpg', './images/ncut_5.jpg']
downscaled_images = ['./images/image_0_small.jpg', './images/image_1_small.jpg', './images/image_2_small.jpg', './images/image_3_small.jpg', './images/image_5_small.jpg']
downscaled_outputs = ['./images/ncut_0_small.jpg', './images/ncut_1_small.jpg', './images/ncut_2_small.jpg', './images/ncut_3_small.jpg', './images/ncut_5_small.jpg']
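# thumbnails shown in the example gallery (first three input/output pairs)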
example_items = downscaled_images[:3] + downscaled_outputs[:3]
def ncut_run(
images,
model_name="SAM(sam_vit_b)",
layer=-1,
num_eig=100,
node_type="block",
affinity_focal_gamma=0.3,
num_sample_ncut=10000,
knn_ncut=10,
embedding_method="UMAP",
num_sample_tsne=1000,
knn_tsne=10,
perplexity=500,
n_neighbors=500,
min_dist=0.1,
):
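    """End-to-end run: extract backbone features, compute NCUT, colorize with
    t-SNE/UMAP, and return PIL images plus a timing log string."""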
logging_str = ""
if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
        # warn instead of raising, then clamp to a valid value
        gr.Warning(
            "Perplexity/n_neighbors must be less than the number of samples. "
            f"Setting both to {num_sample_tsne - 1}."
        )
        perplexity = num_sample_tsne - 1
        n_neighbors = num_sample_tsne - 1
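    # dropdown values look like "block: sum of residual"; keep only the key before the colon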
node_type = node_type.split(":")[0].strip()
    images = [image[0] for image in images]  # gallery items are (image, caption) pairs; keep the image
start = time.time()
features = extract_features(
images, model_name=model_name, node_type=node_type, layer=layer
)
# print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")
logging_str += f"Backbone time: {time.time() - start:.2f}s\n"
rgb, _logging_str = compute_ncut(
features,
num_eig=num_eig,
num_sample_ncut=num_sample_ncut,
affinity_focal_gamma=affinity_focal_gamma,
knn_ncut=knn_ncut,
knn_tsne=knn_tsne,
num_sample_tsne=num_sample_tsne,
embedding_method=embedding_method,
perplexity=perplexity,
n_neighbors=n_neighbors,
min_dist=min_dist,
)
logging_str += _logging_str
rgb = dont_use_too_much_green(rgb)
return to_pil_images(rgb), logging_str
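# ZeroGPU wrappers: @spaces.GPU reserves a GPU for at most `duration` seconds;
# run_fn below picks the cheapest tier that should fit each request.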
@spaces.GPU(duration=15)
def quick_run(images, **kwargs):
return ncut_run(images, **kwargs)
@spaces.GPU(duration=30)
def long_run(images, **kwargs):
return ncut_run(images, **kwargs)
@spaces.GPU(duration=60)
def longer_run(images, **kwargs):
return ncut_run(images, **kwargs)
@spaces.GPU(duration=120)
def super_duper_long_run(images, **kwargs):
return ncut_run(images, **kwargs)
def run_fn(
images,
model_name="SAM(sam_vit_b)",
layer=-1,
num_eig=100,
node_type="block",
affinity_focal_gamma=0.3,
num_sample_ncut=10000,
knn_ncut=10,
embedding_method="UMAP",
num_sample_tsne=1000,
knn_tsne=10,
perplexity=500,
n_neighbors=500,
min_dist=0.1,
):
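    """Validate the gallery input, then dispatch to a GPU-duration tier sized for the workload."""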
if images is None:
gr.Warning("No images selected.")
return [], "No images selected."
kwargs = {
"model_name": model_name,
"layer": layer,
"num_eig": num_eig,
"node_type": node_type,
"affinity_focal_gamma": affinity_focal_gamma,
"num_sample_ncut": num_sample_ncut,
"knn_ncut": knn_ncut,
"embedding_method": embedding_method,
"num_sample_tsne": num_sample_tsne,
"knn_tsne": knn_tsne,
"perplexity": perplexity,
"n_neighbors": n_neighbors,
"min_dist": min_dist,
}
num_images = len(images)
if num_images > 100:
return super_duper_long_run(images, **kwargs)
if num_images > 10:
return long_run(images, **kwargs)
if embedding_method == "UMAP":
        if n_neighbors >= 250 or num_sample_tsne >= 500:  # UMAP cost scales with n_neighbors
return longer_run(images, **kwargs)
return long_run(images, **kwargs)
if embedding_method == "t-SNE":
if perplexity >= 250 or num_sample_tsne >= 500:
return long_run(images, **kwargs)
return quick_run(images, **kwargs)
return quick_run(images, **kwargs)
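# build the Gradio UI: input gallery and examples on the left; outputs and parameters on the right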
with gr.Blocks() as demo:
with gr.Row():
with gr.Column(scale=5, min_width=200):
gr.Markdown('### Input Images')
input_gallery = gr.Gallery(value=[], label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
submit_button = gr.Button("🔴RUN", elem_id="submit_button")
clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
gr.Markdown('### Load Examples 👇')
load_images_button = gr.Button("Load", elem_id="load-images-button")
hide_button = gr.Button("Hide", elem_id="hide-button")
example_gallery = gr.Gallery(value=example_items, label="Example Set A", show_label=False, columns=[3], rows=[2], object_fit="scale-down", height="200px", show_share_button=False, elem_id="example-gallery")
hide_button.click(
fn=lambda: gr.update(visible=False),
outputs=example_gallery
)
with gr.Column(scale=5, min_width=200):
gr.Markdown('### Output Images')
output_gallery = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut", columns=[3], rows=[1], object_fit="contain", height="auto")
model_dropdown = gr.Dropdown(["SAM(sam_vit_b)", "MobileSAM", "DiNO(dinov2_vitb14_reg)", "CLIP(openai/clip-vit-base-patch16)"], label="Model", value="SAM(sam_vit_b)", elem_id="model_name")
layer_slider = gr.Slider(0, 11, step=1, label="Layer", value=11, elem_id="layer")
num_eig_slider = gr.Slider(1, 1000, step=1, label="Number of eigenvectors", value=100, elem_id="num_eig", info='increase for more clusters')
            affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for sharper NCUT")
with gr.Accordion("Additional Parameters", open=False):
node_type_dropdown = gr.Dropdown(["attn: attention output", "mlp: mlp output", "block: sum of residual"], label="Node type", value="block: sum of residual", elem_id="node_type", info="which feature to take from each layer?")
num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="num_sample (NCUT)", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
knn_ncut_slider = gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="Nyström approximation")
embedding_method_dropdown = gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="t-SNE", elem_id="embedding_method")
num_sample_tsne_slider = gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
knn_tsne_slider = gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="Nyström approximation")
perplexity_slider = gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity")
n_neighbors_slider = gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors")
min_dist_slider = gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist")
# logging text box
logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
def load_default_images():
return default_images, default_outputs
def empty_input_and_output():
return [], []
load_images_button.click(load_default_images, outputs=[input_gallery, output_gallery])
clear_images_button.click(empty_input_and_output, outputs=[input_gallery, output_gallery])
submit_button.click(
run_fn,
inputs=[
input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
perplexity_slider, n_neighbors_slider, min_dist_slider
],
outputs=[output_gallery, logging_text]
)
demo.launch()