Spaces:
Running
on
Zero
Running
on
Zero
update gpu
Browse files
app.py
CHANGED
@@ -378,13 +378,14 @@ class CLIP(torch.nn.Module):
|
|
378 |
causal_attention_mask=causal_attention_mask,
|
379 |
output_attentions=output_attentions,
|
380 |
)
|
381 |
-
|
|
|
382 |
hidden_states = residual + hidden_states
|
383 |
|
384 |
residual = hidden_states
|
385 |
hidden_states = self.layer_norm2(hidden_states)
|
386 |
hidden_states = self.mlp(hidden_states)
|
387 |
-
self.mlp_output = hidden_states.clone()
|
388 |
|
389 |
hidden_states = residual + hidden_states
|
390 |
|
@@ -393,7 +394,7 @@ class CLIP(torch.nn.Module):
|
|
393 |
if output_attentions:
|
394 |
outputs += (attn_weights,)
|
395 |
|
396 |
-
self.block_output = hidden_states.clone()
|
397 |
return outputs
|
398 |
|
399 |
setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
|
@@ -538,10 +539,13 @@ def compute_ncut(
|
|
538 |
affinity_focal_gamma=0.3,
|
539 |
knn_ncut=10,
|
540 |
knn_tsne=10,
|
|
|
541 |
num_sample_tsne=1000,
|
542 |
perplexity=500,
|
|
|
|
|
543 |
):
|
544 |
-
from ncut_pytorch import NCUT, rgb_from_tsne_3d
|
545 |
|
546 |
start = time.time()
|
547 |
eigvecs, eigvals = NCUT(
|
@@ -554,16 +558,23 @@ def compute_ncut(
|
|
554 |
print(f"NCUT time (cpu): {time.time() - start:.2f}s")
|
555 |
|
556 |
start = time.time()
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
567 |
|
568 |
rgb = rgb.reshape(features.shape[:3] + (3,))
|
569 |
return rgb
|
@@ -585,7 +596,7 @@ def to_pil_images(images):
|
|
585 |
for image in images
|
586 |
]
|
587 |
|
588 |
-
@spaces.GPU(duration=
|
589 |
def main_fn(
|
590 |
images,
|
591 |
model_name="SAM(sam_vit_b)",
|
@@ -595,9 +606,12 @@ def main_fn(
|
|
595 |
affinity_focal_gamma=0.3,
|
596 |
num_sample_ncut=10000,
|
597 |
knn_ncut=10,
|
|
|
598 |
num_sample_tsne=1000,
|
599 |
knn_tsne=10,
|
600 |
perplexity=500,
|
|
|
|
|
601 |
):
|
602 |
if perplexity >= num_sample_tsne:
|
603 |
# raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
|
@@ -620,7 +634,10 @@ def main_fn(
|
|
620 |
knn_ncut=knn_ncut,
|
621 |
knn_tsne=knn_tsne,
|
622 |
num_sample_tsne=num_sample_tsne,
|
|
|
623 |
perplexity=perplexity,
|
|
|
|
|
624 |
)
|
625 |
rgb = dont_use_too_much_green(rgb)
|
626 |
return to_pil_images(rgb)
|
@@ -642,10 +659,12 @@ demo = gr.Interface(
|
|
642 |
gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
|
643 |
gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
|
644 |
gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
|
645 |
-
gr.
|
646 |
-
gr.Slider(
|
647 |
-
gr.Slider(
|
648 |
-
|
|
|
|
|
649 |
]
|
650 |
)
|
651 |
|
|
|
378 |
causal_attention_mask=causal_attention_mask,
|
379 |
output_attentions=output_attentions,
|
380 |
)
|
381 |
+
hw = np.sqrt(hidden_states.shape[1]-1).astype(int)
|
382 |
+
self.attn_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
383 |
hidden_states = residual + hidden_states
|
384 |
|
385 |
residual = hidden_states
|
386 |
hidden_states = self.layer_norm2(hidden_states)
|
387 |
hidden_states = self.mlp(hidden_states)
|
388 |
+
self.mlp_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
389 |
|
390 |
hidden_states = residual + hidden_states
|
391 |
|
|
|
394 |
if output_attentions:
|
395 |
outputs += (attn_weights,)
|
396 |
|
397 |
+
self.block_output = rearrange(hidden_states.clone()[:, 1:], "b (h w) c -> b h w c", h=hw)
|
398 |
return outputs
|
399 |
|
400 |
setattr(self.model.vision_model.encoder.layers[0].__class__, "forward", new_forward)
|
|
|
539 |
affinity_focal_gamma=0.3,
|
540 |
knn_ncut=10,
|
541 |
knn_tsne=10,
|
542 |
+
embedding_method="UMAP",
|
543 |
num_sample_tsne=1000,
|
544 |
perplexity=500,
|
545 |
+
n_neighbors=500,
|
546 |
+
min_dist=0.1,
|
547 |
):
|
548 |
+
from ncut_pytorch import NCUT, rgb_from_tsne_3d, rgb_from_umap_3d
|
549 |
|
550 |
start = time.time()
|
551 |
eigvecs, eigvals = NCUT(
|
|
|
558 |
print(f"NCUT time (cpu): {time.time() - start:.2f}s")
|
559 |
|
560 |
start = time.time()
|
561 |
+
if embedding_method == "UMAP":
|
562 |
+
rgb = rgb_from_umap_3d(
|
563 |
+
eigvecs,
|
564 |
+
n_neighbors=n_neighbors,
|
565 |
+
min_dist=min_dist,
|
566 |
+
)
|
567 |
+
print(f"UMAP time (cpu): {time.time() - start:.2f}s")
|
568 |
+
elif embedding_method == "t-SNE":
|
569 |
+
X_3d, rgb = rgb_from_tsne_3d(
|
570 |
+
eigvecs,
|
571 |
+
num_sample=num_sample_tsne,
|
572 |
+
perplexity=perplexity,
|
573 |
+
knn=knn_tsne,
|
574 |
+
)
|
575 |
+
print(f"t-SNE time (cpu): {time.time() - start:.2f}s")
|
576 |
+
else:
|
577 |
+
raise ValueError(f"Embedding method {embedding_method} not supported.")
|
578 |
|
579 |
rgb = rgb.reshape(features.shape[:3] + (3,))
|
580 |
return rgb
|
|
|
596 |
for image in images
|
597 |
]
|
598 |
|
599 |
+
@spaces.GPU(duration=30)
|
600 |
def main_fn(
|
601 |
images,
|
602 |
model_name="SAM(sam_vit_b)",
|
|
|
606 |
affinity_focal_gamma=0.3,
|
607 |
num_sample_ncut=10000,
|
608 |
knn_ncut=10,
|
609 |
+
embedding_method="UMAP",
|
610 |
num_sample_tsne=1000,
|
611 |
knn_tsne=10,
|
612 |
perplexity=500,
|
613 |
+
n_neighbors=500,
|
614 |
+
min_dist=0.1,
|
615 |
):
|
616 |
if perplexity >= num_sample_tsne:
|
617 |
# raise gr.Error("Perplexity must be less than the number of samples for t-SNE.")
|
|
|
634 |
knn_ncut=knn_ncut,
|
635 |
knn_tsne=knn_tsne,
|
636 |
num_sample_tsne=num_sample_tsne,
|
637 |
+
embedding_method=embedding_method,
|
638 |
perplexity=perplexity,
|
639 |
+
n_neighbors=n_neighbors,
|
640 |
+
min_dist=min_dist,
|
641 |
)
|
642 |
rgb = dont_use_too_much_green(rgb)
|
643 |
return to_pil_images(rgb)
|
|
|
659 |
gr.Slider(0.01, 1, step=0.01, label="Affinity focal gamma", value=0.3, elem_id="affinity_focal_gamma", info="decrease for more aggressive cleaning on the affinity matrix"),
|
660 |
gr.Slider(100, 10000, step=100, label="num_sample (NCUT)", value=5000, elem_id="num_sample_ncut", info="for Nyström approximation"),
|
661 |
gr.Slider(1, 100, step=1, label="KNN (NCUT)", value=10, elem_id="knn_ncut", info="for Nyström approximation"),
|
662 |
+
gr.Dropdown(["t-SNE", "UMAP"], label="Embedding method", value="UMAP", elem_id="embedding_method"),
|
663 |
+
gr.Slider(100, 1000, step=100, label="num_sample (t-SNE/UMAP)", value=300, elem_id="num_sample_tsne", info="for Nyström approximation. Adding will slow down quite a lot"),
|
664 |
+
gr.Slider(1, 100, step=1, label="KNN (t-SNE/UMAP)", value=10, elem_id="knn_tsne", info="for Nyström approximation"),
|
665 |
+
gr.Slider(10, 500, step=10, label="Perplexity (t-SNE)", value=150, elem_id="perplexity", info="for t-SNE"),
|
666 |
+
gr.Slider(10, 500, step=10, label="n_neighbors (UMAP)", value=150, elem_id="n_neighbors", info="for UMAP"),
|
667 |
+
gr.Slider(0.1, 1, step=0.1, label="min_dist (UMAP)", value=0.1, elem_id="min_dist", info="for UMAP"),
|
668 |
]
|
669 |
)
|
670 |
|