Spaces: Running on Zero
Commit: update compare

app.py CHANGED
@@ -98,7 +98,7 @@ def pil_images_to_video(images, output_path, fps=5):
     # from pil images to numpy
     images = [np.array(image) for image in images]
 
-    print("Saving video to", output_path)
+    # print("Saving video to", output_path)
     import cv2
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     height, width, _ = images[0].shape
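This hunk only silences the debug print; the rest of the helper sits outside the hunk. For context, a minimal sketch of how such an OpenCV-based writer is typically completed — the writer loop, BGR conversion, and `release()` below are assumptions for illustration, not lines from app.py:

```python
import cv2
import numpy as np

def pil_images_to_video(images, output_path, fps=5):
    # Convert PIL images to numpy arrays (RGB, HxWx3).
    images = [np.array(image) for image in images]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    height, width, _ = images[0].shape
    writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for frame in images:
        # OpenCV expects BGR channel order when writing frames.
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
    return output_path
```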
@@ -338,7 +338,7 @@ def run_fn(
     recursion_l3_n_eigs=20,
     recursion_metric="euclidean",
 ):
-    print("Running...")
+    # print("Running...")
     if images is None:
         gr.Warning("No images selected.")
         return [], "No images selected."
@@ -373,10 +373,12 @@ def run_fn(
         "recursion_metric": recursion_metric,
         "video_output": video_output,
     }
-    print(kwargs)
+    # print(kwargs)
     num_images = len(images)
     if num_images > 100:
         return super_duper_long_run(images, **kwargs)
+    if recursion:
+        return longer_run(images, **kwargs)
     if num_images > 50:
         return longer_run(images, **kwargs)
     if old_school_ncut:
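run_fn packs every UI option into a kwargs dict and then routes the job to a runner sized to the workload; this commit adds a branch so that recursive NCUT always gets the longer GPU budget. A minimal sketch of that dispatch pattern — the runner bodies, `ncut_run`, and the `@spaces.GPU` durations are illustrative assumptions; only the runner names and thresholds come from the diff:

```python
import spaces  # ZeroGPU helper available on Hugging Face Spaces

def ncut_run(images, **kwargs):
    # Placeholder for the actual NCUT computation.
    return [], "done"

@spaces.GPU(duration=120)
def longer_run(images, **kwargs):
    return ncut_run(images, **kwargs)

@spaces.GPU(duration=300)
def super_duper_long_run(images, **kwargs):
    return ncut_run(images, **kwargs)

def dispatch(images, recursion=False, **kwargs):
    num_images = len(images)
    if num_images > 100:
        return super_duper_long_run(images, **kwargs)
    if recursion:  # new in this commit: recursion always uses the longer budget
        return longer_run(images, **kwargs)
    if num_images > 50:
        return longer_run(images, **kwargs)
    return ncut_run(images, **kwargs)
```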
@@ -406,7 +408,9 @@ def make_input_images_section():
 def make_input_video_section():
     gr.Markdown('### Input Video')
     input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
-
+    gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+    # max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
+    max_frames_number = gr.Slider(1, 200, step=1, label="Max frames", value=100, elem_id="max_frames")
     submit_button = gr.Button("🔴RUN", elem_id="submit_button")
     clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
     return input_gallery, submit_button, clear_images_button, max_frames_number
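The new "Max frames" slider caps how many frames are pulled from the uploaded video before the image backbone runs. The extraction itself is not part of this hunk; a minimal sketch of one way to subsample a video to at most max_frames frames with OpenCV — the function name and the uniform-stride policy are assumptions, not the repo's implementation:

```python
import cv2
from PIL import Image

def video_to_frames(video_path, max_frames=100):
    # Read the video and keep an evenly spaced subset of at most max_frames frames.
    cap = cv2.VideoCapture(video_path)
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    stride = max(1, total // max_frames) if total > 0 else 1
    frames, index = [], 0
    while len(frames) < max_frames:
        ok, frame = cap.read()
        if not ok:
            break
        if index % stride == 0:
            # OpenCV returns BGR; convert to RGB PIL images for the backbone.
            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
        index += 1
    cap.release()
    return frames
```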
@@ -428,7 +432,7 @@ def make_example_video_section():
     return load_video_button
 
 def make_dataset_images_section():
-    with gr.Accordion("➡️ Load from dataset", open=
+    with gr.Accordion("➡️ Click to expand: Load from dataset", open=False):
         dataset_names = [
             'UCSC-VLAA/Recap-COCO-30K',
             'nateraw/pascal-voc-2012',
@@ -440,7 +444,8 @@ def make_dataset_images_section():
         ]
         dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
         num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
-        random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
+        # random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
+        random_seed_slider = gr.Slider(0, 1000, step=1, label="Random seed", value=1, elem_id="random_seed")
         load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
         def load_dataset_images(dataset_name, num_images=10, random_seed=42):
             from datasets import load_dataset
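The random seed becomes a bounded slider, feeding load_dataset_images, whose body continues past this hunk. A minimal sketch of what such a seeded loader typically looks like — the split name, the sampling strategy, and the assumption that images live in an "image" column are illustrative, not taken from app.py:

```python
import random
from datasets import load_dataset

def load_dataset_images(dataset_name, num_images=10, random_seed=42):
    # Load the training split and draw a reproducible random subset of images.
    dataset = load_dataset(dataset_name, split='train')
    random.seed(random_seed)
    indices = random.sample(range(len(dataset)), k=min(num_images, len(dataset)))
    # Assumes the dataset stores its pictures in an "image" column of PIL images.
    return [dataset[i]['image'] for i in indices]
```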
@@ -524,8 +529,8 @@ with gr.Blocks() as demo:
             outputs=[output_gallery, logging_text]
         )
 
-    with gr.Tab('NCut
-        gr.Markdown('####
+    with gr.Tab('NCut'):
+        gr.Markdown('#### NCut (Legacy), not aligned, no Nyström approximation')
         gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
 
         gr.Markdown('---')
@@ -595,6 +600,10 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
                 input_gallery, submit_button, clear_images_button = make_input_images_section()
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                load_images_button.click(lambda x: default_images, outputs=[input_gallery])
+                example_gallery.visible = False
+                hide_button.visible = False
                 dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
                 num_images_slider.value = 100
                 dataset_dropdown.value = 'nielsr/CelebA-faces'
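This hunk reuses make_example_images_section() in the tab but hides the gallery and the hide button, keeping only the load button, which pushes default_images into the input gallery. A minimal sketch of that pattern, with the repo's helpers replaced by illustrative stand-ins (the component set and the generated placeholder images are assumptions):

```python
import gradio as gr
from PIL import Image

# Stand-in for the repo's default example images.
default_images = [Image.new('RGB', (64, 64), color) for color in ('red', 'green', 'blue')]

def make_example_images_section():
    load_images_button = gr.Button("Load Example Images")
    example_gallery = gr.Gallery(value=default_images, label="Examples")
    hide_button = gr.Button("Hide Examples")
    return load_images_button, example_gallery, hide_button

with gr.Blocks() as demo:
    input_gallery = gr.Gallery(label="Input images")
    load_images_button, example_gallery, hide_button = make_example_images_section()
    # Hide the parts of the section this tab does not need; only the button stays visible.
    example_gallery.visible = False
    hide_button.visible = False
    # With no inputs declared, the handler is called without arguments and just returns the defaults.
    load_images_button.click(lambda: default_images, outputs=[input_gallery])
```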
@@ -657,14 +666,13 @@ with gr.Blocks() as demo:
         )
 
 
-    with gr.Tab('
+    with gr.Tab('Video'):
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
-
+                video_input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
                 # load_video_button = make_example_video_section()
             with gr.Column(scale=5, min_width=200):
-
-                gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+                video_output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
                 [
                     model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
                     affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
@@ -679,27 +687,96 @@ with gr.Blocks() as demo:
                 # logging text box
                 logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
         load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
-
-        clear_images_button.click(lambda x: (None, []), outputs=[input_gallery, output_gallery])
+        clear_images_button.click(lambda x: (None, []), outputs=[video_input_gallery, video_output_gallery])
         place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
         place_holder_false.visible = False
         submit_button.click(
             run_fn,
             inputs=[
-
+                video_input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
                 embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
                 perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
                 place_holder_false, max_frame_number
             ],
-            outputs=[
+            outputs=[video_output_gallery, logging_text]
         )
 
-    with gr.Tab('
+    with gr.Tab('Text'):
         gr.Markdown('=== under construction ===')
         gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
         gr.Markdown('---')
         gr.Markdown('![ncut](https://ncut-pytorch.readthedocs.io/en/latest/images/gallery/llama3/llama3_layer_31.jpg)')
+
+    with gr.Tab('Compare'):
+
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
+                submit_button.visible = False
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                example_gallery.visible = False
+                hide_button.visible = False
+                dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
+                load_images_button.click(lambda x: default_images, outputs=input_gallery)
+
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model1')
+                output_gallery1 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut1", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button1 = gr.Button("🔴RUN", elem_id="submit_button1")
+                [
+                    model_dropdown1, layer_slider1, node_type_dropdown1, num_eig_slider1,
+                    affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                    embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                    perplexity_slider1, n_neighbors_slider1, min_dist_slider1,
+                    sampling_method_dropdown1
+                ] = make_parameters_section()
+                model_dropdown1.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider1.value = 11
+                node_type_dropdown1.value = 'block: sum of residual'
+                # logging text box
+                logging_text1 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model2')
+                output_gallery2 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut2", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button2 = gr.Button("🔴RUN", elem_id="submit_button2")
+                [
+                    model_dropdown2, layer_slider2, node_type_dropdown2, num_eig_slider2,
+                    affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                    embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                    perplexity_slider2, n_neighbors_slider2, min_dist_slider2,
+                    sampling_method_dropdown2
+                ] = make_parameters_section()
+                model_dropdown2.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider2.value = 9
+                node_type_dropdown2.value = 'attn: attention output'
+                # logging text box
+                logging_text2 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
+        clear_images_button.click(lambda x: ([], [], []), outputs=[input_gallery, output_gallery1, output_gallery2])
+        submit_button1.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown1, layer_slider1, num_eig_slider1, node_type_dropdown1,
+                affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                perplexity_slider1, n_neighbors_slider1, min_dist_slider1, sampling_method_dropdown1
+            ],
+            outputs=[output_gallery1, logging_text1]
+        )
+
+        submit_button2.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown2, layer_slider2, num_eig_slider2, node_type_dropdown2,
+                affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                perplexity_slider2, n_neighbors_slider2, min_dist_slider2, sampling_method_dropdown2
+            ],
+            outputs=[output_gallery2, logging_text2]
+        )
+
 
 demo.launch(share=True)
 
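The new Compare tab reuses the existing section builders and points two RUN buttons at the same run_fn, so one set of input images can be embedded with two different parameter sets side by side. A minimal sketch of that wiring pattern — compare_fn, the Dropdown choices, and the reduced component set are illustrative assumptions, not the repo's make_parameters_section:

```python
import gradio as gr

def compare_fn(images, model_name):
    # Placeholder for run_fn: return a gallery plus a log string.
    return images, f"ran {model_name} on {len(images or [])} image(s)"

with gr.Blocks() as demo:
    input_gallery = gr.Gallery(label="Input images")
    with gr.Row():
        with gr.Column():
            model1 = gr.Dropdown(['DiNO', 'CLIP'], value='DiNO', label="Model 1")
            out1, log1 = gr.Gallery(label="Output Model1"), gr.Textbox(label="Logging")
            run1 = gr.Button("RUN")
        with gr.Column():
            model2 = gr.Dropdown(['DiNO', 'CLIP'], value='CLIP', label="Model 2")
            out2, log2 = gr.Gallery(label="Output Model2"), gr.Textbox(label="Logging")
            run2 = gr.Button("RUN")
    # Both buttons share one worker function; only the parameter components differ.
    run1.click(compare_fn, inputs=[input_gallery, model1], outputs=[out1, log1])
    run2.click(compare_fn, inputs=[input_gallery, model2], outputs=[out2, log2])

# demo.launch()
```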