huzey committed
Commit e85b6ae
Parent(s): a29b195

update compare

Files changed (1): app.py (+94 −17)
app.py CHANGED
@@ -98,7 +98,7 @@ def pil_images_to_video(images, output_path, fps=5):
     # from pil images to numpy
     images = [np.array(image) for image in images]
 
-    print("Saving video to", output_path)
+    # print("Saving video to", output_path)
     import cv2
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     height, width, _ = images[0].shape
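
For context, this hunk only shows the top of `pil_images_to_video`. A minimal sketch of how such a helper is typically completed with OpenCV — the writer loop, RGB→BGR conversion, and `release()` below are assumptions, not code from this commit:

```python
import numpy as np
import cv2

def pil_images_to_video(images, output_path, fps=5):
    # from pil images to numpy
    images = [np.array(image) for image in images]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    height, width, _ = images[0].shape
    writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for frame in images:
        # PIL arrays are RGB; OpenCV expects BGR
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    writer.release()
```
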
@@ -338,7 +338,7 @@ def run_fn(
     recursion_l3_n_eigs=20,
     recursion_metric="euclidean",
 ):
-    print("Running...")
+    # print("Running...")
     if images is None:
         gr.Warning("No images selected.")
         return [], "No images selected."
@@ -373,10 +373,12 @@ def run_fn(
         "recursion_metric": recursion_metric,
         "video_output": video_output,
     }
-    print(kwargs)
+    # print(kwargs)
     num_images = len(images)
     if num_images > 100:
         return super_duper_long_run(images, **kwargs)
+    if recursion:
+        return longer_run(images, **kwargs)
     if num_images > 50:
         return longer_run(images, **kwargs)
     if old_school_ncut:
@@ -406,7 +408,9 @@ def make_input_images_section():
 def make_input_video_section():
     gr.Markdown('### Input Video')
     input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
-    max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
+    gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+    # max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
+    max_frames_number = gr.Slider(1, 200, step=1, label="Max frames", value=100, elem_id="max_frames")
     submit_button = gr.Button("🔴RUN", elem_id="submit_button")
     clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
     return input_gallery, submit_button, clear_images_button, max_frames_number
@@ -428,7 +432,7 @@ def make_example_video_section():
     return load_video_button
 
 def make_dataset_images_section():
-    with gr.Accordion("➡️ Load from dataset", open=True):
+    with gr.Accordion("➡️ Click to expand: Load from dataset", open=False):
         dataset_names = [
             'UCSC-VLAA/Recap-COCO-30K',
             'nateraw/pascal-voc-2012',
@@ -440,7 +444,8 @@ def make_dataset_images_section():
         ]
         dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
         num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
-        random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
+        # random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
+        random_seed_slider = gr.Slider(0, 1000, step=1, label="Random seed", value=1, elem_id="random_seed")
         load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
         def load_dataset_images(dataset_name, num_images=10, random_seed=42):
             from datasets import load_dataset
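
The hunk above only shows the signature of `load_dataset_images` and its `datasets` import. A sketch of what such a loader typically does with the seed and image-count controls — the split name and the `'image'` column are assumptions, not taken from app.py:

```python
from datasets import load_dataset

def load_dataset_images(dataset_name, num_images=10, random_seed=42):
    dataset = load_dataset(dataset_name, split="train")  # assumed split name
    dataset = dataset.shuffle(seed=random_seed)           # seed from the Random seed slider
    rows = dataset.select(range(min(num_images, len(dataset))))
    return [row["image"] for row in rows]                 # assumed image column
```
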
@@ -524,8 +529,8 @@ with gr.Blocks() as demo:
             outputs=[output_gallery, logging_text]
         )
 
-    with gr.Tab('NCut (Legacy)'):
-        gr.Markdown('#### Ncut, not aligned, no Nyström approximation')
+    with gr.Tab('NCut'):
+        gr.Markdown('#### NCut (Legacy), not aligned, no Nyström approximation')
         gr.Markdown('Each image is solved independently, <em>color is <b>not</b> aligned across images</em>')
 
         gr.Markdown('---')
@@ -595,6 +600,10 @@ with gr.Blocks() as demo:
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
                 input_gallery, submit_button, clear_images_button = make_input_images_section()
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                load_images_button.click(lambda x: default_images, outputs=[input_gallery])
+                example_gallery.visible = False
+                hide_button.visible = False
                 dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
                 num_images_slider.value = 100
                 dataset_dropdown.value = 'nielsr/CelebA-faces'
@@ -657,14 +666,13 @@ with gr.Blocks() as demo:
             )
 
 
-    with gr.Tab('AlignedCut (Video)'):
+    with gr.Tab('Video'):
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
-                input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
+                video_input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
                 # load_video_button = make_example_video_section()
             with gr.Column(scale=5, min_width=200):
-                output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
-                gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+                video_output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
                 [
                     model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
                     affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
@@ -679,27 +687,96 @@ with gr.Blocks() as demo:
                 # logging text box
                 logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
         load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
-        # load_video_button.click(lambda x: './images/ego4d_dog.mp4', outputs=[input_gallery])
-        clear_images_button.click(lambda x: (None, []), outputs=[input_gallery, output_gallery])
+        clear_images_button.click(lambda x: (None, []), outputs=[video_input_gallery, video_output_gallery])
         place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
         place_holder_false.visible = False
        submit_button.click(
            run_fn,
            inputs=[
-                input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
+                video_input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
                embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
                perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
                place_holder_false, max_frame_number
            ],
-            outputs=[output_gallery, logging_text]
+            outputs=[video_output_gallery, logging_text]
        )
 
-    with gr.Tab('AlignedCut (Text)'):
+    with gr.Tab('Text'):
         gr.Markdown('=== under construction ===')
         gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
         gr.Markdown('---')
         gr.Markdown('![ncut](https://ncut-pytorch.readthedocs.io/en/latest/images/gallery/llama3/llama3_layer_31.jpg)')
+
+    with gr.Tab('Compare'):
+
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
+                submit_button.visible = False
+                load_images_button, example_gallery, hide_button = make_example_images_section()
+                example_gallery.visible = False
+                hide_button.visible = False
+                dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
+                load_images_button.click(lambda x: default_images, outputs=input_gallery)
+
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model1')
+                output_gallery1 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut1", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button1 = gr.Button("🔴RUN", elem_id="submit_button1")
+                [
+                    model_dropdown1, layer_slider1, node_type_dropdown1, num_eig_slider1,
+                    affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                    embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                    perplexity_slider1, n_neighbors_slider1, min_dist_slider1,
+                    sampling_method_dropdown1
+                ] = make_parameters_section()
+                model_dropdown1.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider1.value = 11
+                node_type_dropdown1.value = 'block: sum of residual'
+                # logging text box
+                logging_text1 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output Model2')
+                output_gallery2 = gr.Gallery(value=[], label="NCUT Embedding", show_label=False, elem_id="ncut2", columns=[3], rows=[1], object_fit="contain", height="auto")
+                submit_button2 = gr.Button("🔴RUN", elem_id="submit_button2")
+                [
+                    model_dropdown2, layer_slider2, node_type_dropdown2, num_eig_slider2,
+                    affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                    embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                    perplexity_slider2, n_neighbors_slider2, min_dist_slider2,
+                    sampling_method_dropdown2
+                ] = make_parameters_section()
+                model_dropdown2.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider2.value = 9
+                node_type_dropdown2.value = 'attn: attention output'
+                # logging text box
+                logging_text2 = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
+        clear_images_button.click(lambda x: ([], [], []), outputs=[input_gallery, output_gallery1, output_gallery2])
+        submit_button1.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown1, layer_slider1, num_eig_slider1, node_type_dropdown1,
+                affinity_focal_gamma_slider1, num_sample_ncut_slider1, knn_ncut_slider1,
+                embedding_method_dropdown1, num_sample_tsne_slider1, knn_tsne_slider1,
+                perplexity_slider1, n_neighbors_slider1, min_dist_slider1, sampling_method_dropdown1
+            ],
+            outputs=[output_gallery1, logging_text1]
+        )
+
+        submit_button2.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown2, layer_slider2, num_eig_slider2, node_type_dropdown2,
+                affinity_focal_gamma_slider2, num_sample_ncut_slider2, knn_ncut_slider2,
+                embedding_method_dropdown2, num_sample_tsne_slider2, knn_tsne_slider2,
+                perplexity_slider2, n_neighbors_slider2, min_dist_slider2, sampling_method_dropdown2
+            ],
+            outputs=[output_gallery2, logging_text2]
+        )
+
 
 demo.launch(share=True)
 
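
The new Compare tab wires one shared input gallery to two independent parameter columns, each with its own RUN button calling the same run_fn. A stripped-down sketch of that wiring pattern — `process` is a hypothetical stand-in for run_fn, and the controls are reduced to a model dropdown and a layer slider per column:

```python
import gradio as gr

def process(images, model_name, layer):
    # placeholder: the real app runs NCUT on backbone features here
    return [], f"ran {model_name} @ layer {layer}"

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            inputs = gr.Gallery(label="Input images")
        with gr.Column():
            out1 = gr.Gallery(label="Model 1 output")
            model1 = gr.Dropdown(["DiNO(dinov2_vitb14_reg)"], value="DiNO(dinov2_vitb14_reg)", label="Model 1")
            layer1 = gr.Slider(1, 12, value=11, step=1, label="Layer")
            run1 = gr.Button("RUN")
            log1 = gr.Textbox(label="Logging")
        with gr.Column():
            out2 = gr.Gallery(label="Model 2 output")
            model2 = gr.Dropdown(["DiNO(dinov2_vitb14_reg)"], value="DiNO(dinov2_vitb14_reg)", label="Model 2")
            layer2 = gr.Slider(1, 12, value=9, step=1, label="Layer")
            run2 = gr.Button("RUN")
            log2 = gr.Textbox(label="Logging")
    # both buttons call the same function on the shared input gallery
    run1.click(process, inputs=[inputs, model1, layer1], outputs=[out1, log1])
    run2.click(process, inputs=[inputs, model2, layer2], outputs=[out2, log2])

demo.launch()
```
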