huzey committed on
Commit 5ca0c5b
Parent: 88c1ef1

update tabs

Files changed (2):
  1. app.py +284 -31
  2. requirements.txt +1 -0
app.py CHANGED
@@ -8,6 +8,7 @@ if USE_SPACES: # huggingface ZeroGPU
 except ImportError:
     USE_SPACES = False # run on standard GPU
 
+import os
 import gradio as gr
 
 import torch
@@ -34,6 +35,7 @@ def compute_ncut(
     n_neighbors=150,
     min_dist=0.1,
     sampling_method="fps",
+    metric="cosine",
 ):
     logging_str = ""
 
@@ -52,6 +54,7 @@ def compute_ncut(
         affinity_focal_gamma=affinity_focal_gamma,
         knn=knn_ncut,
         sample_method=sampling_method,
+        distance=metric,
     ).fit_transform(features.reshape(-1, features.shape[-1]))
     # print(f"NCUT time: {time.time() - start:.2f}s")
     logging_str += f"NCUT time: {time.time() - start:.2f}s\n"
@@ -70,7 +73,7 @@ def compute_ncut(
     logging_str += f"{embedding_method} time: {time.time() - start:.2f}s\n"
 
     rgb = rgb.reshape(features.shape[:3] + (3,))
-    return rgb, logging_str
+    return rgb, logging_str, eigvecs
 
 
 def dont_use_too_much_green(image_rgb):
@@ -90,6 +93,48 @@ def to_pil_images(images):
     ]
 
 
+
+def pil_images_to_video(images, output_path, fps=5):
+    # from pil images to numpy
+    images = [np.array(image) for image in images]
+
+    print("Saving video to", output_path)
+    import cv2
+    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+    height, width, _ = images[0].shape
+    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
+    for image in images:
+        out.write(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
+    out.release()
+    return output_path
+
+# save up to 100 videos in disk
+class VideoCache:
+    def __init__(self, max_videos=100):
+        self.max_videos = max_videos
+        self.videos = {}
+
+    def add_video(self, video_path):
+        if len(self.videos) >= self.max_videos:
+            pop_path = self.videos.popitem()[0]
+            try:
+                os.remove(pop_path)
+            except:
+                pass
+        self.videos[video_path] = video_path
+
+    def get_video(self, video_path):
+        return self.videos.get(video_path, None)
+
+video_cache = VideoCache()
+
+def get_random_path(length=10):
+    import random
+    import string
+    name = ''.join(random.choices(string.ascii_lowercase + string.digits, k=length))
+    path = f'/tmp/{name}.mp4'
+    return path
+
 default_images = ['./images/image_0.jpg', './images/image_1.jpg', './images/image_2.jpg', './images/image_3.jpg', './images/image_5.jpg']
 default_outputs = ['./images/ncut_0.jpg', './images/ncut_1.jpg', './images/ncut_2.jpg', './images/ncut_3.jpg', './images/ncut_5.jpg']
 default_outputs_independent = ['./images/ncut_0_independent.jpg', './images/ncut_1_independent.jpg', './images/ncut_2_independent.jpg', './images/ncut_3_independent.jpg', './images/ncut_5_independent.jpg']
@@ -99,6 +144,8 @@ downscaled_outputs = ['./images/ncut_0_small.jpg', './images/ncut_1_small.jpg',
 
 example_items = downscaled_images[:3] + downscaled_outputs[:3]
 
+
+
 def ncut_run(
     images,
     model_name="SAM(sam_vit_b)",
@@ -116,6 +163,11 @@ def ncut_run(
     min_dist=0.1,
     sampling_method="fps",
     old_school_ncut=False,
+    recursion=False,
+    recursion_l2_n_eigs=50,
+    recursion_l3_n_eigs=20,
+    recursion_metric="euclidean",
+    video_output=False,
 ):
     logging_str = ""
     if perplexity >= num_sample_tsne or n_neighbors >= num_sample_tsne:
@@ -137,29 +189,38 @@ def ncut_run(
     # print(f"Feature extraction time (gpu): {time.time() - start:.2f}s")
     logging_str += f"Backbone time: {time.time() - start:.2f}s\n"
 
-    if not old_school_ncut: # joint across all images
-        rgb, _logging_str = compute_ncut(
-            features,
-            num_eig=num_eig,
-            num_sample_ncut=num_sample_ncut,
-            affinity_focal_gamma=affinity_focal_gamma,
-            knn_ncut=knn_ncut,
-            knn_tsne=knn_tsne,
-            num_sample_tsne=num_sample_tsne,
-            embedding_method=embedding_method,
-            perplexity=perplexity,
-            n_neighbors=n_neighbors,
-            min_dist=min_dist,
-            sampling_method=sampling_method,
-        )
-        logging_str += _logging_str
-        rgb = dont_use_too_much_green(rgb)
+    if recursion:
+        rgbs = []
+        inp = features
+        for i, n_eigs in enumerate([num_eig, recursion_l2_n_eigs, recursion_l3_n_eigs]):
+            logging_str += f"Recursion #{i+1}\n"
+            rgb, _logging_str, eigvecs = compute_ncut(
+                inp,
+                num_eig=n_eigs,
+                num_sample_ncut=num_sample_ncut,
+                affinity_focal_gamma=affinity_focal_gamma,
+                knn_ncut=knn_ncut,
+                knn_tsne=knn_tsne,
+                num_sample_tsne=num_sample_tsne,
+                embedding_method=embedding_method,
+                perplexity=perplexity,
+                n_neighbors=n_neighbors,
+                min_dist=min_dist,
+                sampling_method=sampling_method,
+                metric="cosine" if i == 0 else recursion_metric,
+            )
+            logging_str += _logging_str
+            rgb = dont_use_too_much_green(rgb)
+            rgbs.append(to_pil_images(rgb))
+            inp = eigvecs.reshape(*features.shape[:3], -1)
+        return rgbs[0], rgbs[1], rgbs[2], logging_str
+
     if old_school_ncut: # individual images
         logging_str += "Running NCut for each image independently\n"
         rgb = []
         for i_image in range(features.shape[0]):
             feature = features[i_image]
-            _rgb, _logging_str = compute_ncut(
+            _rgb, _logging_str, _ = compute_ncut(
                 feature[None],
                 num_eig=num_eig,
                 num_sample_ncut=num_sample_ncut,
@@ -175,7 +236,34 @@ def ncut_run(
             )
             logging_str += _logging_str
             rgb.append(_rgb[0])
-        return to_pil_images(rgb), logging_str
+
+    if not old_school_ncut: # joint across all images
+        rgb, _logging_str, _ = compute_ncut(
+            features,
+            num_eig=num_eig,
+            num_sample_ncut=num_sample_ncut,
+            affinity_focal_gamma=affinity_focal_gamma,
+            knn_ncut=knn_ncut,
+            knn_tsne=knn_tsne,
+            num_sample_tsne=num_sample_tsne,
+            embedding_method=embedding_method,
+            perplexity=perplexity,
+            n_neighbors=n_neighbors,
+            min_dist=min_dist,
+            sampling_method=sampling_method,
+        )
+        logging_str += _logging_str
+
+        rgb = dont_use_too_much_green(rgb)
+
+
+    if video_output:
+        video_path = get_random_path()
+        video_cache.add_video(video_path)
+        pil_images_to_video(to_pil_images(rgb), video_path)
+        return video_path, logging_str
+    else:
+        return to_pil_images(rgb), logging_str
 
 def _ncut_run(*args, **kwargs):
     try:
@@ -214,6 +302,19 @@ if not USE_SPACES:
     def super_duper_long_run(*args, **kwargs):
         return _ncut_run(*args, **kwargs)
 
+def extract_video_frames(video_path, max_frames=100):
+    from decord import VideoReader
+    vr = VideoReader(video_path)
+    num_frames = len(vr)
+    if num_frames > max_frames:
+        gr.Warning(f"Video has {num_frames} frames. Only using {max_frames} frames. Evenly spaced.")
+        frame_idx = np.linspace(0, num_frames - 1, max_frames, dtype=int).tolist()
+    else:
+        frame_idx = list(range(num_frames))
+    frames = vr.get_batch(frame_idx).asnumpy()
+    # return as list of PIL images
+    return [(Image.fromarray(frames[i]), "") for i in range(frames.shape[0])]
+
 def run_fn(
     images,
     model_name="SAM(sam_vit_b)",
@@ -231,11 +332,22 @@ def run_fn(
     min_dist=0.1,
     sampling_method="fps",
     old_school_ncut=False,
+    max_frames=100,
+    recursion=False,
+    recursion_l2_n_eigs=50,
+    recursion_l3_n_eigs=20,
+    recursion_metric="euclidean",
 ):
+    print("Running...")
     if images is None:
        gr.Warning("No images selected.")
        return [], "No images selected."
 
+    video_output = False
+    if isinstance(images, str):
+        images = extract_video_frames(images, max_frames=max_frames)
+        video_output = True
+
     if sampling_method == "fps":
         sampling_method = "farthest"
 
@@ -255,7 +367,13 @@ def run_fn(
         "min_dist": min_dist,
         "sampling_method": sampling_method,
         "old_school_ncut": old_school_ncut,
+        "recursion": recursion,
+        "recursion_l2_n_eigs": recursion_l2_n_eigs,
+        "recursion_l3_n_eigs": recursion_l3_n_eigs,
+        "recursion_metric": recursion_metric,
+        "video_output": video_output,
     }
+    print(kwargs)
     num_images = len(images)
     if num_images > 100:
         return super_duper_long_run(images, **kwargs)
@@ -278,13 +396,21 @@ def run_fn(
 
 def make_input_images_section():
     gr.Markdown('### Input Images')
-    input_gallery = gr.Gallery(value=[], label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
+    input_gallery = gr.Gallery(value=None, label="Select images", show_label=False, elem_id="images", columns=[3], rows=[1], object_fit="contain", height="auto", type="pil", show_share_button=False)
     submit_button = gr.Button("🔴RUN", elem_id="submit_button")
     clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
     return input_gallery, submit_button, clear_images_button
 
+def make_input_video_section():
+    gr.Markdown('### Input Video')
+    input_gallery = gr.Video(value=None, label="Select video", elem_id="video-input", height="auto", show_share_button=False)
+    max_frames_number = gr.Number(100, label="Max frames", elem_id="max_frames")
+    submit_button = gr.Button("🔴RUN", elem_id="submit_button")
+    clear_images_button = gr.Button("🗑️Clear", elem_id='clear_button')
+    return input_gallery, submit_button, clear_images_button, max_frames_number
+
 def make_example_images_section():
-    gr.Markdown('### Load from Cloud Dataset 👇')
+    gr.Markdown('### Load Images 👇')
     load_images_button = gr.Button("Load Example", elem_id="load-images-button")
     example_gallery = gr.Gallery(value=example_items, label="Example Set A", show_label=False, columns=[3], rows=[2], object_fit="scale-down", height="200px", show_share_button=False, elem_id="example-gallery")
     hide_button = gr.Button("Hide Example", elem_id="hide-button")
@@ -294,24 +420,30 @@ def make_example_images_section():
     )
     return load_images_button, example_gallery, hide_button
 
+def make_example_video_section():
+    gr.Markdown('### Load Video 👇')
+    load_video_button = gr.Button("Load Example", elem_id="load-video-button")
+    return load_video_button
+
 def make_dataset_images_section():
-    with gr.Accordion(" Load from dataset", open=True):
+    with gr.Accordion("➡️ Load from dataset", open=True):
         dataset_names = [
             'UCSC-VLAA/Recap-COCO-30K',
             'nateraw/pascal-voc-2012',
             'johnowhitaker/imagenette2-320',
             'jainr3/diffusiondb-pixelart',
+            'nielsr/CelebA-faces',
            'JapanDegitalMaterial/Places_in_Japan',
            'Borismile/Anime-dataset',
         ]
         dataset_dropdown = gr.Dropdown(dataset_names, label="Dataset name", value="UCSC-VLAA/Recap-COCO-30K", elem_id="dataset")
         num_images_slider = gr.Slider(1, 200, step=1, label="Number of images", value=9, elem_id="num_images")
-        random_seed_slider = gr.Number(0, label="Random seed", value=42, elem_id="random_seed")
+        random_seed_slider = gr.Number(0, label="Random seed", elem_id="random_seed")
         load_dataset_button = gr.Button("Load Dataset", elem_id="load-dataset-button")
         def load_dataset_images(dataset_name, num_images=10, random_seed=42):
             from datasets import load_dataset
             try:
-                dataset = load_dataset(dataset_name)
+                dataset = load_dataset(dataset_name, trust_remote_code=True)
                 key = list(dataset.keys())[0]
                 dataset = dataset[key]
             except Exception as e:
@@ -339,9 +471,9 @@ def make_parameters_section():
     num_eig_slider = gr.Slider(1, 1000, step=1, label="NCUT: Number of eigenvectors", value=100, elem_id="num_eig", info='increase for more clusters')
     affinity_focal_gamma_slider = gr.Slider(0.01, 1, step=0.01, label="NCUT: Affinity focal gamma", value=0.5, elem_id="affinity_focal_gamma", info="decrease for shaper segmentation")
 
-    with gr.Accordion(" Click to expand: more parameters", open=False):
+    with gr.Accordion("➡️ Click to expand: more parameters", open=False):
         num_sample_ncut_slider = gr.Slider(100, 50000, step=100, label="NCUT: num_sample", value=10000, elem_id="num_sample_ncut", info="Nyström approximation")
-        sampling_method_dropdown = gr.Dropdown(["fps", "random"], label="NCUT: Sampling method", value="fps", elem_id="sampling_method")
+        sampling_method_dropdown = gr.Dropdown(["fps", "random"], label="NCUT: Sampling method", value="fps", elem_id="sampling_method", info="Nyström approximation")
         knn_ncut_slider = gr.Slider(1, 100, step=1, label="NCUT: KNN", value=10, elem_id="knn_ncut", info="Nyström approximation")
         embedding_method_dropdown = gr.Dropdown(["tsne_3d", "umap_3d", "umap_shpere", "tsne_2d", "umap_2d"], label="Coloring method", value="tsne_3d", elem_id="embedding_method")
         num_sample_tsne_slider = gr.Slider(100, 1000, step=100, label="t-SNE/UMAP: num_sample", value=300, elem_id="num_sample_tsne", info="Nyström approximation")
@@ -357,7 +489,7 @@ def make_parameters_section():
 
 with gr.Blocks() as demo:
 
-    with gr.Tab('AlignedCut (Images)'):
+    with gr.Tab('AlignedCut'):
 
         with gr.Row():
             with gr.Column(scale=5, min_width=200):
@@ -392,10 +524,15 @@ with gr.Blocks() as demo:
 
     with gr.Tab('NCut (Legacy)'):
         gr.Markdown('#### Ncut, not aligned, no Nyström approximation')
-        gr.Markdown('1. Each image is solved independently, _color is not aligned across images_')
-        gr.Markdown('2. No Nyström approximation')
+        gr.Markdown('Each image is solved independently, _color is not aligned across images_')
 
-        gr.Markdown('### NCut (Legacy) vs. AlignedCut:')
+        with gr.Row():
+            with gr.Column(scale=3, min_width=200):
+                gr.Markdown('')
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### NCut vs. AlignedCut')
+            with gr.Column(scale=2, min_width=200):
+                gr.Markdown('')
        with gr.Row():
            with gr.Column(scale=5, min_width=200):
                gr.Markdown('#### Pros')
@@ -451,6 +588,122 @@ with gr.Blocks() as demo:
             ],
             outputs=[output_gallery, logging_text]
         )
+
+    with gr.Tab('Recursive Cut'):
+        gr.Markdown('NCUT can be applied recursively, the eigenvectors from previous iteration is the input for the next iteration NCUT. ')
+        gr.Markdown('__Recursive NCUT__ amplifies small object parts, please see [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/how_to_get_better_segmentation/#recursive-ncut)')
+
+        gr.Markdown('---')
+
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
+                dataset_dropdown, num_images_slider, random_seed_slider, load_dataset_button = make_dataset_images_section()
+                num_images_slider.value = 100
+                dataset_dropdown.value = 'nielsr/CelebA-faces'
+
+            with gr.Column(scale=5, min_width=200):
+                with gr.Accordion("➡️ Recursion config", open=True):
+                    l1_num_eig_slider = gr.Slider(1, 1000, step=1, label="Recursion #1: N eigenvectors", value=100, elem_id="l1_num_eig")
+                    l2_num_eig_slider = gr.Slider(1, 1000, step=1, label="Recursion #2: N eigenvectors", value=50, elem_id="l2_num_eig")
+                    l3_num_eig_slider = gr.Slider(1, 1000, step=1, label="Recursion #3: N eigenvectors", value=25, elem_id="l3_num_eig")
+                    metric_dropdown = gr.Dropdown(["euclidean", "cosine"], label="Recursion distance metric", value="cosine", elem_id="recursion_metric")
+
+                [
+                    model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                    embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown
+                ] = make_parameters_section()
+                num_eig_slider.visible = False
+                model_dropdown.value = 'DiNO(dinov2_vitb14_reg)'
+                layer_slider.value = 6
+                node_type_dropdown.value = 'attn: attention output'
+                affinity_focal_gamma_slider.value = 0.25
+            # logging text box
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output (Recursion #1)')
+                l1_gallery = gr.Gallery(value=[], label="Recursion #1", show_label=False, elem_id="ncut_l1", columns=[3], rows=[5], object_fit="contain", height="auto")
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output (Recursion #2)')
+                l2_gallery = gr.Gallery(value=[], label="Recursion #2", show_label=False, elem_id="ncut_l2", columns=[3], rows=[5], object_fit="contain", height="auto")
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown('### Output (Recursion #3)')
+                l3_gallery = gr.Gallery(value=[], label="Recursion #3", show_label=False, elem_id="ncut_l3", columns=[3], rows=[5], object_fit="contain", height="auto")
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown(' ')
+            with gr.Column(scale=5, min_width=200):
+                gr.Markdown(' ')
+            with gr.Column(scale=5, min_width=200):
+                logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+        true_placeholder = gr.Checkbox(label="True placeholder", value=True, elem_id="true_placeholder")
+        true_placeholder.visible = False
+        false_placeholder = gr.Checkbox(label="False placeholder", value=False, elem_id="false_placeholder")
+        false_placeholder.visible = False
+        number_placeholder = gr.Number(0, label="Number placeholder", elem_id="number_placeholder")
+        number_placeholder.visible = False
+        clear_images_button.click(lambda x: ([], [], [], []), outputs=[input_gallery, l1_gallery, l2_gallery, l3_gallery])
+        submit_button.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown, layer_slider, l1_num_eig_slider, node_type_dropdown,
+                affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
+                false_placeholder, number_placeholder, true_placeholder,
+                l2_num_eig_slider, l3_num_eig_slider, metric_dropdown,
+            ],
+            outputs=[l1_gallery, l2_gallery, l3_gallery, logging_text]
+        )
+
+
+    with gr.Tab('AlignedCut (Video)'):
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button, max_frame_number = make_input_video_section()
+                load_video_button = make_example_video_section()
+            with gr.Column(scale=5, min_width=200):
+                output_gallery = gr.Video(value=None, label="NCUT Embedding", elem_id="ncut", height="auto", show_share_button=False)
+                gr.Markdown('_image backbone model is used to extract features from each frame, NCUT is computed on all frames_')
+                [
+                    model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                    embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown
+                ] = make_parameters_section()
+                num_sample_tsne_slider.value = 1000
+                perplexity_slider.value = 500
+                n_neighbors_slider.value = 500
+                knn_tsne_slider.value = 20
+                # logging text box
+                logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+        load_images_button.click(lambda x: (default_images, default_outputs), outputs=[input_gallery, output_gallery])
+        load_video_button.click(lambda x: './images/ego4d_dog.mp4', outputs=[input_gallery])
+        clear_images_button.click(lambda x: (None, []), outputs=[input_gallery, output_gallery])
+        place_holder_false = gr.Checkbox(label="Place holder", value=False, elem_id="place_holder_false")
+        place_holder_false.visible = False
+        submit_button.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
+                affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                embedding_method_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown,
+                place_holder_false, max_frame_number
+            ],
+            outputs=[output_gallery, logging_text]
+        )
+
+    with gr.Tab('AlignedCut (Text)'):
+        gr.Markdown('=== under construction ===')
+        gr.Markdown('Please see the [Documentation](https://ncut-pytorch.readthedocs.io/en/latest/gallery_llama3/) for example of NCUT on text input.')
+        gr.Markdown('---')
+        gr.Markdown('![ncut](https://ncut-pytorch.readthedocs.io/en/latest/images/gallery/llama3/llama3_layer_31.jpg)')
+
 demo.launch(share=True)
 
 # %%
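The heart of the new 'Recursive Cut' tab is the recursion branch added to `ncut_run` above: each level's eigenvectors become the input features for the next level. Below is a condensed sketch of that loop, not the verbatim code from the diff; it assumes `compute_ncut`, `dont_use_too_much_green`, and `to_pil_images` from this app.py are in scope and that `features` is the (n_images, height, width, channels) backbone feature array.

```python
# Condensed sketch of the recursion branch in ncut_run (see the diff above).
# Assumes compute_ncut, dont_use_too_much_green, to_pil_images from app.py are in scope.
def recursive_cut(features, n_eigs_per_level=(100, 50, 20), recursion_metric="euclidean"):
    rgbs, inp = [], features
    for level, n_eigs in enumerate(n_eigs_per_level):
        # level 1 runs on the raw backbone features with cosine distance;
        # deeper levels run on the previous level's eigenvectors
        rgb, _log, eigvecs = compute_ncut(
            inp,
            num_eig=n_eigs,
            metric="cosine" if level == 0 else recursion_metric,
        )
        rgbs.append(to_pil_images(dont_use_too_much_green(rgb)))
        inp = eigvecs.reshape(*features.shape[:3], -1)  # eigenvectors feed the next level
    return rgbs  # one gallery of PIL images per recursion level
```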
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 torch
 torchvision
 ncut-pytorch
+opencv-python
 transformers
 datasets
 segment-anything @ git+https://github.com/facebookresearch/segment-anything.git
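requirements.txt gains opencv-python because the new `pil_images_to_video` helper encodes the per-frame NCUT visualizations into an mp4 for the video tab. Below is a minimal standalone sketch of that encoding step; `frames_to_mp4` is a hypothetical name, the OpenCV calls mirror the helper added in this commit.

```python
# Minimal sketch of why opencv-python is needed: write RGB PIL frames to an mp4.
import cv2
import numpy as np
from PIL import Image

def frames_to_mp4(pil_frames, output_path, fps=5):
    frames = [np.array(im.convert("RGB")) for im in pil_frames]
    height, width, _ = frames[0].shape
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for frame in frames:
        writer.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR order
    writer.release()
    return output_path

# Usage: frames_to_mp4([Image.open('./images/image_0.jpg')] * 10, '/tmp/demo.mp4')
```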