pablovela5620 committed
Commit 87df1fa
1 Parent(s): e841ccd

Upload gradio_app.py with huggingface_hub

Files changed (1)
  1. gradio_app.py +69 -85
gradio_app.py CHANGED
@@ -33,7 +33,6 @@ import numpy as np
 import PIL
 import torch
 from pathlib import Path
-from queue import SimpleQueue
 import trimesh
 import subprocess
 
@@ -44,13 +43,13 @@ from typing import Final, Literal
 
 from jaxtyping import Float64, Float32, UInt8
 
-from monopriors.relative_depth_models import (
-    get_relative_predictor,
-)
+from monopriors.relative_depth_models.depth_anything_v2 import DepthAnythingV2Predictor
 
 from mini_nvs_solver.custom_diffusers_pipeline.svd import StableVideoDiffusionPipeline
 from mini_nvs_solver.custom_diffusers_pipeline.scheduler import EulerDiscreteScheduler
-
+from mini_nvs_solver.threaded_logging_utils import svd_render_threaded
+from queue import Queue
+import threading
 
 SVD_HEIGHT: Final[int] = 576
 SVD_WIDTH: Final[int] = 1024
@@ -58,8 +57,8 @@ NEAR: Final[float] = 0.0001
 FAR: Final[float] = 500.0
 
 if gr.NO_RELOAD:
-    DepthAnythingV2Predictor = get_relative_predictor("DepthAnythingV2Predictor")(
-        device="cuda"
+    depth_predictor: DepthAnythingV2Predictor = DepthAnythingV2Predictor(
+        device="cuda", encoder="vitl"
     )
     SVD_PIPE = StableVideoDiffusionPipeline.from_pretrained(
         "stabilityai/stable-video-diffusion-img2vid-xt",
@@ -71,40 +70,16 @@ if gr.NO_RELOAD:
     SVD_PIPE.scheduler = scheduler
 
 
-def svd_render_threaded(
-    image_o: PIL.Image.Image,
-    masks: Float64[torch.Tensor, "b 72 128"],
-    cond_image: PIL.Image.Image,
-    lambda_ts: Float64[torch.Tensor, "n b"],
-    num_denoise_iters: Literal[2, 25, 50, 100],
-    weight_clamp: float,
-    log_queue: SimpleQueue | None = None,
-):
-    frames: list[PIL.Image.Image] = SVD_PIPE(
-        [image_o],
-        log_queue=log_queue,
-        temp_cond=cond_image,
-        mask=masks,
-        lambda_ts=lambda_ts,
-        weight_clamp=weight_clamp,
-        num_frames=25,
-        decode_chunk_size=8,
-        num_inference_steps=num_denoise_iters,
-    ).frames[0]
-    if log_queue is not None:
-        log_queue.put(frames)
-
-
 def svd_render(
     image_o: PIL.Image.Image,
     masks: Float64[torch.Tensor, "b 72 128"],
     cond_image: PIL.Image.Image,
     lambda_ts: Float64[torch.Tensor, "n b"],
-    num_denoise_iters: Literal[2, 25, 50, 100],
+    num_denoise_iters: Literal[2, 5, 25, 50, 100],
     weight_clamp: float,
-    log_queue: SimpleQueue | None = None,
+    svd_pipe: StableVideoDiffusionPipeline,
 ):
-    frames: list[PIL.Image.Image] = SVD_PIPE(
+    frames: list[PIL.Image.Image] = svd_pipe(
         [image_o],
         log_queue=None,
         temp_cond=cond_image,
@@ -132,8 +107,12 @@ def gradio_warped_image(
     major_radius: float = 60.0,
     minor_radius: float = 70.0,
     num_frames: int = 25,  # StableDiffusion Video generates 25 frames
-    progress=gr.Progress(track_tqdm=True),
+    progress=gr.Progress(track_tqdm=False),
 ):
+    if num_denoise_iters != 2 and IN_SPACES:
+        gr.Warning(
+            "Running on Zero, anything greater than 2 iterations may cause GPU abort due to long running time"
+        )
     # ensure that the degrees per frame is a float
     degrees_per_frame = float(degrees_per_frame)
 
@@ -181,7 +160,7 @@ def gradio_warped_image(
         cam_params=camera_list[0],
         near=NEAR,
         far=FAR,
-        depth_predictor=DepthAnythingV2Predictor,
+        depth_predictor=depth_predictor,
     )
 
     rr.log(
@@ -220,7 +199,7 @@ def gradio_warped_image(
         masks.append(mask_erosion_tensor)
 
     log_camera(cam_log_path, current_cam, np.asarray(warped_frame2))
-    yield stream.read(), None, [], ""
+    yield stream.read(), None, [], "Warping images"
 
     masks: Float64[torch.Tensor, "b 72 128"] = torch.cat(masks)
     # load sigmas to optimize for timestep
@@ -228,54 +207,56 @@ def gradio_warped_image(
     lambda_ts: Float64[torch.Tensor, "n b"] = load_lambda_ts(num_denoise_iters)
     progress(0.15, desc="Starting diffusion")
 
-    # to allow logging from a separate thread
-    # log_queue: SimpleQueue = SimpleQueue()
-    # handle = threading.Thread(
-    #     target=svd_render_threaded,
-    #     kwargs={
-    #         "image_o": rgb_resized,
-    #         "masks": masks,
-    #         "cond_image": cond_image,
-    #         "lambda_ts": lambda_ts,
-    #         "num_denoise_iters": num_denoise_iters,
-    #         "weight_clamp": 0.2,
-    #         "log_queue": None,
-    #     },
+    # frames: list[PIL.Image.Image] = svd_render(
+    #     image_o=rgb_resized,
+    #     masks=masks,
+    #     cond_image=cond_image,
+    #     lambda_ts=lambda_ts,
+    #     num_denoise_iters=num_denoise_iters,
+    #     weight_clamp=0.2,
+    #     svd_pipe=SVD_PIPE,
     # )
 
-    # handle.start()
-    # i = 0
-    # while True:
-    #     msg = log_queue.get()
-    #     match msg:
-    #         case frames if all(isinstance(frame, PIL.Image.Image) for frame in frames):
-    #             break
-    #         case entity_path, entity, times:
-    #             i += 1
-    #             rr.reset_time()
-    #             for timeline, time in times:
-    #                 if isinstance(time, int):
-    #                     rr.set_time_sequence(timeline, time)
-    #                 else:
-    #                     rr.set_time_seconds(timeline, time)
-    #             static = False
-    #             if entity_path == "diffusion_step":
-    #                 static = True
-    #             rr.log(entity_path, entity, static=static)
-    #             yield stream.read(), None, [], f"{i} out of {num_denoise_iters}"
-    #         case _:
-    #             assert False
-    # handle.join()
-    frames = svd_render(
-        image_o=rgb_resized,
-        masks=masks,
-        cond_image=cond_image,
-        lambda_ts=lambda_ts,
-        num_denoise_iters=num_denoise_iters,
-        weight_clamp=0.2,
-        log_queue=None,
+    # to allow logging from a separate thread
+    log_queue: Queue = Queue()
+    handle = threading.Thread(
+        target=svd_render_threaded,
+        kwargs={
+            "image_o": rgb_resized,
+            "masks": masks,
+            "cond_image": cond_image,
+            "lambda_ts": lambda_ts,
+            "num_denoise_iters": num_denoise_iters,
+            "weight_clamp": 0.2,
+            "svd_pipe": SVD_PIPE,
+            "log_queue": log_queue,
+        },
     )
 
+    handle.start()
+    i = 0
+    while True:
+        msg = log_queue.get()
+        match msg:
+            case frames if all(isinstance(frame, PIL.Image.Image) for frame in frames):
+                break
+            case entity_path, entity, times:
+                i += 1
+                rr.reset_time()
+                for timeline, time in times:
+                    if isinstance(time, int):
+                        rr.set_time_sequence(timeline, time)
+                    else:
+                        rr.set_time_seconds(timeline, time)
+                static = False
+                if entity_path == "latents":
+                    static = True
+                rr.log(entity_path, entity, static=static)
+                yield stream.read(), None, [], f"{i} out of {num_denoise_iters}"
+            case _:
+                assert False
+    handle.join()
+
     # all frames but the first one
     frame: np.ndarray
     for frame_id, (frame, cam_pararms) in enumerate(zip(frames, camera_list)):
@@ -283,20 +264,22 @@ def gradio_warped_image(
         rr.set_time_sequence("frame_id", frame_id)
         cam_log_path = parent_log_path / "generated_camera"
         generated_rgb_np: UInt8[np.ndarray, "h w 3"] = np.array(frame)
+        print(f"Logging frame {frame_id}")
         log_camera(cam_log_path, cam_pararms, generated_rgb_np, depth=None)
-    yield stream.read(), None, [], "finished"
+    yield stream.read(), None, [], "Logging generated frames"
 
     frames_to_nerfstudio(
         rgb_np_original, frames, trimesh_pc_original, camera_list, save_path
     )
     # zip up nerfstudio data
    zip_file_path = save_path / "nerfstudio.zip"
-    progress(0.95, desc="Zipping up camera data in nerfstudio format")
+    # progress(0.95, desc="Zipping up camera data in nerfstudio format")
     # Run the zip command
     subprocess.run(["zip", "-r", str(zip_file_path), str(save_path)], check=True)
     video_file_path = save_path / "output.mp4"
     mmcv.frames2video(str(save_path), str(video_file_path), fps=7)
     print(f"Video saved to {video_file_path}")
+
     yield stream.read(), video_file_path, [str(zip_file_path)], "finished"
 
 
@@ -328,7 +311,7 @@ with gr.Blocks() as demo:
     )
     iteration_num = gr.Textbox(
         value="",
-        label="Current Diffusion Step",
+        label="Status",
     )
     with gr.Tab(label="Outputs"):
         video_output = gr.Video(interactive=False)
@@ -339,6 +322,7 @@ with gr.Blocks() as demo:
     with gr.Row():
        viewer = Rerun(
            streaming=True,
+            height=800,
        )
 
     warp_img_btn.click(
@@ -350,7 +334,7 @@ with gr.Blocks() as demo:
     gr.Examples(
         [
             [
-                "/home/pablo/0Dev/docker/.per/repos/NVS_Solver/example_imgs/single/000001.jpg",
+                "examples/000001.jpg",
             ],
         ],
         fn=warp_img_btn,
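
The substantive change in this commit is moving SVD rendering onto a worker thread (`svd_render_threaded`, now imported from `mini_nvs_solver.threaded_logging_utils`) so per-step diffusion progress can be drained from a `queue.Queue` and streamed to the Rerun viewer while the pipeline runs. Below is a minimal, self-contained sketch of that producer/consumer pattern; `render_worker` and its fake frames are illustrative stand-ins, not code from the repo:

import threading
from queue import Queue


def render_worker(num_steps: int, log_queue: Queue) -> None:
    # Stand-in for svd_render_threaded: push one progress message per
    # denoising step, then push the finished result so the consumer stops.
    frames: list[int] = []
    for step in range(num_steps):
        frames.append(step * step)  # pretend work
        log_queue.put(("diffusion_step", step))
    log_queue.put(frames)  # the result itself is the termination message


def main() -> None:
    log_queue: Queue = Queue()
    handle = threading.Thread(
        target=render_worker, kwargs={"num_steps": 5, "log_queue": log_queue}
    )
    handle.start()
    while True:
        msg = log_queue.get()
        match msg:
            case list() as frames:  # finished result: exit the loop
                break
            case (entity_path, step):  # progress: report and keep waiting
                print(f"{entity_path}: step {step + 1} out of 5")
            case _:
                raise AssertionError(f"unexpected message: {msg!r}")
    handle.join()
    print(f"got {len(frames)} frames")


if __name__ == "__main__":
    main()

Using the result object itself as the final queue message, matched by type, is the same trick the app's `match msg` loop uses to decide when to stop consuming and join the thread.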
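The `yield stream.read(), None, [], "..."` updates in `gradio_warped_image` rely on Gradio's generator event handlers: a handler that yields pushes each yielded value to its output components as it is produced. A stripped-down sketch under that assumption, with hypothetical component names:

import time

import gradio as gr


def slow_task(n: float, progress=gr.Progress()):
    # Yield partial updates; Gradio streams each one to the Textbox output,
    # mirroring the incremental status messages in gradio_app.py.
    total = int(n)
    for i in range(total):
        progress(i / total, desc=f"step {i + 1}")
        time.sleep(0.5)  # stand-in for real work
        yield f"{i + 1} out of {total}"
    yield "finished"


with gr.Blocks() as demo:
    steps = gr.Number(value=5, label="Steps")
    status = gr.Textbox(value="", label="Status")
    run_btn = gr.Button("Run")
    run_btn.click(slow_task, inputs=[steps], outputs=[status])

if __name__ == "__main__":
    demo.launch()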