Lotus_Depth

App Files Community

ghostsInTheMachine committed 1 day ago

Commit

3e8f9ec

•

1 Parent(s): 7d32b39

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -16

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import shutil
 import time
 import ffmpeg
 import numpy as np
-from PIL import Image
 import moviepy.editor as mp
 from infer import lotus, load_models
 import logging
@@ -23,33 +23,53 @@ task_name = 'depth'
 pipe_g, pipe_d = load_models(task_name, device)
 # Preprocess the video to adjust resolution and frame rate
-def preprocess_video(video_path, target_fps=24, max_resolution=(512, 512)):
-    """Preprocess the video to resize and adjust its frame rate."""
     video = mp.VideoFileClip(video_path)
-    # Resize video if it's larger than the target resolution
-    if video.size[0] > max_resolution[0] or video.size[1] > max_resolution[1]:
-        video = video.resize(height=max_resolution[1])
     # Adjust FPS if target_fps is specified
     if target_fps > 0:
         video = video.set_fps(target_fps)
     return video
 # Process a single frame through the depth model
 def process_frame(frame, seed=0, target_size=(512, 512)):
     """Process a single frame and return depth map."""
     try:
         torch.cuda.empty_cache()  # Clear GPU cache
-        # Resize frame to the target size
-        image = Image.fromarray(frame).convert('RGB').resize(target_size, Image.BILINEAR)
         # Run inference
-        depth_map = lotus(image, 'depth', seed, device, pipe_g, pipe_d)
-        return depth_map
     except Exception as e:
         logger.error(f"Error processing frame: {e}")
@@ -64,13 +84,13 @@ def process_video(video_path, fps=0, seed=0):
         start_time = time.time()
         # Preprocess the video
-        video = preprocess_video(video_path, target_fps=fps, max_resolution=(512, 512))
         # Use original video FPS if not specified
         if fps == 0:
             fps = video.fps
-        frames = list(video.iter_frames(fps=video.fps))
         total_frames = len(frames)
         logger.info(f"Processing {total_frames} frames at {fps} FPS...")
@@ -86,7 +106,7 @@ def process_video(video_path, fps=0, seed=0):
             if depth_map is not None:
                 # Save frame
                 frame_path = os.path.join(frames_dir, f"frame_{i:06d}.png")
-                depth_map.save(frame_path)
                 # Update live preview every 10% progress
                 if i % max(1, total_frames // 10) == 0:
@@ -109,10 +129,11 @@ def process_video(video_path, fps=0, seed=0):
         try:
             # FFmpeg settings for high-quality MP4
             (
                 ffmpeg
-                .input(os.path.join(frames_dir, 'frame_%06d.png'), pattern_type='sequence', framerate=fps)
-                .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', crf=17)
                 .run(overwrite_output=True, quiet=True)
             )
             logger.info("MP4 video created successfully!")

 import time
 import ffmpeg
 import numpy as np
+from PIL import Image, ImageOps
 import moviepy.editor as mp
 from infer import lotus, load_models
 import logging
 pipe_g, pipe_d = load_models(task_name, device)
 # Preprocess the video to adjust resolution and frame rate
+def preprocess_video(video_path, target_fps=24, max_resolution=None):
+    """Preprocess the video to adjust its frame rate.
+
+    Opens ``video_path`` as a moviepy ``VideoFileClip`` and returns it. When
+    ``target_fps`` is greater than 0 the clip's FPS is set to that value;
+    otherwise the clip is returned exactly as loaded. ``max_resolution`` is
+    accepted but is not read anywhere in this body.
+    """
     video = mp.VideoFileClip(video_path)
     # Adjust FPS if target_fps is specified
     if target_fps > 0:
         video = video.set_fps(target_fps)
     return video
+# Resize image while preserving aspect ratio and adding padding
+def resize_and_pad(image, target_size):
+    """Shrink an image to fit ``target_size`` and center it on a padded canvas.
+
+    Args:
+        image: PIL image to fit. NOTE: it is resized IN PLACE by
+            ``Image.thumbnail``; after the call ``image.size`` is the fitted
+            size, and ``process_frame`` reads it to compute its crop box.
+        target_size: ``(width, height)`` of the returned canvas.
+
+    Returns:
+        A new RGB image of exactly ``target_size`` with the fitted image
+        pasted in the center; the uncovered border keeps ``Image.new``'s
+        default fill (black).
+    """
+    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
+    # filter under its current name. thumbnail() only ever shrinks, so an
+    # image already smaller than target_size is padded without upscaling.
+    image.thumbnail(target_size, Image.LANCZOS)
+    canvas = Image.new("RGB", target_size)
+    offset = (
+        (target_size[0] - image.width) // 2,
+        (target_size[1] - image.height) // 2,
+    )
+    canvas.paste(image, offset)
+    return canvas
 # Process a single frame through the depth model
 def process_frame(frame, seed=0, target_size=(512, 512)):
     """Process a single frame and return depth map."""
     try:
         torch.cuda.empty_cache()  # Clear GPU cache
+        # Convert frame to PIL Image
+        image = Image.fromarray(frame).convert('RGB')
+        # Resize and pad image
+        input_image = resize_and_pad(image, target_size)
         # Run inference
+        depth_map = lotus(input_image, 'depth', seed, device, pipe_g, pipe_d)
+        # Crop away the padding: resize_and_pad shrank `image` in place, so image.size here is the fitted size, not the source frame's
+        width, height = image.size
+        left = (target_size[0] - width) // 2
+        top = (target_size[1] - height) // 2
+        right = left + width
+        bottom = top + height
+        depth_map_cropped = depth_map.crop((left, top, right, bottom))
+        return depth_map_cropped
     except Exception as e:
         logger.error(f"Error processing frame: {e}")
         start_time = time.time()
         # Preprocess the video
+        video = preprocess_video(video_path, target_fps=fps)
         # Use original video FPS if not specified
         if fps == 0:
             fps = video.fps
+        frames = list(video.iter_frames())
         total_frames = len(frames)
         logger.info(f"Processing {total_frames} frames at {fps} FPS...")
             if depth_map is not None:
                 # Save frame
                 frame_path = os.path.join(frames_dir, f"frame_{i:06d}.png")
+                depth_map.save(frame_path, format='PNG', compress_level=0)
                 # Update live preview every 10% progress
                 if i % max(1, total_frames // 10) == 0:
         try:
             # FFmpeg settings for high-quality MP4
+            input_pattern = os.path.join(frames_dir, 'frame_%06d.png')
             (
                 ffmpeg
+                .input(input_pattern, pattern_type='sequence', framerate=fps)
+                .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', crf=17, preset='slow')
                 .run(overwrite_output=True, quiet=True)
             )
             logger.info("MP4 video created successfully!")