Lotus_Depth

App Files Files Community

ghostsInTheMachine committed 1 day ago

Commit

afe7cc3

•

1 Parent(s): 8e07e41

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -72

app.py CHANGED Viewed

@@ -8,10 +8,8 @@ import ffmpeg
 import numpy as np
 from PIL import Image
 import moviepy.editor as mp
-from infer import lotus  # Import the depth model inference function
 import logging
-import io
-from multiprocessing import Pool, cpu_count
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -20,105 +18,98 @@ logger = logging.getLogger(__name__)
 # Set device to use the L40s GPU explicitly
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 # Preprocess the video to adjust resolution and frame rate
 def preprocess_video(video_path, target_fps=24, max_resolution=(1920, 1080)):
     """Preprocess the video to resize and adjust its frame rate."""
     video = mp.VideoFileClip(video_path)
     # Resize video if it's larger than the target resolution
     if video.size[0] > max_resolution[0] or video.size[1] > max_resolution[1]:
         video = video.resize(height=max_resolution[1])
     # Adjust FPS if target_fps is specified
     if target_fps > 0:
         video = video.set_fps(target_fps)
     return video
-# Process a single frame through the depth model
-def process_frame(args):
-    """Process a single frame through the depth model and return depth map."""
-    frame_index, frame_data, seed = args
     try:
-        # Set seeds for reproducibility
-        torch.manual_seed(seed)
-        np.random.seed(seed)
-        # Convert frame data to PIL Image
-        image = Image.fromarray(frame_data).convert('RGB')
-        # Save image to an in-memory file
-        img_bytes = io.BytesIO()
-        image.save(img_bytes, format='PNG')
-        img_bytes.seek(0)  # Reset file pointer to the beginning
-        # Process through the depth model
-        _, output_d = lotus(img_bytes, 'depth', seed, device)
-        # Convert depth output to numpy array
-        depth_array = np.array(output_d)
-        return (frame_index, depth_array)
     except Exception as e:
-        logger.error(f"Error processing frame {frame_index}: {e}")
-        return (frame_index, None)
 # Process video frames and generate depth maps
-def process_video(video_path, fps=0, seed=0, num_workers=4):
-    """Process video frames in parallel and generate depth maps."""
     # Create a persistent temporary directory
     temp_dir = tempfile.mkdtemp()
     try:
         start_time = time.time()
         # Preprocess the video
         video = preprocess_video(video_path, target_fps=fps)
         # Use original video FPS if not specified
         if fps == 0:
             fps = video.fps
         frames = list(video.iter_frames(fps=video.fps))
         total_frames = len(frames)
         logger.info(f"Processing {total_frames} frames at {fps} FPS...")
         # Create directory for frame sequence and outputs
         frames_dir = os.path.join(temp_dir, "frames")
         os.makedirs(frames_dir, exist_ok=True)
-        # Prepare arguments for multiprocessing
-        args_list = [(i, frames[i], seed) for i in range(total_frames)]
-        # Use multiprocessing Pool to process frames in parallel
-        with Pool(processes=num_workers) as pool:
-            results = []
-            for result in pool.imap_unordered(process_frame, args_list):
-                frame_index, depth_map = result
                 if depth_map is not None:
                     # Save frame
                     frame_path = os.path.join(frames_dir, f"frame_{frame_index:06d}.png")
-                    Image.fromarray(depth_map.squeeze()).save(frame_path)
-                    # Update preview every 10% progress
-                    if (frame_index + 1) % max(1, total_frames // 10) == 0:
                         elapsed_time = time.time() - start_time
-                        progress = ((frame_index + 1) / total_frames) * 100
-                        yield depth_map, None, None, f"Processed {frame_index + 1}/{total_frames} frames... ({progress:.2f}%) Elapsed: {elapsed_time:.2f}s"
                 else:
-                    logger.error(f"Frame {frame_index} failed to process.")
         logger.info("Creating output files...")
         # Create ZIP of frame sequence
         zip_filename = f"depth_frames_{int(time.time())}.zip"
         zip_path = os.path.join(temp_dir, zip_filename)
         shutil.make_archive(zip_path[:-4], 'zip', frames_dir)
         # Create MP4 video
         video_filename = f"depth_video_{int(time.time())}.mp4"
         output_video_path = os.path.join(temp_dir, video_filename)
         try:
             # FFmpeg settings for high-quality MP4
             (
@@ -128,31 +119,29 @@ def process_video(video_path, fps=0, seed=0, num_workers=4):
                 .run(overwrite_output=True, quiet=True)
             )
             logger.info("MP4 video created successfully!")
         except ffmpeg.Error as e:
             logger.error(f"Error creating video: {e.stderr.decode() if e.stderr else str(e)}")
             output_video_path = None
         total_time = time.time() - start_time
         logger.info("Processing complete!")
         # Yield the file paths
         yield None, zip_path, output_video_path, f"Processing complete! Total time: {total_time:.2f} seconds"
     except Exception as e:
         logger.error(f"Error: {e}")
         yield None, None, None, f"Error processing video: {e}"
-    # Do not delete temp_dir here; we need the files to persist
-    # Cleanup can be handled elsewhere if necessary
 # Wrapper function with error handling
-def process_wrapper(video, fps=0, seed=0, num_workers=4):
     if video is None:
         raise gr.Error("Please upload a video.")
     try:
         outputs = []
         # Use video directly, since it's the file path
-        for output in process_video(video, fps, seed, num_workers):
             outputs.append(output)
             yield output
         return outputs[-1]
@@ -161,7 +150,33 @@ def process_wrapper(video, fps=0, seed=0, num_workers=4):
 # Custom CSS for styling (unchanged)
 custom_css = """
-    /* Your existing custom CSS */
 """
 # Gradio Interface
@@ -171,24 +186,24 @@ with gr.Blocks(css=custom_css) as demo:
             <div id="title">Video Depth Estimation</div>
         </div>
     ''')
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Upload Video", interactive=True)
             fps_slider = gr.Slider(minimum=0, maximum=60, step=1, value=0, label="Output FPS (0 for original)")
             seed_slider = gr.Number(value=0, label="Seed")
-            num_workers_slider = gr.Slider(minimum=1, maximum=cpu_count(), step=1, value=4, label="Number of Workers")
             btn = gr.Button("Process Video")
         with gr.Column():
             preview_image = gr.Image(label="Live Preview")
             output_frames_zip = gr.File(label="Download Frame Sequence (ZIP)")
             output_video = gr.File(label="Download Video (MP4)")
             time_textbox = gr.Textbox(label="Status", interactive=False)
     btn.click(
         fn=process_wrapper,
-        inputs=[video_input, fps_slider, seed_slider, num_workers_slider],
         outputs=[preview_image, output_frames_zip, output_video, time_textbox]
     )

 import numpy as np
 from PIL import Image
 import moviepy.editor as mp
+from infer import lotus, load_models
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 # Use the first CUDA device (cuda:0, the L40S GPU) when available; otherwise fall back to CPU
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+# Load models once
+task_name = 'depth'
+pipe_g, pipe_d = load_models(task_name, device)
 # Preprocess the video to adjust resolution and frame rate
 def preprocess_video(video_path, target_fps=24, max_resolution=(1920, 1080)):
     """Open a video and downscale / re-time it before depth processing.

     Args:
         video_path: Path of the video file, passed to ``mp.VideoFileClip``.
         target_fps: Frame rate applied with ``set_fps``. ``0``, a negative
             value or ``None`` leaves the clip's own frame rate untouched.
         max_resolution: ``(max_width, max_height)`` bounding box in pixels.

     Returns:
         The opened clip, resized and/or re-timed when needed. The clip is
         not closed here; the caller owns it.
     """
     video = mp.VideoFileClip(video_path)
     max_width, max_height = max_resolution
     width, height = video.size
     # Fit the clip inside the bounding box while keeping its aspect ratio.
     # Resizing to a fixed height alone leaves clips wider than the box's
     # aspect ratio (e.g. 2560x1080 against 1920x1080) above max_width.
     scale = min(max_width / width, max_height / height)
     if scale < 1:
         video = video.resize(scale)
     # Adjust FPS if target_fps is specified
     if target_fps and target_fps > 0:
         video = video.set_fps(target_fps)
     return video
+# Process a batch of frames through the depth model
+def process_frames_batch(frames_batch, seed=0):
+    """Process a batch of frames and return depth maps."""
     try:
+        # Convert frames to PIL Images
+        images_batch = [Image.fromarray(frame).convert('RGB') for frame in frames_batch]
+        # Run batch inference
+        depth_maps = lotus(images_batch, 'depth', seed, device, pipe_g, pipe_d)
+        return depth_maps
     except Exception as e:
+        logger.error(f"Error processing batch: {e}")
+        return [None] * len(frames_batch)
 # Process video frames and generate depth maps
+def process_video(video_path, fps=0, seed=0, batch_size=16):
+    """Process video frames in batches and generate depth maps."""
     # Create a persistent temporary directory
     temp_dir = tempfile.mkdtemp()
     try:
         start_time = time.time()
         # Preprocess the video
         video = preprocess_video(video_path, target_fps=fps)
         # Use original video FPS if not specified
         if fps == 0:
             fps = video.fps
         frames = list(video.iter_frames(fps=video.fps))
         total_frames = len(frames)
         logger.info(f"Processing {total_frames} frames at {fps} FPS...")
         # Create directory for frame sequence and outputs
         frames_dir = os.path.join(temp_dir, "frames")
         os.makedirs(frames_dir, exist_ok=True)
+        processed_frames = []
+        # Process frames in batches
+        for i in range(0, total_frames, batch_size):
+            frames_batch = frames[i:i+batch_size]
+            depth_maps = process_frames_batch(frames_batch, seed)
+            for j, depth_map in enumerate(depth_maps):
+                frame_index = i + j
                 if depth_map is not None:
                     # Save frame
                     frame_path = os.path.join(frames_dir, f"frame_{frame_index:06d}.png")
+                    depth_map.save(frame_path)
+                    # Update live preview every 10% progress
+                    if frame_index % max(1, total_frames // 10) == 0:
                         elapsed_time = time.time() - start_time
+                        progress = (frame_index / total_frames) * 100
+                        yield depth_map, None, None, f"Processed {frame_index}/{total_frames} frames... ({progress:.2f}%) Elapsed: {elapsed_time:.2f}s"
                 else:
+                    logger.error(f"Error processing frame {frame_index}")
         logger.info("Creating output files...")
         # Create ZIP of frame sequence
         zip_filename = f"depth_frames_{int(time.time())}.zip"
         zip_path = os.path.join(temp_dir, zip_filename)
         shutil.make_archive(zip_path[:-4], 'zip', frames_dir)
         # Create MP4 video
         video_filename = f"depth_video_{int(time.time())}.mp4"
         output_video_path = os.path.join(temp_dir, video_filename)
         try:
             # FFmpeg settings for high-quality MP4
             (
                 .run(overwrite_output=True, quiet=True)
             )
             logger.info("MP4 video created successfully!")
         except ffmpeg.Error as e:
             logger.error(f"Error creating video: {e.stderr.decode() if e.stderr else str(e)}")
             output_video_path = None
         total_time = time.time() - start_time
         logger.info("Processing complete!")
         # Yield the file paths
         yield None, zip_path, output_video_path, f"Processing complete! Total time: {total_time:.2f} seconds"
     except Exception as e:
         logger.error(f"Error: {e}")
         yield None, None, None, f"Error processing video: {e}"
 # Wrapper function with error handling
+def process_wrapper(video, fps=0, seed=0, batch_size=16):
     if video is None:
         raise gr.Error("Please upload a video.")
     try:
         outputs = []
         # Use video directly, since it's the file path
+        for output in process_video(video, fps, seed, batch_size):
             outputs.append(output)
             yield output
         return outputs[-1]
 # Custom CSS for the centered title banner (animated gradient background)
 custom_css = """
+    .title-container {
+        text-align: center;
+        padding: 10px 0;
+    }
+    #title {
+        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+        font-size: 36px;
+        font-weight: bold;
+        color: #000000;
+        padding: 10px;
+        border-radius: 10px;
+        display: inline-block;
+        background: linear-gradient(
+            135deg,
+            #e0f7fa, #e8f5e9, #fff9c4, #ffebee,
+            #f3e5f5, #e1f5fe, #fff3e0, #e8eaf6
+        );
+        background-size: 400% 400%;
+        animation: gradient-animation 15s ease infinite;
+    }
+    @keyframes gradient-animation {
+        0% { background-position: 0% 50%; }
+        50% { background-position: 100% 50%; }
+        100% { background-position: 0% 50%; }
+    }
 """
 # Gradio Interface
             <div id="title">Video Depth Estimation</div>
         </div>
     ''')
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Upload Video", interactive=True)
             fps_slider = gr.Slider(minimum=0, maximum=60, step=1, value=0, label="Output FPS (0 for original)")
             seed_slider = gr.Number(value=0, label="Seed")
+            batch_size_slider = gr.Slider(minimum=1, maximum=64, step=1, value=16, label="Batch Size")
             btn = gr.Button("Process Video")
         with gr.Column():
             preview_image = gr.Image(label="Live Preview")
             output_frames_zip = gr.File(label="Download Frame Sequence (ZIP)")
             output_video = gr.File(label="Download Video (MP4)")
             time_textbox = gr.Textbox(label="Status", interactive=False)
     btn.click(
         fn=process_wrapper,
+        inputs=[video_input, fps_slider, seed_slider, batch_size_slider],
         outputs=[preview_image, output_frames_zip, output_video, time_textbox]
     )