Spaces:

Ryukijano
/

Flash3d

Sleeping

App Files Files Community

Ryukijano committed on Oct 14, 2024

Commit

af2e32a

verified ·

1 Parent(s): 6a66177

Extend app to accept multiple images for scene reconstruction

Browse files

This update modifies the Flash3D application to accept multiple input images for reconstructing a complete 3D scene. Key changes include:

Gradio Interface Changes:

Updated the input component to accept multiple images using gr.Images.
Added a gallery to display the preprocessed images.
Model Inference Updates:

Modified the preprocess function to handle multiple images.
Updated the reconstruct_and_export function to iterate over all uploaded images for scene reconstruction.
Added a placeholder for combining outputs from multiple views; for now, only the first view's output is saved to the PLY file.
User Interaction:

Users can now upload several images from different angles to create a richer reconstruction.
Kept the sliders for the adjustable parameters (padding and number of Gaussians per pixel); the number of Gaussians per pixel now defaults to 1 instead of 10.
This new functionality aims to provide a more comprehensive 3D reconstruction, allowing for richer inputs from multiple perspectives and generating a better quality model.

Files changed (1) hide show

app.py +72 -56

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ import torchvision.transforms as TT
 import torchvision.transforms.functional as TTF
 from huggingface_hub import hf_hub_download
 import numpy as np
 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
@@ -54,50 +55,63 @@ def main():
     to_tensor = TT.ToTensor()  # Convert image to tensor
     # Function to check if an image is uploaded by the user
-    def check_input_image(input_image):
-        print("[DEBUG] Checking input image...")
-        if input_image is None:
-            print("[ERROR] No image uploaded!")
-            raise gr.Error("No image uploaded!")
-        print("[INFO] Input image is valid.")
-    # Function to preprocess the input image before passing it to the model
-    def preprocess(image, padding_value):
-        print("[DEBUG] Preprocessing image...")
-        # Resize the image to the desired height and width specified in the configuration
-        image = TTF.resize(
-            image, (cfg.dataset.height, cfg.dataset.width),
-            interpolation=TT.InterpolationMode.BICUBIC
-        )
-        # Apply padding to the image
-        pad_border_fn = TT.Pad((padding_value, padding_value))
-        image = pad_border_fn(image)
-        print("[INFO] Image preprocessing complete.")
-        return image
-    # Function to reconstruct the 3D model from the input image and export it as a PLY file
     @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
-    def reconstruct_and_export(image, num_gauss):
         """
-        Passes image through model, outputs reconstruction in form of a dict of tensors.
         """
-        print("[DEBUG] Starting reconstruction and export...")
-        # Convert the preprocessed image to a tensor and move it to the specified device
-        image = to_tensor(image).to(device).unsqueeze(0)
-        inputs = {
-            ("color_aug", 0, 0): image,
-        }
-        # Pass the image through the model to get the output
-        print("[INFO] Passing image through the model...")
-        outputs = model(inputs)
         # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
-        save_ply(outputs, ply_out_path, num_gauss=num_gauss)
         print("[INFO] Reconstruction and export complete.")
-        return ply_out_path
     # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
@@ -120,18 +134,20 @@ def main():
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
-                    # Input image component for the user to upload an image
-                    input_image = gr.Image(
-                        label="Input Image",
-                        image_mode="RGBA",
-                        sources="upload",
-                        type="pil",
-                        elem_id="content_image",
                     )
                 with gr.Row():
                     # Sliders for configurable parameters
-                    num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=10)
-                    padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
@@ -147,35 +163,35 @@ def main():
                             './demo_examples/re10k_05.jpg',
                             './demo_examples/re10k_06.jpg',
                         ],
-                        inputs=[input_image],
                         cache_examples=False,
-                        label="Examples",
                         examples_per_page=20,
                     )
                 with gr.Row():
-                    # Display the preprocessed image (after resizing and padding)
-                    processed_image = gr.Image(label="Processed Image", interactive=False)
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
                         # 3D model viewer to display the reconstructed model
                         output_model = gr.Model3D(
-                            height=512,
                             label="Output Model",
-                            interactive=False
                         )
         # Define the workflow for the Generate button
-        submit.click(fn=check_input_image, inputs=[input_image]).success(
             fn=preprocess,
-            inputs=[input_image, padding_value],
-            outputs=[processed_image],
         ).success(
             fn=reconstruct_and_export,
-            inputs=[processed_image, num_gauss],
-            outputs=[output_model],
         )
     # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)

 import torchvision.transforms.functional as TTF
 from huggingface_hub import hf_hub_download
 import numpy as np
+from einops import rearrange
 from networks.gaussian_predictor import GaussianPredictor
 from util.vis3d import save_ply
     to_tensor = TT.ToTensor()  # Convert image to tensor
     # Function to check if an image is uploaded by the user
+    def check_input_image(input_images):
+        print("[DEBUG] Checking input images...")
+        if not input_images or len(input_images) == 0:
+            print("[ERROR] No images uploaded!")
+            raise gr.Error("No images uploaded!")
+        print("[INFO] Input images are valid.")
+    # Function to preprocess the input images before passing them to the model
+    def preprocess(images, padding_value):
+        processed_images = []
+        for image in images:
+            # Resize and pad each image
+            print("[DEBUG] Preprocessing image...")
+            image = TTF.resize(image, (cfg.dataset.height, cfg.dataset.width), interpolation=TT.InterpolationMode.BICUBIC)
+            pad_border_fn = TT.Pad((padding_value, padding_value))
+            image = pad_border_fn(image)
+            print("[INFO] Image preprocessing complete.")
+            processed_images.append(image)
+        return processed_images
+    # Function to reconstruct the 3D model from the input images and export it as a PLY file
     @spaces.GPU(duration=120)  # Decorator to allocate a GPU for this function during execution
+    def reconstruct_and_export(images, num_gauss):
         """
+        Passes images through model, outputs reconstruction in form of a dict of tensors.
         """
+        outputs_list = []
+        for image in images:
+            print("[DEBUG] Starting reconstruction and export...")
+            # Convert the preprocessed image to a tensor and move it to the specified device
+            image = to_tensor(image).to(device).unsqueeze(0)  # Add a batch dimension to the image tensor
+            inputs = {
+                ("color_aug", 0, 0): image,  # The input dictionary expected by the model
+            }
+            # Pass the image through the model to get the output
+            print("[INFO] Passing image through the model...")
+            outputs = model(inputs)  # Perform inference to get model outputs
+            outputs_list.append(outputs)
+        # Combine or process outputs from multiple images here if necessary
+        # For now, we'll just save the first one for illustration
+        gauss_means = outputs_list[0][('gauss_means', 0, 0)]
+        if gauss_means.size(0) < num_gauss or gauss_means.size(0) % num_gauss != 0:
+            adjusted_num_gauss = max(1, gauss_means.size(0) // (gauss_means.size(0) // num_gauss))
+            print(f"[WARNING] Adjusting num_gauss from {num_gauss} to {adjusted_num_gauss} to avoid shape mismatch.")
+            num_gauss = adjusted_num_gauss  # Adjust num_gauss to prevent errors during tensor reshaping
+        # Debugging tensor shape
+        print(f"[DEBUG] gauss_means tensor shape: {gauss_means.shape}")
         # Export the reconstruction to a PLY file
         print(f"[INFO] Saving output to {ply_out_path}...")
+        save_ply(outputs_list[0], ply_out_path, num_gauss=num_gauss)  # Save the output 3D model to a PLY file
         print("[INFO] Reconstruction and export complete.")
+        return ply_out_path  # Return the path to the saved PLY file
     # Path to save the output PLY file
     ply_out_path = f'./mesh.ply'
         with gr.Row(variant="panel"):
             with gr.Column(scale=1):
                 with gr.Row():
+                    # Input images component for the user to upload multiple images
+                    input_images = gr.Images(
+                        label="Input Images",
+                        image_mode="RGBA",  # Accept RGBA images
+                        sources="upload",  # Allow users to upload images
+                        type="pil",  # The images are returned as PIL images
+                        elem_id="content_images",
+                        tool="editor",  # Optional, for editing images
+                        multiple=True  # Allow multiple image uploads
                     )
                 with gr.Row():
                     # Sliders for configurable parameters
+                    num_gauss = gr.Slider(minimum=1, maximum=20, step=1, label="Number of Gaussians per Pixel", value=1)  # Slider to set the number of Gaussians per pixel
+                    padding_value = gr.Slider(minimum=0, maximum=128, step=8, label="Padding Amount for Output Processing", value=32)  # Slider to set padding value
                 with gr.Row():
                     # Button to trigger the generation process
                     submit = gr.Button("Generate", elem_id="generate", variant="primary")
                             './demo_examples/re10k_05.jpg',
                             './demo_examples/re10k_06.jpg',
                         ],
+                        inputs=[input_images],  # Load the example images into the input component
                         cache_examples=False,
+                        label="Examples",  # Label for the examples section
                         examples_per_page=20,
                     )
                 with gr.Row():
+                    # Display the preprocessed images (after resizing and padding)
+                    processed_images = gr.Gallery(label="Processed Images", interactive=False)  # Output component to show the processed images
             with gr.Column(scale=2):
                 with gr.Row():
                     with gr.Tab("Reconstruction"):
                         # 3D model viewer to display the reconstructed model
                         output_model = gr.Model3D(
+                            height=512,  # Height of the 3D model viewer
                             label="Output Model",
+                            interactive=False  # The viewer is not interactive
                         )
         # Define the workflow for the Generate button
+        submit.click(fn=check_input_image, inputs=[input_images]).success(
             fn=preprocess,
+            inputs=[input_images, padding_value],  # Pass the input images and padding value to the preprocess function
+            outputs=[processed_images],  # Output the processed images
         ).success(
             fn=reconstruct_and_export,
+            inputs=[processed_images, num_gauss],  # Pass the processed images and number of Gaussians to the reconstruction function
+            outputs=[output_model],  # Output the reconstructed 3D model
         )
     # Queue the requests to handle them sequentially (to avoid GPU resource conflicts)