Spaces: Running on Zero
Stanislaw Szymanowicz committed
Commit e10da38 • 1 Parent(s): 4aa5114
Add model and app file
Files changed:
- .gitignore (+1, -0)
- app.py (+178, -4)
- model_file/objaverse/.hydra/config.yaml (+66, -0)
.gitignore
ADDED
@@ -0,0 +1 @@
+*/__pycache__
app.py
CHANGED
@@ -1,7 +1,181 @@
+import torch
+import torchvision
+import numpy as np
+
+import os
+from omegaconf import OmegaConf
+from PIL import Image
+
+from utils.app_utils import (
+    remove_background,
+    resize_foreground,
+    set_white_background,
+    resize_to_128,
+    to_tensor,
+    get_source_camera_v2w_rmo_and_quats,
+    get_target_cameras,
+    export_to_obj)
+
+import imageio
+
+from scene.gaussian_predictor import GaussianSplatPredictor
+from gaussian_renderer import render_predicted
+
 import gradio as gr
 
-def greet(name):
-    return "Hello " + name + "!!"
+import rembg
+
+def main():
+
+    # ============= model loading ==========
+    def load_model(device):
+        experiment_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                                       "model_file", "objaverse")
+        # load cfg
+        training_cfg = OmegaConf.load(os.path.join(experiment_path, ".hydra", "config.yaml"))
+        # load model
+        model = GaussianSplatPredictor(training_cfg)
+        ckpt_loaded = torch.load(os.path.join(experiment_path, "model_latest.pth"), map_location=device)
+        model.load_state_dict(ckpt_loaded["model_state_dict"])
+        return model, training_cfg
+
+    if torch.cuda.is_available():
+        device = "cuda:0"
+        torch.cuda.set_device(device)  # only valid when CUDA is available, so guarded here
+    else:
+        device = "cpu"
+
+    model, model_cfg = load_model(device)
+    model.to(device)
+
+    # ============= image preprocessing =============
+    rembg_session = rembg.new_session()
+
+    def check_input_image(input_image):
+        if input_image is None:
+            raise gr.Error("No image uploaded!")
+
+    def preprocess(input_image, preprocess_background=True, foreground_ratio=0.65):
+        # around 0.65-0.7 seems to be a reasonable foreground ratio
+        if preprocess_background:
+            image = input_image.convert("RGB")
+            image = remove_background(image, rembg_session)
+            image = resize_foreground(image, foreground_ratio)
+            image = set_white_background(image)
+        else:
+            image = input_image
+            if image.mode == "RGBA":
+                image = set_white_background(image)
+        image = resize_to_128(image)
+        return image
+
+    ply_out_path = "/users/stan/splatter-image/gradio_out/mesh.ply"
+    os.makedirs(os.path.dirname(ply_out_path), exist_ok=True)
+
+    def reconstruct_and_export(image):
+        """
+        Passes the image through the model and outputs the reconstruction in the form of a dict of tensors.
+        """
+        image = to_tensor(image).to(device)
+        view_to_world_source, rot_transform_quats = get_source_camera_v2w_rmo_and_quats()
+        view_to_world_source = view_to_world_source.to(device)
+        rot_transform_quats = rot_transform_quats.to(device)
+
+        reconstruction_unactivated = model(
+            image.unsqueeze(0).unsqueeze(0),
+            view_to_world_source,
+            rot_transform_quats,
+            None,
+            activate_output=False)
+
+        reconstruction = {k: v[0].contiguous() for k, v in reconstruction_unactivated.items()}
+        reconstruction["scaling"] = model.scaling_activation(reconstruction["scaling"])
+        reconstruction["opacity"] = model.opacity_activation(reconstruction["opacity"])
+
+        # render images in a loop
+        world_view_transforms, full_proj_transforms, camera_centers = get_target_cameras()
+        background = torch.tensor([1, 1, 1], dtype=torch.float32, device=device)
+        loop_renders = []
+        t_to_512 = torchvision.transforms.Resize(512, interpolation=torchvision.transforms.InterpolationMode.NEAREST)
+        for r_idx in range(world_view_transforms.shape[0]):
+            image = render_predicted(reconstruction,
+                                     world_view_transforms[r_idx].to(device),
+                                     full_proj_transforms[r_idx].to(device),
+                                     camera_centers[r_idx].to(device),
+                                     background,
+                                     model_cfg,
+                                     focals_pixels=None)["render"]
+            image = t_to_512(image)
+            loop_renders.append(torch.clamp(image * 255, 0.0, 255.0).detach().permute(1, 2, 0).cpu().numpy().astype(np.uint8))
+        loop_out_path = os.path.join(os.path.dirname(ply_out_path), "loop.mp4")
+        imageio.mimsave(loop_out_path, loop_renders, fps=25)
+        # export reconstruction to .ply
+        export_to_obj(reconstruction_unactivated, ply_out_path)
+
+        return loop_out_path, ply_out_path
+
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """
+
+            # Splatter Image Demo
+            [Splatter Image](https://github.com/szymanowiczs/splatter-image) (CVPR 2024) is a fast and super cheap-to-train method for 3D object reconstruction from a single image.
+            The model used in this demo was trained on **Objaverse-LVIS on 2 A6000 GPUs for 3.5 days**.
+            On an NVIDIA V100 GPU, reconstruction runs at 38 FPS and rendering at 588 FPS.
+            Upload an image of an object to see how Splatter Image does.
+
+            **Comments:**
+            1. The first example you upload should take about 4.5 seconds (with preprocessing, saving and overhead); the following ones take about 1.5 s each.
+            2. The model does not work well on photos of humans.
+            3. The 3D viewer shows a .ply mesh extracted from a mix of 3D Gaussians. Artefacts might show; see the video for more faithful results.
+            4. Best results are achieved on the datasets described in the [repository](https://github.com/szymanowiczs/splatter-image) using that code. This demo is experimental.
+            5. Our model might not be better than some state-of-the-art methods, but it is of comparable quality and is **much** cheaper to train and run.
+            """
+        )
+        with gr.Row(variant="panel"):
+            with gr.Column():
+                with gr.Row():
+                    input_image = gr.Image(
+                        label="Input Image",
+                        image_mode="RGBA",
+                        sources="upload",
+                        type="pil",
+                        elem_id="content_image",
+                    )
+                    processed_image = gr.Image(label="Processed Image", interactive=False)
+                with gr.Row():
+                    with gr.Group():
+                        preprocess_background = gr.Checkbox(
+                            label="Remove Background", value=True
+                        )
+                with gr.Row():
+                    submit = gr.Button("Generate", elem_id="generate", variant="primary")
+            with gr.Column():
+                with gr.Row():
+                    with gr.Tab("Reconstruction"):
+                        with gr.Column():
+                            output_video = gr.Video(value=None, width=512, label="Rendered Video", autoplay=True)
+                            output_model = gr.Model3D(
+                                height=512,
+                                label="Output Model",
+                                interactive=False
+                            )
+
+        submit.click(fn=check_input_image, inputs=[input_image]).success(
+            fn=preprocess,
+            inputs=[input_image, preprocess_background],
+            outputs=[processed_image],
+        ).success(
+            fn=reconstruct_and_export,
+            inputs=[processed_image],
+            outputs=[output_video, output_model],
+        )
+
+    demo.queue(max_size=1)
+    demo.launch()
+
+
+if __name__ == "__main__":
+    main()
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+# gradio app interface
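Note: the reconstruction pipeline added above can also be exercised without the Gradio UI. The following is a minimal headless sketch, not part of this commit: it assumes the repository layout the diff implies (utils/app_utils.py, scene/gaussian_predictor.py, and model_file/objaverse/ containing .hydra/config.yaml and model_latest.pth), and the file name input.png plus the eval/no_grad wrapping are illustrative additions.

# headless_demo.py -- hypothetical driver script, mirroring load_model() and
# reconstruct_and_export() from the app.py added in this commit.
import os

import torch
from omegaconf import OmegaConf
from PIL import Image

from utils.app_utils import (to_tensor, resize_to_128,
                             get_source_camera_v2w_rmo_and_quats)
from scene.gaussian_predictor import GaussianSplatPredictor

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the Hydra config saved at training time, then the checkpoint weights.
experiment_path = os.path.join("model_file", "objaverse")
cfg = OmegaConf.load(os.path.join(experiment_path, ".hydra", "config.yaml"))
model = GaussianSplatPredictor(cfg)
ckpt = torch.load(os.path.join(experiment_path, "model_latest.pth"), map_location=device)
model.load_state_dict(ckpt["model_state_dict"])
model.to(device).eval()

# One 128x128 input image and the canonical source camera, as in app.py.
image = to_tensor(resize_to_128(Image.open("input.png").convert("RGB"))).to(device)
v2w_source, rot_quats = get_source_camera_v2w_rmo_and_quats()

with torch.no_grad():
    # Batch and view dimensions are added: (1, 1, C, H, W).
    reconstruction = model(image.unsqueeze(0).unsqueeze(0),
                           v2w_source.to(device),
                           rot_quats.to(device),
                           None,
                           activate_output=False)
print({k: tuple(v.shape) for k, v in reconstruction.items()})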
model_file/objaverse/.hydra/config.yaml
ADDED
@@ -0,0 +1,66 @@
+wandb:
+  project: gs_pred
+cam_embd:
+  embedding: null
+  encode_embedding: null
+  dimension: 0
+  method: null
+general:
+  device: 0
+  random_seed: 0
+  num_devices: 2
+  mixed_precision: true
+data:
+  training_resolution: 128
+  fov: 49.134342641202636
+  subset: -1
+  input_images: 1
+  znear: 0.8
+  zfar: 3.2
+  category: objaverse
+  white_background: true
+  origin_distances: false
+opt:
+  iterations: 50001
+  base_lr: 6.34584421e-05
+  batch_size: 16
+  betas:
+  - 0.9
+  - 0.999
+  loss: l2
+  imgs_per_obj: 4
+  ema:
+    use: true
+    update_every: 10
+    update_after_step: 100
+    beta: 0.9999
+  lambda_lpips: 0.33814373
+  start_lpips_after: 0
+  step_lr_at: -1
+model:
+  max_sh_degree: 1
+  inverted_x: false
+  inverted_y: true
+  name: SingleUNet
+  opacity_scale: 1.0
+  opacity_bias: -2.0
+  scale_scale: 0.01
+  scale_bias: 0.02
+  xyz_scale: 0.1
+  xyz_bias: 0.0
+  depth_scale: 1.0
+  depth_bias: 0.0
+  network_without_offset: false
+  network_with_offset: true
+  attention_resolutions:
+  - 16
+  cross_view_attention: true
+  isotropic: false
+  base_dim: 128
+  num_blocks: 4
+logging:
+  ckpt_iterations: 1000
+  val_log: 10000
+  loss_log: 10
+  loop_log: 10000
+  render_log: 10000
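Note: app.py consumes this file directly via OmegaConf.load, reading the nested keys through attribute access. A quick illustrative snippet (not part of the commit; the values in the comments are taken from the YAML above):

# Hypothetical inspection snippet for the Hydra-dumped config.
from omegaconf import OmegaConf

cfg = OmegaConf.load("model_file/objaverse/.hydra/config.yaml")

# Nested keys are available via dotted attribute access.
print(cfg.model.name)                # SingleUNet
print(cfg.data.training_resolution)  # 128
print(cfg.opt.base_lr)               # 6.34584421e-05
assert cfg.data.input_images == 1    # the demo reconstructs from a single view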