ostapagon committed
Commit b46de64 · 1 parent: ada2cc0

Add some instructions. Delete unnecessary UI components.

app.py CHANGED
@@ -11,28 +11,25 @@ from gs_demo import gs_demo_tab
 torch.backends.cuda.matmul.allow_tf32 = True
 
 if __name__ == '__main__':
-    # parser = get_args_parser()
-    # args = parser.parse_args()
-
-    # if args.server_name is not None:
-    #     server_name = args.server_name
-    # else:
-    #     server_name = '0.0.0.0'# if args.local_network else '127.0.0.1'
-
-    # weights_path = '/app/wild-gaussian-splatting/mast3r/checkpoints/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth'
-    # weights_path = "naver/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric"#args.weights if args.weights is not None else + MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric
-    # device = device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    # chkpt_tag = hash_md5(weights_path)
-
     with gr.Blocks() as demo:
+        gr.HTML('''
+            <div style="text-align: center; padding: 20px; background-color: #f9f9f9; border-radius: 10px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);">
+                <h2 style="color: #333;">MASt3R and 3DGS Pipeline Demo</h2>
+                <p style="font-size: 16px; color: #555;">This pipeline is designed for 3D reconstruction using MASt3R and 3DGS.</p>
+                <p style="font-size: 16px; color: #555;">The process is divided into two stages:</p>
+                <ol style="text-align: left; display: inline-block; margin: 0 auto; color: #555;">
+                    <li>MASt3R is used to obtain the initial point cloud and camera parameters.</li>
+                    <li>3DGS is then trained on the results from MASt3R to refine the 3D scene representation.</li>
+                </ol>
+                <p style="font-size: 16px; color: #555;">For the full version of this pipeline, please visit the repository at:</p>
+                <a href="https://github.com/nerlfield/wild-gaussian-splatting" target="_blank" style="font-size: 16px; color: #007bff; text-decoration: none;">nerlfield/wild-gaussian-splatting</a>
+            </div>
+        ''')
+
         with gr.Tabs():
-            with gr.Tab("MASt3R Demo"):
+            with gr.Tab("MASt3R"):
                 mast3r_demo_tab()
-            with gr.Tab("Gaussian Splatting Demo"):
+            with gr.Tab("3DGS"):
                 gs_demo_tab()
 
-    demo.launch(show_error=True, share=None, server_name=None, server_port=None)
-    # demo.launch(show_error=True, share=None, server_name='0.0.0.0', server_port=5555)
-
-    # python3 demo.py --weights "/app/mast3r/checkpoints/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth" --device "cuda" --server_port 3334 --local_network "$@"
+    demo.launch(show_error=True, share=None, server_name=None, server_port=None)
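With this change, app.py reduces to a small two-tab Gradio shell. A minimal self-contained sketch of that pattern, with hypothetical stubs standing in for the real mast3r_demo_tab/gs_demo_tab:

import gradio as gr

def mast3r_demo_tab():   # stub; the real one lives in demo/mast3r_demo.py
    gr.Markdown("Stage 1: MASt3R point cloud + camera parameters")

def gs_demo_tab():       # stub; the real one lives in demo/gs_demo.py
    gr.Markdown("Stage 2: 3DGS training on the MASt3R output")

with gr.Blocks() as demo:
    gr.HTML('<h2>MASt3R and 3DGS Pipeline Demo</h2>')
    with gr.Tabs():
        with gr.Tab("MASt3R"):
            mast3r_demo_tab()
        with gr.Tab("3DGS"):
            gs_demo_tab()

# Passing None for server_name/server_port defers to Gradio's defaults
# (127.0.0.1:7860 locally; Spaces injects its own settings in deployment).
demo.launch(show_error=True, share=None, server_name=None, server_port=None)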
 
demo/gs_demo.py CHANGED
@@ -30,7 +30,26 @@ def gs_demo_tab():
     }
     </style>
     """)
-    gr.Markdown("# Gaussian Splatting Training Demo")
+
+    # Centered title
+    gr.Markdown("""
+    <h2 style="text-align: center;">3D Gaussian Splatting Reconstruction</h2>
+    """)
+
+    # Instructions
+    gr.Markdown('''
+        <div style="padding: 10px; background-color: #e9f7ef; border-radius: 5px; margin-bottom: 10px;">
+            <h3>Instructions for 3DGS Demo</h3>
+            <ul style="text-align: left; color: #333;">
+                <li>Press "Refresh Datasets" to get an updated list of the datasets produced in Stage 1; they are named run_0, run_1, run_...</li>
+                <li>Adjust the optimization parameters if needed, then press "Start Training".</li>
+                <li>Using at most 7k iterations is recommended to stay within the 3-minute limit. If you still exceed the limit, reduce the number of iterations.</li>
+                <li>After reconstruction finishes, you can watch a short generated preview video or download the full 3DGS reconstruction below the video.</li>
+                <li>Press "Load 3D Model" to view the full 3DGS reconstruction.</li>
+            </ul>
+            <p><b>Note: 3DGS '.ply' models can be large, so downloading and viewing them in the 3D model section may take some time.</b></p>
+        </div>
+    ''')
 
     refresh_button = gr.Button("Refresh Datasets", elem_classes="refresh-button")
     dataset_dropdown = gr.Dropdown(label="Select Dataset", choices=[], value="")
@@ -47,33 +66,15 @@ def gs_demo_tab():
 
     # Set the update function to be called when the refresh button is clicked
    refresh_button.click(fn=update_dataset_dropdown, inputs=None, outputs=dataset_dropdown)
-
-    with gr.Accordion("Model Parameters", open=False):
-        with gr.Row():
-            with gr.Column():
-                sh_degree = gr.Number(label="SH Degree", value=3)
-                model_path = gr.Textbox(label="Model Path", value="")
-                images = gr.Textbox(label="Images", value="images")
-                resolution = gr.Number(label="Resolution", value=-1)
-                white_background = gr.Checkbox(label="White Background", value=True)
-                data_device = gr.Dropdown(label="Data Device", choices=["cuda", "cpu"], value="cuda")
-                eval = gr.Checkbox(label="Eval", value=False)
-
-    with gr.Accordion("Pipeline Parameters", open=False):
-        with gr.Row():
-            with gr.Column():
-                convert_SHs_python = gr.Checkbox(label="Convert SHs Python", value=False)
-                compute_cov3D_python = gr.Checkbox(label="Compute Cov3D Python", value=False)
-                debug = gr.Checkbox(label="Debug", value=False)
 
     with gr.Accordion("Optimization Parameters", open=False):
         with gr.Row():
             with gr.Column():
-                iterations = gr.Number(label="Iterations", value=1000)
-                position_lr_init = gr.Number(label="Position LR Init", value=0.00016)
-                position_lr_final = gr.Number(label="Position LR Final", value=0.0000016)
-                position_lr_delay_mult = gr.Number(label="Position LR Delay Mult", value=0.01)
-                position_lr_max_steps = gr.Number(label="Position LR Max Steps", value=30000)
+                position_lr_init = gr.Number(label="Position LR Init", value=0.00032)
+                position_lr_final = gr.Number(label="Position LR Final", value=0.0000032)
+                position_lr_delay_mult = gr.Number(label="Position LR Delay Mult", value=0.02)
+                position_lr_max_steps = gr.Number(label="Position LR Max Steps", value=15000)
             with gr.Column():
                 feature_lr = gr.Number(label="Feature LR", value=0.0025)
                 opacity_lr = gr.Number(label="Opacity LR", value=0.05)
@@ -87,7 +88,7 @@ def gs_demo_tab():
                 densify_from_iter = gr.Number(label="Densify From Iter", value=500)
                 densify_until_iter = gr.Number(label="Densify Until Iter", value=15000)
                 densify_grad_threshold = gr.Number(label="Densify Grad Threshold", value=0.0002)
-                random_background = gr.Checkbox(label="Random Background", value=False)
+                iterations = gr.Slider(label="Iterations", value=7000, minimum=1, maximum=15000, step=5)
 
     start_button = gr.Button("Start Training")
@@ -129,12 +130,10 @@ def gs_demo_tab():
     start_button.click(
         fn=handle_training_complete,
         inputs=[
-            dataset_dropdown, sh_degree, model_path, images, resolution, white_background, data_device, eval,
-            convert_SHs_python, compute_cov3D_python, debug,
-            iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
+            dataset_dropdown, iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
             position_lr_max_steps, feature_lr, opacity_lr, scaling_lr, rotation_lr,
             percent_dense, lambda_dssim, densification_interval, opacity_reset_interval,
-            densify_from_iter, densify_until_iter, densify_grad_threshold, random_background
+            densify_from_iter, densify_until_iter, densify_grad_threshold
         ],
         outputs=[video_output, load_model_button, output, model_path_state]
    )
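This tab relies on an update_dataset_dropdown helper defined elsewhere in gs_demo.py. A plausible sketch of that refresh pattern, assuming Stage 1 writes run_* folders into a DATASET_DIR (both the body and the path are assumptions, not the repository's actual code):

import os
import gradio as gr

DATASET_DIR = "datasets"  # assumed location of Stage 1 outputs

def update_dataset_dropdown():
    # Rescan the output directory and push the fresh run_* list into the dropdown.
    if os.path.isdir(DATASET_DIR):
        runs = sorted(d for d in os.listdir(DATASET_DIR) if d.startswith("run_"))
    else:
        runs = []
    # gr.update modifies the component in place: new choices, plus a valid default value.
    return gr.update(choices=runs, value=runs[-1] if runs else "")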
demo/gs_train.py CHANGED
@@ -21,7 +21,8 @@ class PipelineParams:
 
 @dataclass
 class OptimizationParams:
-    iterations: int = 7000
+    # DEFAULT PARAMETERS
+    iterations: int = 30_000
     position_lr_init: float = 0.00016
     position_lr_final: float = 0.0000016
     position_lr_delay_mult: float = 0.01
@@ -50,26 +51,12 @@ class ModelParams:
     data_device: str = "cuda"
     eval: bool = False
 
-@dataclass
-class TrainingArgs:
-    ip: str = "0.0.0.0"
-    port: int = 6007
-    debug_from: int = -1
-    detect_anomaly: bool = False
-    test_iterations: list[int] = field(default_factory=lambda: [7_000, 30_000])
-    save_iterations: list[int] = field(default_factory=lambda: [7_000, 30_000])
-    quiet: bool = False
-    checkpoint_iterations: list[int] = field(default_factory=lambda: [7_000, 15_000, 30_000])
-    start_checkpoint: str = None
-
 @spaces.GPU(duration=20)
 def train(
-    data_source_path, sh_degree, model_path, images, resolution, white_background, data_device, eval,
-    convert_SHs_python, compute_cov3D_python, debug,
-    iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
+    data_source_path, iterations, position_lr_init, position_lr_final, position_lr_delay_mult,
     position_lr_max_steps, feature_lr, opacity_lr, scaling_lr, rotation_lr,
     percent_dense, lambda_dssim, densification_interval, opacity_reset_interval,
-    densify_from_iter, densify_until_iter, densify_grad_threshold, random_background
+    densify_from_iter, densify_until_iter, densify_grad_threshold
 ):
 
     # Add the path to the gaussian-splatting repository
@@ -96,22 +83,9 @@ def train(
 
     print(data_source_path)
     # Create instances of the parameter dataclasses
-    dataset = ModelParams(
-        sh_degree=sh_degree,
-        source_path=data_source_path,
-        model_path=model_path,
-        images=images,
-        resolution=resolution,
-        white_background=white_background,
-        data_device=data_device,
-        eval=eval
-    )
+    dataset = ModelParams(source_path=data_source_path)
 
-    pipe = PipelineParams(
-        convert_SHs_python=convert_SHs_python,
-        compute_cov3D_python=compute_cov3D_python,
-        debug=debug
-    )
+    pipe = PipelineParams()
 
     opt = OptimizationParams(
         iterations=iterations,
@@ -130,20 +104,7 @@ def train(
         densify_from_iter=densify_from_iter,
         densify_until_iter=densify_until_iter,
         densify_grad_threshold=densify_grad_threshold,
-        random_background=random_background
-    )
-
-    print("local_renderer")
-
-    args = TrainingArgs()
-
-    testing_iterations = args.test_iterations
-    saving_iterations = args.save_iterations
-    checkpoint_iterations = args.checkpoint_iterations
-    debug_from = args.debug_from
-
-    tb_writer = prepare_output_and_logger(dataset)
-
+    )
     gaussians = GaussianModel(dataset.sh_degree)
     scene = Scene(dataset, gaussians)
     gaussians.training_setup(opt)
@@ -175,9 +136,6 @@ def train(
         viewpoint_stack = scene.getTrainCameras().copy()
         viewpoint_cam = viewpoint_stack.pop(randint(0, len(viewpoint_stack)-1))
 
-        # Render
-        if (iteration - 1) == debug_from:
-            pipe.debug = True
         bg = torch.rand((3), device=DEVICE) if opt.random_background else background
 
         render_pkg = render(viewpoint_cam, gaussians, pipe, bg)
@@ -201,7 +159,6 @@ def train(
             progress_bar.close()
 
         # Log and save
-        training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render, (pipe, background))
         if (iteration == opt.iterations):
             point_cloud_path = os.path.join(os.path.join(dataset.model_path, "point_cloud/iteration_{}".format(iteration)), "point_cloud.ply")
             print("\n[ITER {}] Saving Gaussians to {}".format(iteration, point_cloud_path))
@@ -225,9 +182,9 @@ def train(
             gaussians.optimizer.step()
             gaussians.optimizer.zero_grad(set_to_none = True)
 
-        if (iteration == opt.iterations):
-            print("\n[ITER {}] Saving Checkpoint".format(iteration))
-            torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")
+        # if (iteration == opt.iterations):
+        #     print("\n[ITER {}] Saving Checkpoint".format(iteration))
+        #     torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")
 
 
 from os import makedirs
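A consequence of this change is that every field dropped from the train() signature now comes from dataclass defaults. A small sketch (fields abbreviated, values assumed to mirror gs_train.py) of why ModelParams(source_path=...) suffices:

from dataclasses import dataclass

@dataclass
class ModelParams:           # abbreviated stand-in for the dataclass in gs_train.py
    sh_degree: int = 3
    source_path: str = ""
    model_path: str = ""
    white_background: bool = True
    data_device: str = "cuda"

# Only source_path is supplied; every other field keeps its declared default,
# which is why the long keyword-by-keyword constructor call could be deleted.
dataset = ModelParams(source_path="datasets/run_0")
assert dataset.sh_degree == 3 and dataset.data_device == "cuda"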
demo/mast3r_demo.py CHANGED
@@ -33,7 +33,6 @@ import matplotlib.pyplot as pl
 
 import torch
 
-
 from demo_globals import CACHE_PATH, MODEL, DEVICE, SILENT, DATASET_DIR
 
 class SparseGAState():
@@ -175,20 +174,38 @@ def save_colmap_scene(scene, save_dir, min_conf_thr=2, clean_depth=False):
     return save_path
 
 @spaces.GPU(duration=10)
-def get_reconstructed_scene(image_size, current_scene_state,
-                            filelist, optim_level, lr1, niter1, lr2, niter2, min_conf_thr, matching_conf_thr,
-                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, scenegraph_type, winsize,
-                            win_cyclic, refid, TSDF_thresh, shared_intrinsics, **kw):
+def get_reconstructed_scene(current_scene_state,
+                            filelist, min_conf_thr, matching_conf_thr,
+                            as_pointcloud, cam_size, shared_intrinsics, **kw):
     """
     from a list of images, run mast3r inference, sparse global aligner.
     then run get_3D_model_from_scene
     """
+    image_size = 512
     imgs = load_images(filelist, size=image_size, verbose=not SILENT)
     if len(imgs) == 1:
         imgs = [imgs[0], copy.deepcopy(imgs[0])]
         imgs[1]['idx'] = 1
         filelist = [filelist[0], filelist[0] + '_2']
 
+    lr1 = 0.07
+    niter1 = 500
+    lr2 = 0.014
+    niter2 = 200
+    optim_level = 'refine'
+    mask_sky, clean_depth, transparent_cams = False, True, False
+    if len(filelist) < 13:
+        scenegraph_type = 'complete'
+        winsize = 1
+    else:
+        scenegraph_type = 'logwin'
+        half_size = math.ceil((len(filelist) - 1) / 2)
+        max_winsize = max(1, math.ceil(math.log(half_size, 2)))
+        winsize = min(5, max_winsize)
+    refid = 0
+    win_cyclic = False
+    TSDF_thresh = 0
+
     scene_graph_params = [scenegraph_type]
     if scenegraph_type in ["swin", "logwin"]:
         scene_graph_params.append(str(winsize))
@@ -198,8 +215,6 @@ def get_reconstructed_scene(
         scene_graph_params.append('noncyclic')
     scene_graph = '-'.join(scene_graph_params)
     pairs = make_pairs(imgs, scene_graph=scene_graph, prefilter=None, symmetrize=True)
-    if optim_level == 'coarse':
-        niter2 = 0
 
     base_cache_dir = os.path.join(CACHE_PATH, 'cache')
     os.makedirs(base_cache_dir, exist_ok=True)
@@ -214,7 +229,6 @@ def get_reconstructed_scene(
         return run_cache_dir
 
 
-    ten = torch.zeros((1024)).cuda()
     cache_dir = get_next_dir(base_cache_dir)
     scene = sparse_global_alignment(filelist, pairs, cache_dir,
                                     MODEL, lr1=lr1, niter1=niter1, lr2=lr2, niter2=niter2, device=DEVICE,
@@ -243,34 +257,7 @@ def get_reconstructed_scene(
     return scene_state, outfile
 
 
-def set_scenegraph_options(inputfiles, win_cyclic, refid, scenegraph_type):
-    num_files = len(inputfiles) if inputfiles is not None else 1
-    show_win_controls = scenegraph_type in ["swin", "logwin"]
-    show_winsize = scenegraph_type in ["swin", "logwin"]
-    show_cyclic = scenegraph_type in ["swin", "logwin"]
-    max_winsize, min_winsize = 1, 1
-    if scenegraph_type == "swin":
-        if win_cyclic:
-            max_winsize = max(1, math.ceil((num_files - 1) / 2))
-        else:
-            max_winsize = num_files - 1
-    elif scenegraph_type == "logwin":
-        if win_cyclic:
-            half_size = math.ceil((num_files - 1) / 2)
-            max_winsize = max(1, math.ceil(math.log(half_size, 2)))
-        else:
-            max_winsize = max(1, math.ceil(math.log(num_files, 2)))
-    winsize = gradio.Slider(label="Scene Graph: Window Size", value=max_winsize,
-                            minimum=min_winsize, maximum=max_winsize, step=1, visible=show_winsize)
-    win_cyclic = gradio.Checkbox(value=win_cyclic, label="Cyclic sequence", visible=show_cyclic)
-    win_col = gradio.Column(visible=show_win_controls)
-    refid = gradio.Slider(label="Scene Graph: Id", value=0, minimum=0,
-                          maximum=num_files - 1, step=1, visible=scenegraph_type == 'oneref')
-    return win_col, winsize, win_cyclic, refid
-
-
 def mast3r_demo_tab():
-
     if not SILENT:
         print('Outputing stuff in', CACHE_PATH)
@@ -281,100 +268,44 @@ def mast3r_demo_tab():
 
     with get_context() as demo:
         scene = gradio.State(None)
+
+        # Title for the MASt3R demo
         gradio.HTML('<h2 style="text-align: center;">MASt3R Demo</h2>')
-        with gradio.Column():
-            inputfiles = gradio.File(file_count="multiple")
-            with gradio.Row():
-                with gradio.Column():
-                    with gradio.Row():
-                        lr1 = gradio.Slider(label="Coarse LR", value=0.07, minimum=0.01, maximum=0.2, step=0.01)
-                        niter1 = gradio.Number(value=500, precision=0, minimum=0, maximum=10_000,
-                                               label="num_iterations", info="For coarse alignment!")
-                        lr2 = gradio.Slider(label="Fine LR", value=0.014, minimum=0.005, maximum=0.05, step=0.001)
-                        niter2 = gradio.Number(value=200, precision=0, minimum=0, maximum=100_000,
-                                               label="num_iterations", info="For refinement!")
-                        optim_level = gradio.Dropdown(["coarse", "refine", "refine+depth"],
-                                                      value='refine+depth', label="OptLevel",
-                                                      info="Optimization level")
-                    image_size = gradio.Dropdown(choices=[512, 224], label="Image Size", value=512)
-                    with gradio.Row():
-                        matching_conf_thr = gradio.Slider(label="Matching Confidence Thr", value=5.,
-                                                          minimum=0., maximum=30., step=0.1,
-                                                          info="Before Fallback to Regr3D!")
-                        shared_intrinsics = gradio.Checkbox(value=False, label="Shared intrinsics",
-                                                            info="Only optimize one set of intrinsics for all views")
-                        scenegraph_type = gradio.Dropdown([("complete: all possible image pairs", "complete"),
-                                                           ("swin: sliding window", "swin"),
-                                                           ("logwin: sliding window with long range", "logwin"),
-                                                           ("oneref: match one image with all", "oneref")],
-                                                          value='complete', label="Scenegraph",
-                                                          info="Define how to make pairs",
-                                                          interactive=True)
-                        with gradio.Column(visible=False) as win_col:
-                            winsize = gradio.Slider(label="Scene Graph: Window Size", value=1,
-                                                    minimum=1, maximum=1, step=1)
-                            win_cyclic = gradio.Checkbox(value=False, label="Cyclic sequence")
-                        refid = gradio.Slider(label="Scene Graph: Id", value=0,
-                                              minimum=0, maximum=0, step=1, visible=False)
-            run_btn = gradio.Button("Run")
-
-            with gradio.Row():
-                min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.5, minimum=0.0, maximum=10, step=0.1)
-                cam_size = gradio.Slider(label="cam_size", value=0.2, minimum=0.001, maximum=1.0, step=0.001)
-                TSDF_thresh = gradio.Slider(label="TSDF Threshold", value=0., minimum=0., maximum=1., step=0.01)
-            with gradio.Row():
-                as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
-                mask_sky = gradio.Checkbox(value=False, label="Mask sky")
-                clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps")
-                transparent_cams = gradio.Checkbox(value=False, label="Transparent cameras")
-
-            outmodel = gradio.Model3D()
-
-            scenegraph_type.change(set_scenegraph_options,
-                                   inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
-                                   outputs=[win_col, winsize, win_cyclic, refid])
-            inputfiles.change(set_scenegraph_options,
-                              inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
-                              outputs=[win_col, winsize, win_cyclic, refid])
-            win_cyclic.change(set_scenegraph_options,
-                              inputs=[inputfiles, win_cyclic, refid, scenegraph_type],
-                              outputs=[win_col, winsize, win_cyclic, refid])
-            run_btn.click(
-                fn=get_reconstructed_scene,
-                inputs=[image_size, scene, inputfiles, optim_level, lr1, niter1, lr2, niter2, min_conf_thr, matching_conf_thr,
-                        as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, scenegraph_type, winsize,
-                        win_cyclic, refid, TSDF_thresh, shared_intrinsics],
-                outputs=[scene, outmodel]
-            )
-            # min_conf_thr.release(
-            #     fn=get_3D_model_from_scene,
-            #     inputs=[scene, min_conf_thr, as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #     outputs=outmodel
-            # )
-            # cam_size.change(fn=get_3D_model_from_scene,
-            #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                         clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                 outputs=outmodel)
-            # TSDF_thresh.change(fn=get_3D_model_from_scene,
-            #                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                            clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                    outputs=outmodel)
-            # as_pointcloud.change(fn=get_3D_model_from_scene,
-            #                      inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                              clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                      outputs=outmodel)
-            # mask_sky.change(fn=get_3D_model_from_scene,
-            #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                         clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                 outputs=outmodel)
-            # clean_depth.change(fn=get_3D_model_from_scene,
-            #                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                            clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                    outputs=outmodel)
-            # transparent_cams.change(fn=get_3D_model_from_scene,
-            #                         inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                                 clean_depth, transparent_cams, cam_size, TSDF_thresh],
-            #                         outputs=outmodel)
+
+        # Instructions for the MASt3R demo
+        gradio.HTML('''
+            <div style="padding: 10px; background-color: #e9f7ef; border-radius: 5px; margin-bottom: 10px;">
+                <h3>Instructions for MASt3R Demo</h3>
+                <ul style="text-align: left; color: #333;">
+                    <li>Upload images. It is recommended to use no more than 10-12 images to avoid exceeding the 3-minute runtime limit for ZeroGPU dynamic resources.</li>
+                    <li>Press the "Run" button to start the process.</li>
+                    <li>Once this stage is finished and the point cloud with cameras is visible below, switch to the 3DGS tab and follow the instructions there.</li>
+                </ul>
+            </div>
+        ''')
+
+        inputfiles = gradio.File(file_count="multiple")
+
+        run_btn = gradio.Button("Run")
+
+        with gradio.Row():
+            matching_conf_thr = gradio.Slider(label="Matching Confidence Thr", value=5.,
+                                              minimum=0., maximum=30., step=0.1,
+                                              info="Before Fallback to Regr3D!")
+            min_conf_thr = gradio.Slider(label="min_conf_thr", value=1.5, minimum=0.0, maximum=10, step=0.1)
+            cam_size = gradio.Slider(label="cam_size", value=0.2, minimum=0.001, maximum=1.0, step=0.001)
+        with gradio.Row():
+            as_pointcloud = gradio.Checkbox(value=True, label="As pointcloud")
+            shared_intrinsics = gradio.Checkbox(value=False, label="Shared intrinsics",
+                                                info="Only optimize one set of intrinsics for all views")
+
+        outmodel = gradio.Model3D()
+        run_btn.click(
+            fn=get_reconstructed_scene,
+            inputs=[scene, inputfiles, min_conf_thr, matching_conf_thr,
+                    as_pointcloud, cam_size, shared_intrinsics],
+            outputs=[scene, outmodel]
+        )
 
     return demo
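A quick way to sanity-check the hard-coded scene-graph heuristic introduced above: fewer than 13 images match every pair ('complete'); beyond that a logarithmic sliding window caps the pair count. The helper name below is ours, for illustration only:

import math

def pick_scenegraph(num_files: int):
    # Mirrors the constants hard-coded in get_reconstructed_scene.
    if num_files < 13:
        return 'complete', 1
    half_size = math.ceil((num_files - 1) / 2)
    max_winsize = max(1, math.ceil(math.log(half_size, 2)))
    return 'logwin', min(5, max_winsize)

print(pick_scenegraph(8))    # ('complete', 1)
print(pick_scenegraph(20))   # ('logwin', 4), since ceil(log2(10)) == 4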
requirements.txt CHANGED
@@ -4,6 +4,10 @@
 torch==2.4.0
 torchvision==0.19.0
 
+https://huggingface.co/spaces/ostapagon/mast3r-3dgs/resolve/main/wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
+https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
+https://huggingface.co/spaces/ostapagon/mast3r-3dgs/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
+
 gradio
 matplotlib
 tqdm
@@ -28,8 +32,4 @@ pytorch-lightning==2.1.0
 PyYAML==6.0.1
 ipywidgets
 jupyterlab
-sql
-
-https://huggingface.co/spaces/ostapagon/mast3r-3dgs/resolve/main/wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
-https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true
-https://huggingface.co/spaces/ostapagon/mast3r-3dgs/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
+sql
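pip installs direct wheel URLs like any other requirement, but the cp310/linux_x86_64 tags above only resolve on a matching interpreter. A small pre-flight check (a sketch, not part of the repository) one could run before `pip install -r requirements.txt`:

import platform
import sys

# The three wheels are tagged cp310-cp310-linux_x86_64, so they can only be
# installed on CPython 3.10 running on 64-bit Linux.
assert sys.version_info[:2] == (3, 10), "wheels in requirements.txt need Python 3.10"
assert platform.system() == "Linux" and platform.machine() == "x86_64"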
 
 
 
 
wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6da49f22b9fd37ee6ed579a9724670fc54c0514fd418931b486996c0c2ff8748
-size 3041144
+oid sha256:bdbf04f4682c8dcd5d28b6c83755be96e46e6f5d5afb85e9cf0a0e49119e092e
+size 3043536