adirathor07 commited on
Commit
9650ca8
·
1 Parent(s): 482624a

added other models demo

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  saved_model/*pth filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  saved_model/*pth filter=lfs diff=lfs merge=lfs -text
37
+ media/** filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,11 +1,257 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import plotly.graph_objects as go
4
- from PIL import Image
5
  from helpers import *
6
 
7
- # --- APP START ---
8
- st.title("2D → 3D Voxel Reconstruction Viewer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  uploaded_images = st.file_uploader(f"Upload images", accept_multiple_files=True, type=["png", "jpg", "jpeg"])
11
  # print(uploaded_images)
 
1
  import streamlit as st
2
+ import os
3
+ import shutil
4
+ import zipfile
5
+ from space_carving import run_space_carving
6
+ from sfm import run_sfm
7
+ import open3d as o3d
8
+ from PIL import Image
9
+ import matplotlib.pyplot as plt
10
  import numpy as np
11
  import plotly.graph_objects as go
 
12
  from helpers import *
13
 
14
+
15
+ st.set_page_config(page_title="3D Reconstruction Web App", layout="wide")
16
+ st.markdown("<div style='text-align: center;'><h1>3D Scene Reconstruction</h1></div>", unsafe_allow_html=True)
17
+ st.markdown("<div style='text-align: center;'><h2>Computer Vision Course Project</h2></div>", unsafe_allow_html=True)
18
+ st.markdown("<div style='text-align: right; font-weight:none;color : gray;'><h5>Course Instructor - Dr. Pratik Mazumdar</h5></div>", unsafe_allow_html=True)
19
+ st.markdown(
20
+ "<h4><a href='https://github.com/majisouvik26/3d-scene-reconstruction' target='_blank'>Github Link of Project</a></h4>",
21
+ unsafe_allow_html=True
22
+ )
23
+ st.header("Team Member:")
24
+ st.markdown("<div> <ul><li>Aditya Sahani(B22CS003)</li><li>Veeraraju Elluru(B22CS080)</li><li>Souvik Maji(B22CS089)</li><li>Dishit Sharma(B22CS082)</li><li>Aditya Rathor(B22AI044)</li></ul></div>", unsafe_allow_html=True)
25
+
26
+
27
+ st.header("Introduction")
28
+ st.write("3D scene reconstruction is a fundamental problem in computer vision that involves recovering the three-dimensional structure of a scene from a set of two-dimensional images. " \
29
+ "The goal is to generate an accurate and detailed representation of the real world, typically in the form of point clouds, meshes, or volumetric models. This process plays a crucial role in various applications such as robotics, augmented and virtual reality (AR/VR), autonomous navigation, and cultural heritage preservation." \
30
+ " The reconstruction pipeline often incorporates techniques like multi-view stereo, Structure from Motion (SfM), and volumetric methods like voxel carving. By leveraging image geometry, camera calibration data, and feature correspondences across views, 3D reconstruction enables machines to perceive and interpret the spatial layout of the physical world.")
31
+
32
+
33
+ st.image("https://filelist.tudelft.nl/BK/Onderzoek/Research_stories/zhaiyu.png", caption="3D Reconstruction")
34
+
35
+
36
+ st.header("Methods Used and Results")
37
+ st.subheader("1. NeRF - Neural Radiance Fields (NeRF)")
38
+
39
+ col1, col2 = st.columns(2)
40
+
41
+ with col1:
42
+ st.image("media/test_image_0.png", width=400, caption="Input Image")
43
+
44
+ with col2:
45
+ st.image("media/truck_reconstruction.gif", width=400, caption="3D Reconstruction")
46
+
47
+ st.subheader("2. Space Carving")
48
+ col1, col2 = st.columns(2)
49
+
50
+ with col1:
51
+ st.image("media/input_images.png", width=400, caption="Input Image")
52
+
53
+ with col2:
54
+ st.image("media/shape_mesh.png", width=400, caption="3D Reconstruction")
55
+
56
+ st.subheader("3. Pix2Vox")
57
+ col1, col2 = st.columns(2)
58
+
59
+ with col1:
60
+ st.image("media/pix.jpg", width=400, caption="Input Image")
61
+
62
+ with col2:
63
+ st.image("media/pix_output.jpg", width=400, caption="3D Reconstruction")
64
+
65
+
66
+ st.subheader("4. SFM Method")
67
+ col1, col2 = st.columns(2)
68
+
69
+ with col1:
70
+ st.image("media/DSC_0351.JPG", width=400, caption="Input Image")
71
+
72
+ with col2:
73
+ st.image("media/image.png", width=400, caption="3D Reconstruction")
74
+
75
+ st.subheader("5. Incremental SFM Method")
76
+ col1, col2 = st.columns(2)
77
+
78
+ with col1:
79
+ st.image("media/WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg", width=400, caption="Input Image")
80
+
81
+ with col2:
82
+ st.image("media/rotation_sfm_cam_1_2_3_4[1].gif", width=400, caption="3D Reconstruction")
83
+
84
+ st.subheader("6. Gaussian Splatting Method")
85
+ # col1, col2 = st.columns(2)
86
+
87
+ # with col1:
88
+ # st.image("WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg", width=400, caption="Input Image")
89
+
90
+ # with col2:
91
+ st.image("media/gs.gif", width=400, caption="3D Reconstruction")
92
+
93
+
94
+
95
+
96
+ st.header("DEMO OF MODELS")
97
+
98
+ def show_ply_interactive(ply_path):
99
+ # Load point cloud
100
+ pcd = o3d.io.read_point_cloud(ply_path)
101
+ points = np.asarray(pcd.points)
102
+
103
+ # Optional: use colors
104
+ if pcd.has_colors():
105
+ colors = np.asarray(pcd.colors)
106
+ else:
107
+ colors = np.full_like(points, fill_value=0.5) # default gray
108
+
109
+ # Create interactive plot
110
+ fig = go.Figure(data=[go.Scatter3d(
111
+ x=points[:, 0], y=points[:, 1], z=points[:, 2],
112
+ mode='markers',
113
+ marker=dict(
114
+ size=1.5,
115
+ color=colors,
116
+ opacity=0.8
117
+ )
118
+ )])
119
+
120
+ fig.update_layout(
121
+ scene=dict(
122
+ xaxis_title='X', yaxis_title='Y', zaxis_title='Z'
123
+ ),
124
+ width=800,
125
+ height=600,
126
+ margin=dict(r=10, l=10, b=10, t=10)
127
+ )
128
+
129
+ return fig
130
+
131
+ # Show PLY as image
132
+ def show_ply_as_image(ply_path):
133
+ # Load point cloud
134
+ pcd = o3d.io.read_point_cloud(ply_path)
135
+
136
+ # Create visualization window (offscreen)
137
+ vis = o3d.visualization.Visualizer()
138
+ vis.create_window(visible=False)
139
+ vis.add_geometry(pcd)
140
+
141
+ # Set camera view
142
+ ctr = vis.get_view_control()
143
+ ctr.set_zoom(0.7)
144
+
145
+ vis.poll_events()
146
+ vis.update_renderer()
147
+
148
+ # Screenshot to numpy
149
+ image = vis.capture_screen_float_buffer(do_render=True)
150
+ vis.destroy_window()
151
+
152
+ # Convert to displayable image
153
+ img = (np.asarray(image) * 255).astype(np.uint8)
154
+ return Image.fromarray(img)
155
+
156
+ # ---------- Function to extract zip ----------
157
+ def extract_zip(zip_file, extract_to):
158
+ if os.path.exists(extract_to):
159
+ shutil.rmtree(extract_to)
160
+ os.makedirs(extract_to)
161
+
162
+ with zipfile.ZipFile(zip_file, 'r') as zip_ref:
163
+ zip_ref.extractall(extract_to)
164
+
165
+ return extract_to
166
+
167
+ # ---------- SPACE CARVING ----------
168
+ st.header("📦 Space Carving")
169
+ st.markdown("""
170
+ **Space Carving** is a volumetric method that uses silhouettes from multiple views to reconstruct a 3D object by carving away inconsistent voxels.
171
+
172
+ 👉 Upload a `.zip` file containing images (JPG/PNG) from different calibrated views.
173
+ """)
174
+
175
+ sc_zip = st.file_uploader("Upload ZIP file for Space Carving", type=["zip"])
176
+
177
+ if sc_zip:
178
+ sc_extract_path = "uploads_spacecarving"
179
+ with open("temp_spacecarving.zip", "wb") as f:
180
+ f.write(sc_zip.getbuffer())
181
+
182
+ extract_zip("temp_spacecarving.zip", sc_extract_path)
183
+ st.success("Extracted images.")
184
+
185
+ if st.button("Run Space Carving Model"):
186
+ output = run_space_carving() # This should generate the .vtr file
187
+ st.success("Model ran successfully.")
188
+
189
+ # Path to generated .vtr file
190
+ vtr_path = "res_space/shape.vtr" # Update if filename differs
191
+
192
+ if os.path.exists(vtr_path):
193
+ st.markdown("### 📥 Download Space Carved VTR File")
194
+ with open(vtr_path, "rb") as f:
195
+ st.download_button(
196
+ label="Download .vtr file",
197
+ data=f,
198
+ file_name=os.path.basename(vtr_path),
199
+ mime="application/octet-stream"
200
+ )
201
+ else:
202
+ st.warning("No .vtr file found. Make sure the model ran successfully.")
203
+
204
+
205
+ # ---------- STRUCTURE FROM MOTION ----------
206
+ st.markdown("---")
207
+ st.header("📷 Structure from Motion (SfM)")
208
+ st.markdown("""
209
+ **Structure from Motion (SfM)** reconstructs 3D geometry and camera poses from a series of images.
210
+
211
+ 👉 Upload a `.zip` file containing your image dataset (JPG/PNG).
212
+ """)
213
+
214
+ sfm_zip_file = st.file_uploader("Upload ZIP file for SfM", type=["zip"])
215
+
216
+ if sfm_zip_file is not None:
217
+ zip_name = os.path.splitext(sfm_zip_file.name)[0] # 👉 'dataset.zip' → 'dataset'
218
+ sfm_extract_path = "uploads_sfm"
219
+
220
+ extract_zip(sfm_zip_file, sfm_extract_path)
221
+ st.success(f"Extracted {zip_name} dataset.")
222
+
223
+ if st.button("Run SfM Model"):
224
+ output = run_sfm(sfm_extract_path + "\\" + zip_name)
225
+ st.success("Model ran successfully.")
226
+
227
+ # Construct PLY path based on zip filename
228
+ ply_path = os.path.join("res", f"{zip_name}.ply")
229
+
230
+ if os.path.exists(ply_path):
231
+ st.markdown("### 🧩 Reconstructed Point Cloud Image")
232
+ image = show_ply_as_image(ply_path)
233
+ st.image(image, caption=f"{zip_name}.ply", use_column_width=True)
234
+
235
+ # Optional download
236
+ with open(ply_path, "rb") as f:
237
+ st.download_button(
238
+ label="📥 Download .ply file",
239
+ data=f,
240
+ file_name=f"{zip_name}.ply",
241
+ mime="application/octet-stream"
242
+ )
243
+ else:
244
+ st.warning(f"No .ply file named {zip_name}.ply found in 'res/'.")
245
+
246
+ if os.path.exists(ply_path):
247
+ st.markdown("### 🧩 Reconstructed Point Cloud (Interactive)")
248
+ fig = show_ply_interactive(ply_path)
249
+ st.plotly_chart(fig, use_container_width=True)
250
+
251
+
252
+
253
+
254
+ st.header("🧠 Pix2Vox")
255
 
256
  uploaded_images = st.file_uploader(f"Upload images", accept_multiple_files=True, type=["png", "jpg", "jpeg"])
257
  # print(uploaded_images)
media/DSC_0351.JPG ADDED

Git LFS Details

  • SHA256: 48c7aa60596476946adaae0a6860a2b47e7de45bae2c5fef664601a7c63fd403
  • Pointer size: 131 Bytes
  • Size of remote file: 630 kB
media/WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg ADDED

Git LFS Details

  • SHA256: dd621fd09ba7e1ccb9ab93bc7884bebc8e41156c758260ed12937068d530b87e
  • Pointer size: 130 Bytes
  • Size of remote file: 21.8 kB
media/gs.gif ADDED

Git LFS Details

  • SHA256: 219fd0cb5fbfcb673bd9166c33d2d79d0ebacec1281ed670e2038f54d2f7c777
  • Pointer size: 133 Bytes
  • Size of remote file: 15.3 MB
media/image.png ADDED

Git LFS Details

  • SHA256: 7d8ed5e5800d7dc2624ae8ba166f4ea74e59918f94710b309aa48928c5da77d2
  • Pointer size: 130 Bytes
  • Size of remote file: 97.4 kB
media/input_images.png ADDED

Git LFS Details

  • SHA256: 2468f156d9b707e5c0b665c30aa6b36291c19134136cdce4e924de99ac68e1a1
  • Pointer size: 131 Bytes
  • Size of remote file: 406 kB
media/pix.jpg ADDED

Git LFS Details

  • SHA256: 1462689fda8ee7c0df59286f5821127adb341a43f71e51bb0fc4ae4e4b863be7
  • Pointer size: 130 Bytes
  • Size of remote file: 25.7 kB
media/pix_output.jpg ADDED

Git LFS Details

  • SHA256: 624e8ad75b7e4b7eb144855f75e9b092cfe4903bce3b2c6f2f6a0efa08154380
  • Pointer size: 130 Bytes
  • Size of remote file: 24.7 kB
media/rotation_sfm_cam_1_2_3_4[1].gif ADDED

Git LFS Details

  • SHA256: f31c821cfb6db6b102000909696468226d9673274f539c90701e4ca6383b63a2
  • Pointer size: 132 Bytes
  • Size of remote file: 2.87 MB
media/shape_mesh.gif ADDED

Git LFS Details

  • SHA256: 855abb5675873cb9a3c9e0b0963acdd249af6bef18aef0e1b6bb9a2460807233
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
media/shape_mesh.png ADDED

Git LFS Details

  • SHA256: 66366ee39cc2b63ed876455af0fca3c384d41158ffc066d22fafb229aa501f38
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
media/test_image_0.png ADDED

Git LFS Details

  • SHA256: 2289332fa51ea3a17098dba178f24215d5938e31900bb6a776302cb07bf7a8ea
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
media/truck_reconstruction.gif ADDED

Git LFS Details

  • SHA256: 5c9f3791342728595fec1dbae96fcffd95afa33e5f14d28db45a05c3e3bca5ca
  • Pointer size: 132 Bytes
  • Size of remote file: 5.33 MB
packages.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ libgl1
2
+ libglib2.0-0
3
+ libxrender1
4
+ libsm6
5
+ libxext6
ply_run.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import open3d as o3d
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Show PLY as image
def show_ply_as_image(ply_path):
    """Offscreen-render the point cloud at *ply_path* and return it as a PIL image.

    Standalone copy of the helper that also lives in app.py, kept here so it
    can be run outside the Streamlit app.
    """
    # Load point cloud
    pcd = o3d.io.read_point_cloud(ply_path)

    # Create visualization window (offscreen)
    vis = o3d.visualization.Visualizer()
    vis.create_window(visible=False)
    vis.add_geometry(pcd)

    # Set camera view
    ctr = vis.get_view_control()
    ctr.set_zoom(0.7)

    vis.poll_events()
    vis.update_renderer()

    # Screenshot to numpy
    image = vis.capture_screen_float_buffer(do_render=True)
    vis.destroy_window()

    # Convert to displayable image: float buffer in [0, 1] -> uint8 in [0, 255]
    img = (np.asarray(image) * 255).astype(np.uint8)
    return Image.fromarray(img)
requirements.txt CHANGED
@@ -1,11 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  argparse
2
  easydict
3
- matplotlib
4
- numpy
5
- opencv-python
6
- scipy
7
  torch
8
  torchvision
9
- streamlit
10
- plotly
11
- pillow
 
1
+ attrs==25.3.0
2
+ blinker==1.9.0
3
+ cachetools==5.5.2
4
+ certifi==2025.1.31
5
+ charset-normalizer==3.4.1
6
+ click==8.1.8
7
+ colorama==0.4.6
8
+ fastjsonschema==2.21.1
9
+ gitdb==4.0.12
10
+ idna==3.10
11
+ Jinja2==3.1.6
12
+ MarkupSafe==3.0.2
13
+ narwhals==1.34.1
14
+ numpy==2.2.4
15
+ packaging==24.2
16
+ pandas==2.2.3
17
+ pillow==11.1.0
18
+ plotly==6.0.1
19
+ prompt_toolkit==3.0.50
20
+ protobuf==5.29.4
21
+ pure_eval==0.2.3
22
+ pyarrow==19.0.1
23
+ pydeck==0.9.1
24
+ Pygments==2.19.1
25
+ pyparsing==3.2.3
26
+ python-dateutil==2.9.0.post0
27
+ pytz==2025.2
28
+ referencing==0.36.2
29
+ requests==2.32.3
30
+ retrying==1.3.4
31
+ rpds-py==0.24.0
32
+ scipy==1.15.2
33
+ setuptools==78.1.0
34
+ six==1.17.0
35
+ smmap==5.0.2
36
+ tenacity==9.1.2
37
+ toml==0.10.2
38
+ tomlkit==0.13.2
39
+ tornado==6.4.2
40
+ tqdm==4.67.1
41
+ traitlets==5.14.3
42
+ typing_extensions==4.13.2
43
+ tzdata==2025.2
44
+ urllib3==2.4.0
45
+ watchdog==6.0.0
46
+ wcwidth==0.2.13
47
+ Werkzeug==3.0.6
48
+ widgetsnbextension==4.0.14
49
+ zipp==3.21.0
50
+ vtk
51
+ opencv-python
52
+ opencv-contrib-python
53
+ streamlit
54
+ matplotlib
55
+ open3d
56
  argparse
57
  easydict
 
 
 
 
58
  torch
59
  torchvision
60
+
 
 
sfm.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from scipy.optimize import least_squares
5
+ from tomlkit import boolean
6
+ from tqdm import tqdm
7
+ import matplotlib.pyplot as plt
8
+
9
+ class Image_loader():
10
+ def __init__(self, img_dir:str, downscale_factor:float):
11
+ # loading the Camera intrinsic parameters K
12
+ with open(img_dir + '\\K.txt') as f:
13
+ self.K = np.array(list((map(lambda x:list(map(lambda x:float(x), x.strip().split(' '))),f.read().split('\n')))))
14
+ self.image_list = []
15
+ # Loading the set of images
16
+ for image in sorted(os.listdir(img_dir)):
17
+ if image[-4:].lower() == '.jpg' or image[-5:].lower() == '.png':
18
+ self.image_list.append(img_dir + '\\' + image)
19
+
20
+ self.path = os.getcwd()
21
+ self.factor = downscale_factor
22
+ self.downscale()
23
+
24
+
25
+ def downscale(self) -> None:
26
+ '''
27
+ Downscales the Image intrinsic parameter acc to the downscale factor
28
+ '''
29
+ self.K[0, 0] /= self.factor
30
+ self.K[1, 1] /= self.factor
31
+ self.K[0, 2] /= self.factor
32
+ self.K[1, 2] /= self.factor
33
+
34
+ def downscale_image(self, image):
35
+ for _ in range(1,int(self.factor / 2) + 1):
36
+ image = cv2.pyrDown(image)
37
+ return image
38
+
39
+ class Sfm():
40
+ def __init__(self, img_dir:str, downscale_factor:float = 2.0) -> None:
41
+ '''
42
+ Initialise and Sfm object.
43
+ '''
44
+ self.img_obj = Image_loader(img_dir,downscale_factor)
45
+
46
+ def triangulation(self, point_2d_1, point_2d_2, projection_matrix_1, projection_matrix_2) -> tuple:
47
+ '''
48
+ Triangulates 3d points from 2d vectors and projection matrices
49
+ returns projection matrix of first camera, projection matrix of second camera, point cloud
50
+ '''
51
+ pt_cloud = cv2.triangulatePoints(point_2d_1, point_2d_2, projection_matrix_1.T, projection_matrix_2.T)
52
+ return projection_matrix_1.T, projection_matrix_2.T, (pt_cloud / pt_cloud[3])
53
+
54
+ def PnP(self, obj_point, image_point , K, dist_coeff, rot_vector, initial) -> tuple:
55
+ '''
56
+ Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
57
+ returns rotational matrix, translational matrix, image points, object points, rotational vector
58
+ '''
59
+ if initial == 1:
60
+ obj_point = obj_point[:, 0 ,:]
61
+ image_point = image_point.T
62
+ rot_vector = rot_vector.T
63
+ _, rot_vector_calc, tran_vector, inlier = cv2.solvePnPRansac(obj_point, image_point, K, dist_coeff, cv2.SOLVEPNP_ITERATIVE)
64
+ # Converts a rotation matrix to a rotation vector or vice versa
65
+ rot_matrix, _ = cv2.Rodrigues(rot_vector_calc)
66
+
67
+ if inlier is not None:
68
+ image_point = image_point[inlier[:, 0]]
69
+ obj_point = obj_point[inlier[:, 0]]
70
+ rot_vector = rot_vector[inlier[:, 0]]
71
+ return rot_matrix, tran_vector, image_point, obj_point, rot_vector
72
+
73
+ def reprojection_error(self, obj_points, image_points, transform_matrix, K, homogenity) ->tuple:
74
+ '''
75
+ Calculates the reprojection error ie the distance between the projected points and the actual points.
76
+ returns total error, object points
77
+ '''
78
+ rot_matrix = transform_matrix[:3, :3]
79
+ tran_vector = transform_matrix[:3, 3]
80
+ rot_vector, _ = cv2.Rodrigues(rot_matrix)
81
+ if homogenity == 1:
82
+ obj_points = cv2.convertPointsFromHomogeneous(obj_points.T)
83
+ image_points_calc, _ = cv2.projectPoints(obj_points, rot_vector, tran_vector, K, None)
84
+ image_points_calc = np.float32(image_points_calc[:, 0, :])
85
+ total_error = cv2.norm(image_points_calc, np.float32(image_points.T) if homogenity == 1 else np.float32(image_points), cv2.NORM_L2)
86
+ return total_error / len(image_points_calc), obj_points
87
+
88
+ def optimal_reprojection_error(self, obj_points) -> np.array:
89
+ '''
90
+ calculates of the reprojection error during bundle adjustment
91
+ returns error
92
+ '''
93
+ transform_matrix = obj_points[0:12].reshape((3,4))
94
+ K = obj_points[12:21].reshape((3,3))
95
+ rest = int(len(obj_points[21:]) * 0.4)
96
+ p = obj_points[21:21 + rest].reshape((2, int(rest/2))).T
97
+ obj_points = obj_points[21 + rest:].reshape((int(len(obj_points[21 + rest:])/3), 3))
98
+ rot_matrix = transform_matrix[:3, :3]
99
+ tran_vector = transform_matrix[:3, 3]
100
+ rot_vector, _ = cv2.Rodrigues(rot_matrix)
101
+ image_points, _ = cv2.projectPoints(obj_points, rot_vector, tran_vector, K, None)
102
+ image_points = image_points[:, 0, :]
103
+ error = [ (p[idx] - image_points[idx])**2 for idx in range(len(p))]
104
+ return np.array(error).ravel()/len(p)
105
+
106
+ def bundle_adjustment(self, _3d_point, opt, transform_matrix_new, K, r_error) -> tuple:
107
+ '''
108
+ Bundle adjustment for the image and object points
109
+ returns object points, image points, transformation matrix
110
+ '''
111
+ opt_variables = np.hstack((transform_matrix_new.ravel(), K.ravel()))
112
+ opt_variables = np.hstack((opt_variables, opt.ravel()))
113
+ opt_variables = np.hstack((opt_variables, _3d_point.ravel()))
114
+
115
+ values_corrected = least_squares(self.optimal_reprojection_error, opt_variables, gtol = r_error).x
116
+ K = values_corrected[12:21].reshape((3,3))
117
+ rest = int(len(values_corrected[21:]) * 0.4)
118
+ return values_corrected[21 + rest:].reshape((int(len(values_corrected[21 + rest:])/3), 3)), values_corrected[21:21 + rest].reshape((2, int(rest/2))).T, values_corrected[0:12].reshape((3,4))
119
+
120
+ def to_ply(self, path, point_cloud, colors) -> None:
121
+ '''
122
+ Generates the .ply which can be used to open the point cloud
123
+ '''
124
+ out_points = point_cloud.reshape(-1, 3) * 200
125
+ out_colors = colors.reshape(-1, 3)
126
+ print(out_colors.shape, out_points.shape)
127
+ verts = np.hstack([out_points, out_colors])
128
+
129
+
130
+ mean = np.mean(verts[:, :3], axis=0)
131
+ scaled_verts = verts[:, :3] - mean
132
+ dist = np.sqrt(scaled_verts[:, 0] ** 2 + scaled_verts[:, 1] ** 2 + scaled_verts[:, 2] ** 2)
133
+ indx = np.where(dist < np.mean(dist) + 300)
134
+ verts = verts[indx]
135
+ ply_header = '''ply
136
+ format ascii 1.0
137
+ element vertex %(vert_num)d
138
+ property float x
139
+ property float y
140
+ property float z
141
+ property uchar blue
142
+ property uchar green
143
+ property uchar red
144
+ end_header
145
+ '''
146
+ with open(path + '\\res\\' + self.img_obj.image_list[0].split('\\')[-2] + '.ply', 'w') as f:
147
+ f.write(ply_header % dict(vert_num=len(verts)))
148
+ np.savetxt(f, verts, '%f %f %f %d %d %d')
149
+
150
+
151
+ def common_points(self, image_points_1, image_points_2, image_points_3) -> tuple:
152
+ '''
153
+ Finds the common points between image 1 and 2 , image 2 and 3
154
+ returns common points of image 1-2, common points of image 2-3, mask of common points 1-2 , mask for common points 2-3
155
+ '''
156
+ cm_points_1 = []
157
+ cm_points_2 = []
158
+ for i in range(image_points_1.shape[0]):
159
+ a = np.where(image_points_2 == image_points_1[i, :])
160
+ if a[0].size != 0:
161
+ cm_points_1.append(i)
162
+ cm_points_2.append(a[0][0])
163
+
164
+ mask_array_1 = np.ma.array(image_points_2, mask=False)
165
+ mask_array_1.mask[cm_points_2] = True
166
+ mask_array_1 = mask_array_1.compressed()
167
+ mask_array_1 = mask_array_1.reshape(int(mask_array_1.shape[0] / 2), 2)
168
+
169
+ mask_array_2 = np.ma.array(image_points_3, mask=False)
170
+ mask_array_2.mask[cm_points_2] = True
171
+ mask_array_2 = mask_array_2.compressed()
172
+ mask_array_2 = mask_array_2.reshape(int(mask_array_2.shape[0] / 2), 2)
173
+ print(" Shape New Array", mask_array_1.shape, mask_array_2.shape)
174
+ return np.array(cm_points_1), np.array(cm_points_2), mask_array_1, mask_array_2
175
+
176
+
177
+ def find_features(self, image_0, image_1) -> tuple:
178
+ '''
179
+ Feature detection using the sift algorithm and KNN
180
+ return keypoints(features) of image1 and image2
181
+ '''
182
+
183
+ sift = cv2.xfeatures2d.SIFT_create()
184
+ key_points_0, desc_0 = sift.detectAndCompute(cv2.cvtColor(image_0, cv2.COLOR_BGR2GRAY), None)
185
+ key_points_1, desc_1 = sift.detectAndCompute(cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY), None)
186
+
187
+ bf = cv2.BFMatcher()
188
+ matches = bf.knnMatch(desc_0, desc_1, k=2)
189
+ feature = []
190
+ for m, n in matches:
191
+ if m.distance < 0.70 * n.distance:
192
+ feature.append(m)
193
+
194
+ return np.float32([key_points_0[m.queryIdx].pt for m in feature]), np.float32([key_points_1[m.trainIdx].pt for m in feature])
195
+
196
+ def __call__(self, enable_bundle_adjustment:boolean=False):
197
+ cv2.namedWindow('image', cv2.WINDOW_NORMAL)
198
+ pose_array = self.img_obj.K.ravel()
199
+ transform_matrix_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]])
200
+ transform_matrix_1 = np.empty((3, 4))
201
+
202
+ pose_0 = np.matmul(self.img_obj.K, transform_matrix_0)
203
+ pose_1 = np.empty((3, 4))
204
+ total_points = np.zeros((1, 3))
205
+ total_colors = np.zeros((1, 3))
206
+
207
+ image_0 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[0]))
208
+ image_1 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[1]))
209
+
210
+ feature_0, feature_1 = self.find_features(image_0, image_1)
211
+
212
+ # Essential matrix
213
+ essential_matrix, em_mask = cv2.findEssentialMat(feature_0, feature_1, self.img_obj.K, method=cv2.RANSAC, prob=0.999, threshold=0.4, mask=None)
214
+ feature_0 = feature_0[em_mask.ravel() == 1]
215
+ feature_1 = feature_1[em_mask.ravel() == 1]
216
+
217
+
218
+ _, rot_matrix, tran_matrix, em_mask = cv2.recoverPose(essential_matrix, feature_0, feature_1, self.img_obj.K)
219
+ feature_0 = feature_0[em_mask.ravel() > 0]
220
+ feature_1 = feature_1[em_mask.ravel() > 0]
221
+ transform_matrix_1[:3, :3] = np.matmul(rot_matrix, transform_matrix_0[:3, :3])
222
+ transform_matrix_1[:3, 3] = transform_matrix_0[:3, 3] + np.matmul(transform_matrix_0[:3, :3], tran_matrix.ravel())
223
+
224
+ pose_1 = np.matmul(self.img_obj.K, transform_matrix_1)
225
+
226
+ feature_0, feature_1, points_3d = self.triangulation(pose_0, pose_1, feature_0, feature_1)
227
+ error, points_3d = self.reprojection_error(points_3d, feature_1, transform_matrix_1, self.img_obj.K, homogenity = 1)
228
+ #ideally error < 1
229
+ print("REPROJECTION ERROR: ", error)
230
+ _, _, feature_1, points_3d, _ = self.PnP(points_3d, feature_1, self.img_obj.K, np.zeros((5, 1), dtype=np.float32), feature_0, initial=1)
231
+
232
+ total_images = len(self.img_obj.image_list) - 2
233
+ pose_array = np.hstack((np.hstack((pose_array, pose_0.ravel())), pose_1.ravel()))
234
+
235
+ threshold = 0.5
236
+ for i in tqdm(range(total_images)):
237
+ image_2 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[i + 2]))
238
+ features_cur, features_2 = self.find_features(image_1, image_2)
239
+
240
+ if i != 0:
241
+ feature_0, feature_1, points_3d = self.triangulation(pose_0, pose_1, feature_0, feature_1)
242
+ feature_1 = feature_1.T
243
+ points_3d = cv2.convertPointsFromHomogeneous(points_3d.T)
244
+ points_3d = points_3d[:, 0, :]
245
+
246
+
247
+ cm_points_0, cm_points_1, cm_mask_0, cm_mask_1 = self.common_points(feature_1, features_cur, features_2)
248
+ cm_points_2 = features_2[cm_points_1]
249
+ cm_points_cur = features_cur[cm_points_1]
250
+
251
+ rot_matrix, tran_matrix, cm_points_2, points_3d, cm_points_cur = self.PnP(points_3d[cm_points_0], cm_points_2, self.img_obj.K, np.zeros((5, 1), dtype=np.float32), cm_points_cur, initial = 0)
252
+ transform_matrix_1 = np.hstack((rot_matrix, tran_matrix))
253
+ pose_2 = np.matmul(self.img_obj.K, transform_matrix_1)
254
+
255
+ error, points_3d = self.reprojection_error(points_3d, cm_points_2, transform_matrix_1, self.img_obj.K, homogenity = 0)
256
+
257
+
258
+ cm_mask_0, cm_mask_1, points_3d = self.triangulation(pose_1, pose_2, cm_mask_0, cm_mask_1)
259
+ error, points_3d = self.reprojection_error(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, homogenity = 1)
260
+ print("Reprojection Error: ", error)
261
+ pose_array = np.hstack((pose_array, pose_2.ravel()))
262
+ # takes a long time to run
263
+ if enable_bundle_adjustment:
264
+ points_3d, cm_mask_1, transform_matrix_1 = self.bundle_adjustment(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, threshold)
265
+ pose_2 = np.matmul(self.img_obj.K, transform_matrix_1)
266
+ error, points_3d = self.reprojection_error(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, homogenity = 0)
267
+ print("Bundle Adjusted error: ",error)
268
+ total_points = np.vstack((total_points, points_3d))
269
+ points_left = np.array(cm_mask_1, dtype=np.int32)
270
+ color_vector = np.array([image_2[l[1], l[0]] for l in points_left])
271
+ total_colors = np.vstack((total_colors, color_vector))
272
+ else:
273
+ total_points = np.vstack((total_points, points_3d[:, 0, :]))
274
+ points_left = np.array(cm_mask_1, dtype=np.int32)
275
+ color_vector = np.array([image_2[l[1], l[0]] for l in points_left.T])
276
+ total_colors = np.vstack((total_colors, color_vector))
277
+
278
+
279
+
280
+ transform_matrix_0 = np.copy(transform_matrix_1)
281
+ pose_0 = np.copy(pose_1)
282
+ plt.scatter(i, error)
283
+ plt.pause(0.05)
284
+
285
+ image_0 = np.copy(image_1)
286
+ image_1 = np.copy(image_2)
287
+ feature_0 = np.copy(features_cur)
288
+ feature_1 = np.copy(features_2)
289
+ pose_1 = np.copy(pose_2)
290
+ cv2.imshow(self.img_obj.image_list[0].split('\\')[-2], image_2)
291
+ if cv2.waitKey(1) & 0xff == ord('q'):
292
+ break
293
+ cv2.destroyAllWindows()
294
+
295
+ print("Printing to .ply file")
296
+ print(total_points.shape, total_colors.shape)
297
+ self.to_ply(self.img_obj.path, total_points, total_colors)
298
+ print("Completed Exiting ...")
299
+ np.savetxt(self.img_obj.path + '\\res\\' + self.img_obj.image_list[0].split('\\')[-2]+'_pose_array.csv', pose_array, delimiter = '\n')
300
+
301
+
302
+
303
def run_sfm(path):
    """Run the full incremental SfM pipeline on the image directory at *path*."""
    Sfm(path)()
space_carving.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import scipy.io
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import vtk

def run_space_carving():
    """Carve a voxel model from the uploaded dataset and write res_space/shape.vtr.

    Reads per-view 3x4 projection matrices from
    uploads_spacecarving/data/dino_Ps.mat and the .ppm views from the same
    folder, builds binary silhouettes, projects a 120^3 voxel grid into every
    view, and keeps voxels that land inside enough silhouettes.
    """
    # 'P' is a 1xN cell array holding one projection matrix per view.
    data = scipy.io.loadmat("uploads_spacecarving/data/dino_Ps.mat")["P"]
    projections = [data[0, i] for i in range(data.shape[1])]

    # === Load and preprocess images ===
    files = sorted(glob.glob("uploads_spacecarving/data/*.ppm"))
    images = []
    for f in files:
        im = cv2.imread(f, cv2.IMREAD_UNCHANGED).astype(float)
        im /= 255
        images.append(im[:, :, ::-1])  # BGR to RGB

    # === Create silhouettes ===
    imgH, imgW, _ = images[0].shape
    silhouettes = []
    for im in images:
        # Pixels close to the [0, 0, 0.75] background colour (presumably the
        # dataset's blue screen — confirm) are zeroed, leaving the foreground.
        mask = np.abs(im - [0.0, 0.0, 0.75])
        mask = np.sum(mask, axis=2)
        y, x = np.where(mask <= 1.1)
        im[y, x, :] = [0.0, 0.0, 0.0]
        im = im[:, :, 0]
        im[im > 0] = 1.0
        im = im.astype(np.uint8)
        # Morphological opening removes speckle noise from the binary mask.
        kernel = np.ones((5, 5), np.uint8)
        im = cv2.morphologyEx(im, cv2.MORPH_OPEN, kernel)
        silhouettes.append(im)

    # === Create voxel grid ===
    s = 120
    x, y, z = np.mgrid[:s, :s, :s]
    pts = np.vstack((x.flatten(), y.flatten(), z.flatten())).astype(float).T
    nb_points_init = pts.shape[0]

    # Normalize and center
    pts[:, 0] /= np.max(pts[:, 0])
    pts[:, 1] /= np.max(pts[:, 1])
    pts[:, 2] /= np.max(pts[:, 2])
    center = np.mean(pts, axis=0)
    pts -= center
    pts /= 5
    pts[:, 2] -= 0.62  # empirical vertical offset for this dataset — TODO confirm

    # Homogeneous coordinates
    pts_hom = np.vstack((pts.T, np.ones((1, nb_points_init))))

    # === Voxel carving: count silhouettes where voxel is occupied ===
    filled = []
    for P, sil in zip(projections, silhouettes):
        # Project every voxel centre into this view and round to pixel indices.
        uvs = P @ pts_hom
        uvs /= uvs[2, :]
        uvs = np.round(uvs).astype(int)
        # Only voxels projecting inside the image bounds can be tested.
        x_valid = np.logical_and(uvs[0, :] >= 0, uvs[0, :] < imgW)
        y_valid = np.logical_and(uvs[1, :] >= 0, uvs[1, :] < imgH)
        valid = np.logical_and(x_valid, y_valid)
        indices = np.where(valid)[0]
        fill = np.zeros(uvs.shape[1])
        sub_uvs = uvs[:2, indices]
        res = sil[sub_uvs[1, :], sub_uvs[0, :]]
        fill[indices] = res
        filled.append(fill)

    filled = np.vstack(filled)
    occupancy = np.sum(filled, axis=0)

    # === Save voxel grid as .vtr (only the voxels with occupancy > threshold) ===
    threshold = 25
    occupancy_mask = (occupancy > threshold).astype(np.float32)

    # Create grid coordinates: recover the distinct axis values from the
    # flattened grid via its x-major stride layout (x varies every s*s rows,
    # y every s rows, z every row).
    x_coords = sorted(list(set(np.round(pts[:, 0][::s*s], 6))))
    y_coords = sorted(list(set(np.round(pts[:, 1][:s*s:s], 6))))
    z_coords = sorted(list(set(np.round(pts[:, 2][:s], 6))))

    x_array = vtk.vtkFloatArray()
    y_array = vtk.vtkFloatArray()
    z_array = vtk.vtkFloatArray()

    for val in x_coords:
        x_array.InsertNextValue(val)
    for val in y_coords:
        y_array.InsertNextValue(val)
    for val in z_coords:
        z_array.InsertNextValue(val)

    # Only add occupancy values for retained voxels
    values = vtk.vtkFloatArray()
    values.SetName("Occupancy")
    for i in range(len(occupancy_mask)):
        values.InsertNextValue(occupancy_mask[i])

    # Create rectilinear grid
    rgrid = vtk.vtkRectilinearGrid()
    rgrid.SetDimensions(len(x_coords), len(y_coords), len(z_coords))
    rgrid.SetXCoordinates(x_array)
    rgrid.SetYCoordinates(y_array)
    rgrid.SetZCoordinates(z_array)
    rgrid.GetPointData().SetScalars(values)

    # Save to .vtr
    writer = vtk.vtkXMLRectilinearGridWriter()
    writer.SetFileName("res_space/shape.vtr")
    writer.SetInputData(rgrid)
    writer.Write()
111
+
temp_sfm.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c0b988e5c327e36cc6d0c637c0267ff36bffce869a7ef5564e37655e54d62f
3
+ size 33394335
temp_spacecarving.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352e31109317c3ae5389c3ec5c307beca65e4ebccb1924796b1bb2a0c9b473cc
3
+ size 31514321