adirathor07 commited on
Commit
9650ca8
·
1 Parent(s): 482624a

added other models demo

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Python 3",
3
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
4
+ "image": "mcr.microsoft.com/devcontainers/python:1-3.11-bullseye",
5
+ "customizations": {
6
+ "codespaces": {
7
+ "openFiles": [
8
+ "README.md",
9
+ "app.py"
10
+ ]
11
+ },
12
+ "vscode": {
13
+ "settings": {},
14
+ "extensions": [
15
+ "ms-python.python",
16
+ "ms-python.vscode-pylance"
17
+ ]
18
+ }
19
+ },
20
+ "updateContentCommand": "[ -f packages.txt ] && sudo apt update && sudo apt upgrade -y && sudo xargs apt install -y <packages.txt; [ -f requirements.txt ] && pip3 install --user -r requirements.txt; pip3 install --user streamlit; echo '✅ Packages installed and Requirements met'",
21
+ "postAttachCommand": {
22
+ "server": "streamlit run app.py --server.enableCORS false --server.enableXsrfProtection false"
23
+ },
24
+ "portsAttributes": {
25
+ "8501": {
26
+ "label": "Application",
27
+ "onAutoForward": "openPreview"
28
+ }
29
+ },
30
+ "forwardPorts": [
31
+ 8501
32
+ ]
33
+ }
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  saved_model/*pth filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  saved_model/*pth filter=lfs diff=lfs merge=lfs -text
37
+ media/** filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,11 +1,257 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
2
  import numpy as np
3
  import plotly.graph_objects as go
4
- from PIL import Image
5
  from helpers import *
6
 
7
- # --- APP START ---
8
- st.title("2D → 3D Voxel Reconstruction Viewer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  uploaded_images = st.file_uploader(f"Upload images", accept_multiple_files=True, type=["png", "jpg", "jpeg"])
11
  # print(uploaded_images)
 
1
  import streamlit as st
2
+ import os
3
+ import shutil
4
+ import zipfile
5
+ from space_carving import run_space_carving
6
+ from sfm import run_sfm
7
+ import open3d as o3d
8
+ from PIL import Image
9
+ import matplotlib.pyplot as plt
10
  import numpy as np
11
  import plotly.graph_objects as go
 
12
  from helpers import *
13
 
14
+
15
+ st.set_page_config(page_title="3D Reconstruction Web App", layout="wide")
16
+ st.markdown("<div style='text-align: center;'><h1>3D Scene Reconstruction</h1></div>", unsafe_allow_html=True)
17
+ st.markdown("<div style='text-align: center;'><h2>Computer Vision Course Project</h2></div>", unsafe_allow_html=True)
18
+ st.markdown("<div style='text-align: right; font-weight:none;color : gray;'><h5>Course Instructor - Dr. Pratik Mazumdar</h5></div>", unsafe_allow_html=True)
19
+ st.markdown(
20
+ "<h4><a href='https://github.com/majisouvik26/3d-scene-reconstruction' target='_blank'>Github Link of Project</a></h4>",
21
+ unsafe_allow_html=True
22
+ )
23
+ st.header("Team Member:")
24
+ st.markdown("<div> <ul><li>Aditya Sahani(B22CS003)</li><li>Veeraraju Elluru(B22CS080)</li><li>Souvik Maji(B22CS089)</li><li>Dishit Sharma(B22CS082)</li><li>Aditya Rathor(B22AI044)</li></ul></div>", unsafe_allow_html=True)
25
+
26
+
27
+ st.header("Introduction")
28
+ st.write("3D scene reconstruction is a fundamental problem in computer vision that involves recovering the three-dimensional structure of a scene from a set of two-dimensional images. " \
29
+ "The goal is to generate an accurate and detailed representation of the real world, typically in the form of point clouds, meshes, or volumetric models. This process plays a crucial role in various applications such as robotics, augmented and virtual reality (AR/VR), autonomous navigation, and cultural heritage preservation." \
30
+ " The reconstruction pipeline often incorporates techniques like multi-view stereo, Structure from Motion (SfM), and volumetric methods like voxel carving. By leveraging image geometry, camera calibration data, and feature correspondences across views, 3D reconstruction enables machines to perceive and interpret the spatial layout of the physical world.")
31
+
32
+
33
+ st.image("https://filelist.tudelft.nl/BK/Onderzoek/Research_stories/zhaiyu.png", caption="3D Reconstruction")
34
+
35
+
36
+ st.header("Methods Used and Results")
37
+ st.subheader("1. NeRF - Neural Radiance Fields (NeRF)")
38
+
39
+ col1, col2 = st.columns(2)
40
+
41
+ with col1:
42
+ st.image("media/test_image_0.png", width=400, caption="Input Image")
43
+
44
+ with col2:
45
+ st.image("media/truck_reconstruction.gif", width=400, caption="3D Reconstruction")
46
+
47
+ st.subheader("2. Space Carving")
48
+ col1, col2 = st.columns(2)
49
+
50
+ with col1:
51
+ st.image("media/input_images.png", width=400, caption="Input Image")
52
+
53
+ with col2:
54
+ st.image("media/shape_mesh.png", width=400, caption="3D Reconstruction")
55
+
56
+ st.subheader("3. Pix2Vox")
57
+ col1, col2 = st.columns(2)
58
+
59
+ with col1:
60
+ st.image("media/pix.jpg", width=400, caption="Input Image")
61
+
62
+ with col2:
63
+ st.image("media/pix_output.jpg", width=400, caption="3D Reconstruction")
64
+
65
+
66
+ st.subheader("4. SFM Method")
67
+ col1, col2 = st.columns(2)
68
+
69
+ with col1:
70
+ st.image("media/DSC_0351.JPG", width=400, caption="Input Image")
71
+
72
+ with col2:
73
+ st.image("media/image.png", width=400, caption="3D Reconstruction")
74
+
75
+ st.subheader("5. Incremental SFM Method")
76
+ col1, col2 = st.columns(2)
77
+
78
+ with col1:
79
+ st.image("media/WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg", width=400, caption="Input Image")
80
+
81
+ with col2:
82
+ st.image("media/rotation_sfm_cam_1_2_3_4[1].gif", width=400, caption="3D Reconstruction")
83
+
84
+ st.subheader("6. Gaussian Splatting Method")
85
+ # col1, col2 = st.columns(2)
86
+
87
+ # with col1:
88
+ # st.image("WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg", width=400, caption="Input Image")
89
+
90
+ # with col2:
91
+ st.image("media/gs.gif", width=400, caption="3D Reconstruction")
92
+
93
+
94
+
95
+
96
+ st.header("DEMO OF MODELS")
97
+
98
+ def show_ply_interactive(ply_path):
99
+ # Load point cloud
100
+ pcd = o3d.io.read_point_cloud(ply_path)
101
+ points = np.asarray(pcd.points)
102
+
103
+ # Optional: use colors
104
+ if pcd.has_colors():
105
+ colors = np.asarray(pcd.colors)
106
+ else:
107
+ colors = np.full_like(points, fill_value=0.5) # default gray
108
+
109
+ # Create interactive plot
110
+ fig = go.Figure(data=[go.Scatter3d(
111
+ x=points[:, 0], y=points[:, 1], z=points[:, 2],
112
+ mode='markers',
113
+ marker=dict(
114
+ size=1.5,
115
+ color=colors,
116
+ opacity=0.8
117
+ )
118
+ )])
119
+
120
+ fig.update_layout(
121
+ scene=dict(
122
+ xaxis_title='X', yaxis_title='Y', zaxis_title='Z'
123
+ ),
124
+ width=800,
125
+ height=600,
126
+ margin=dict(r=10, l=10, b=10, t=10)
127
+ )
128
+
129
+ return fig
130
+
131
+ # Show PLY as image
132
+ def show_ply_as_image(ply_path):
133
+ # Load point cloud
134
+ pcd = o3d.io.read_point_cloud(ply_path)
135
+
136
+ # Create visualization window (offscreen)
137
+ vis = o3d.visualization.Visualizer()
138
+ vis.create_window(visible=False)
139
+ vis.add_geometry(pcd)
140
+
141
+ # Set camera view
142
+ ctr = vis.get_view_control()
143
+ ctr.set_zoom(0.7)
144
+
145
+ vis.poll_events()
146
+ vis.update_renderer()
147
+
148
+ # Screenshot to numpy
149
+ image = vis.capture_screen_float_buffer(do_render=True)
150
+ vis.destroy_window()
151
+
152
+ # Convert to displayable image
153
+ img = (np.asarray(image) * 255).astype(np.uint8)
154
+ return Image.fromarray(img)
155
+
156
+ # ---------- Function to extract zip ----------
157
+ def extract_zip(zip_file, extract_to):
158
+ if os.path.exists(extract_to):
159
+ shutil.rmtree(extract_to)
160
+ os.makedirs(extract_to)
161
+
162
+ with zipfile.ZipFile(zip_file, 'r') as zip_ref:
163
+ zip_ref.extractall(extract_to)
164
+
165
+ return extract_to
166
+
167
+ # ---------- SPACE CARVING ----------
168
+ st.header("📦 Space Carving")
169
+ st.markdown("""
170
+ **Space Carving** is a volumetric method that uses silhouettes from multiple views to reconstruct a 3D object by carving away inconsistent voxels.
171
+
172
+ 👉 Upload a `.zip` file containing images (JPG/PNG) from different calibrated views.
173
+ """)
174
+
175
+ sc_zip = st.file_uploader("Upload ZIP file for Space Carving", type=["zip"])
176
+
177
+ if sc_zip:
178
+ sc_extract_path = "uploads_spacecarving"
179
+ with open("temp_spacecarving.zip", "wb") as f:
180
+ f.write(sc_zip.getbuffer())
181
+
182
+ extract_zip("temp_spacecarving.zip", sc_extract_path)
183
+ st.success("Extracted images.")
184
+
185
+ if st.button("Run Space Carving Model"):
186
+ output = run_space_carving() # This should generate the .vtr file
187
+ st.success("Model ran successfully.")
188
+
189
+ # Path to generated .vtr file
190
+ vtr_path = "res_space/shape.vtr" # Update if filename differs
191
+
192
+ if os.path.exists(vtr_path):
193
+ st.markdown("### 📥 Download Space Carved VTR File")
194
+ with open(vtr_path, "rb") as f:
195
+ st.download_button(
196
+ label="Download .vtr file",
197
+ data=f,
198
+ file_name=os.path.basename(vtr_path),
199
+ mime="application/octet-stream"
200
+ )
201
+ else:
202
+ st.warning("No .vtr file found. Make sure the model ran successfully.")
203
+
204
+
205
+ # ---------- STRUCTURE FROM MOTION ----------
206
+ st.markdown("---")
207
+ st.header("📷 Structure from Motion (SfM)")
208
+ st.markdown("""
209
+ **Structure from Motion (SfM)** reconstructs 3D geometry and camera poses from a series of images.
210
+
211
+ 👉 Upload a `.zip` file containing your image dataset (JPG/PNG).
212
+ """)
213
+
214
+ sfm_zip_file = st.file_uploader("Upload ZIP file for SfM", type=["zip"])
215
+
216
+ if sfm_zip_file is not None:
217
+ zip_name = os.path.splitext(sfm_zip_file.name)[0] # 👉 'dataset.zip' → 'dataset'
218
+ sfm_extract_path = "uploads_sfm"
219
+
220
+ extract_zip(sfm_zip_file, sfm_extract_path)
221
+ st.success(f"Extracted {zip_name} dataset.")
222
+
223
+ if st.button("Run SfM Model"):
224
+ output = run_sfm(sfm_extract_path + "\\" + zip_name)
225
+ st.success("Model ran successfully.")
226
+
227
+ # Construct PLY path based on zip filename
228
+ ply_path = os.path.join("res", f"{zip_name}.ply")
229
+
230
+ if os.path.exists(ply_path):
231
+ st.markdown("### 🧩 Reconstructed Point Cloud Image")
232
+ image = show_ply_as_image(ply_path)
233
+ st.image(image, caption=f"{zip_name}.ply", use_column_width=True)
234
+
235
+ # Optional download
236
+ with open(ply_path, "rb") as f:
237
+ st.download_button(
238
+ label="📥 Download .ply file",
239
+ data=f,
240
+ file_name=f"{zip_name}.ply",
241
+ mime="application/octet-stream"
242
+ )
243
+ else:
244
+ st.warning(f"No .ply file named {zip_name}.ply found in 'res/'.")
245
+
246
+ if os.path.exists(ply_path):
247
+ st.markdown("### 🧩 Reconstructed Point Cloud (Interactive)")
248
+ fig = show_ply_interactive(ply_path)
249
+ st.plotly_chart(fig, use_container_width=True)
250
+
251
+
252
+
253
+
254
+ st.header("🧠 Pix2Vox")
255
 
256
  uploaded_images = st.file_uploader(f"Upload images", accept_multiple_files=True, type=["png", "jpg", "jpeg"])
257
  # print(uploaded_images)
media/DSC_0351.JPG ADDED

Git LFS Details

  • SHA256: 48c7aa60596476946adaae0a6860a2b47e7de45bae2c5fef664601a7c63fd403
  • Pointer size: 131 Bytes
  • Size of remote file: 630 kB
media/WhatsApp Image 2025-04-12 at 17.40.27_1137ddf7.jpg ADDED

Git LFS Details

  • SHA256: dd621fd09ba7e1ccb9ab93bc7884bebc8e41156c758260ed12937068d530b87e
  • Pointer size: 130 Bytes
  • Size of remote file: 21.8 kB
media/gs.gif ADDED

Git LFS Details

  • SHA256: 219fd0cb5fbfcb673bd9166c33d2d79d0ebacec1281ed670e2038f54d2f7c777
  • Pointer size: 133 Bytes
  • Size of remote file: 15.3 MB
media/image.png ADDED

Git LFS Details

  • SHA256: 7d8ed5e5800d7dc2624ae8ba166f4ea74e59918f94710b309aa48928c5da77d2
  • Pointer size: 130 Bytes
  • Size of remote file: 97.4 kB
media/input_images.png ADDED

Git LFS Details

  • SHA256: 2468f156d9b707e5c0b665c30aa6b36291c19134136cdce4e924de99ac68e1a1
  • Pointer size: 131 Bytes
  • Size of remote file: 406 kB
media/pix.jpg ADDED

Git LFS Details

  • SHA256: 1462689fda8ee7c0df59286f5821127adb341a43f71e51bb0fc4ae4e4b863be7
  • Pointer size: 130 Bytes
  • Size of remote file: 25.7 kB
media/pix_output.jpg ADDED

Git LFS Details

  • SHA256: 624e8ad75b7e4b7eb144855f75e9b092cfe4903bce3b2c6f2f6a0efa08154380
  • Pointer size: 130 Bytes
  • Size of remote file: 24.7 kB
media/rotation_sfm_cam_1_2_3_4[1].gif ADDED

Git LFS Details

  • SHA256: f31c821cfb6db6b102000909696468226d9673274f539c90701e4ca6383b63a2
  • Pointer size: 132 Bytes
  • Size of remote file: 2.87 MB
media/shape_mesh.gif ADDED

Git LFS Details

  • SHA256: 855abb5675873cb9a3c9e0b0963acdd249af6bef18aef0e1b6bb9a2460807233
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
media/shape_mesh.png ADDED

Git LFS Details

  • SHA256: 66366ee39cc2b63ed876455af0fca3c384d41158ffc066d22fafb229aa501f38
  • Pointer size: 131 Bytes
  • Size of remote file: 116 kB
media/test_image_0.png ADDED

Git LFS Details

  • SHA256: 2289332fa51ea3a17098dba178f24215d5938e31900bb6a776302cb07bf7a8ea
  • Pointer size: 131 Bytes
  • Size of remote file: 132 kB
media/truck_reconstruction.gif ADDED

Git LFS Details

  • SHA256: 5c9f3791342728595fec1dbae96fcffd95afa33e5f14d28db45a05c3e3bca5ca
  • Pointer size: 132 Bytes
  • Size of remote file: 5.33 MB
packages.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ libgl1
2
+ libglib2.0-0
3
+ libxrender1
4
+ libsm6
5
+ libxext6
ply_run.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import open3d as o3d
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

# Show PLY as image
def show_ply_as_image(ply_path):
    """Offscreen-render the point cloud at *ply_path* and return it as a PIL image.

    Standalone copy of the helper that also lives in app.py, kept here so it
    can be run outside the Streamlit app.
    """
    # Load point cloud
    pcd = o3d.io.read_point_cloud(ply_path)

    # Create visualization window (offscreen)
    vis = o3d.visualization.Visualizer()
    vis.create_window(visible=False)
    vis.add_geometry(pcd)

    # Set camera view
    ctr = vis.get_view_control()
    ctr.set_zoom(0.7)

    vis.poll_events()
    vis.update_renderer()

    # Screenshot to numpy
    image = vis.capture_screen_float_buffer(do_render=True)
    vis.destroy_window()

    # Convert to displayable image: float buffer in [0, 1] -> uint8 in [0, 255]
    img = (np.asarray(image) * 255).astype(np.uint8)
    return Image.fromarray(img)
requirements.txt CHANGED
@@ -1,11 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  argparse
2
  easydict
3
- matplotlib
4
- numpy
5
- opencv-python
6
- scipy
7
  torch
8
  torchvision
9
- streamlit
10
- plotly
11
- pillow
 
1
+ attrs==25.3.0
2
+ blinker==1.9.0
3
+ cachetools==5.5.2
4
+ certifi==2025.1.31
5
+ charset-normalizer==3.4.1
6
+ click==8.1.8
7
+ colorama==0.4.6
8
+ fastjsonschema==2.21.1
9
+ gitdb==4.0.12
10
+ idna==3.10
11
+ Jinja2==3.1.6
12
+ MarkupSafe==3.0.2
13
+ narwhals==1.34.1
14
+ numpy==2.2.4
15
+ packaging==24.2
16
+ pandas==2.2.3
17
+ pillow==11.1.0
18
+ plotly==6.0.1
19
+ prompt_toolkit==3.0.50
20
+ protobuf==5.29.4
21
+ pure_eval==0.2.3
22
+ pyarrow==19.0.1
23
+ pydeck==0.9.1
24
+ Pygments==2.19.1
25
+ pyparsing==3.2.3
26
+ python-dateutil==2.9.0.post0
27
+ pytz==2025.2
28
+ referencing==0.36.2
29
+ requests==2.32.3
30
+ retrying==1.3.4
31
+ rpds-py==0.24.0
32
+ scipy==1.15.2
33
+ setuptools==78.1.0
34
+ six==1.17.0
35
+ smmap==5.0.2
36
+ tenacity==9.1.2
37
+ toml==0.10.2
38
+ tomlkit==0.13.2
39
+ tornado==6.4.2
40
+ tqdm==4.67.1
41
+ traitlets==5.14.3
42
+ typing_extensions==4.13.2
43
+ tzdata==2025.2
44
+ urllib3==2.4.0
45
+ watchdog==6.0.0
46
+ wcwidth==0.2.13
47
+ Werkzeug==3.0.6
48
+ widgetsnbextension==4.0.14
49
+ zipp==3.21.0
50
+ vtk
51
+ opencv-python
52
+ opencv-contrib-python
53
+ streamlit
54
+ matplotlib
55
+ open3d
56
  argparse
57
  easydict
 
 
 
 
58
  torch
59
  torchvision
60
+
 
 
sfm.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import os
4
+ from scipy.optimize import least_squares
5
+ from tomlkit import boolean
6
+ from tqdm import tqdm
7
+ import matplotlib.pyplot as plt
8
+
9
+ class Image_loader():
10
+ def __init__(self, img_dir:str, downscale_factor:float):
11
+ # loading the Camera intrinsic parameters K
12
+ with open(img_dir + '\\K.txt') as f:
13
+ self.K = np.array(list((map(lambda x:list(map(lambda x:float(x), x.strip().split(' '))),f.read().split('\n')))))
14
+ self.image_list = []
15
+ # Loading the set of images
16
+ for image in sorted(os.listdir(img_dir)):
17
+ if image[-4:].lower() == '.jpg' or image[-5:].lower() == '.png':
18
+ self.image_list.append(img_dir + '\\' + image)
19
+
20
+ self.path = os.getcwd()
21
+ self.factor = downscale_factor
22
+ self.downscale()
23
+
24
+
25
+ def downscale(self) -> None:
26
+ '''
27
+ Downscales the Image intrinsic parameter acc to the downscale factor
28
+ '''
29
+ self.K[0, 0] /= self.factor
30
+ self.K[1, 1] /= self.factor
31
+ self.K[0, 2] /= self.factor
32
+ self.K[1, 2] /= self.factor
33
+
34
+ def downscale_image(self, image):
35
+ for _ in range(1,int(self.factor / 2) + 1):
36
+ image = cv2.pyrDown(image)
37
+ return image
38
+
39
+ class Sfm():
40
+ def __init__(self, img_dir:str, downscale_factor:float = 2.0) -> None:
41
+ '''
42
+ Initialise and Sfm object.
43
+ '''
44
+ self.img_obj = Image_loader(img_dir,downscale_factor)
45
+
46
+ def triangulation(self, point_2d_1, point_2d_2, projection_matrix_1, projection_matrix_2) -> tuple:
47
+ '''
48
+ Triangulates 3d points from 2d vectors and projection matrices
49
+ returns projection matrix of first camera, projection matrix of second camera, point cloud
50
+ '''
51
+ pt_cloud = cv2.triangulatePoints(point_2d_1, point_2d_2, projection_matrix_1.T, projection_matrix_2.T)
52
+ return projection_matrix_1.T, projection_matrix_2.T, (pt_cloud / pt_cloud[3])
53
+
54
+ def PnP(self, obj_point, image_point , K, dist_coeff, rot_vector, initial) -> tuple:
55
+ '''
56
+ Finds an object pose from 3D-2D point correspondences using the RANSAC scheme.
57
+ returns rotational matrix, translational matrix, image points, object points, rotational vector
58
+ '''
59
+ if initial == 1:
60
+ obj_point = obj_point[:, 0 ,:]
61
+ image_point = image_point.T
62
+ rot_vector = rot_vector.T
63
+ _, rot_vector_calc, tran_vector, inlier = cv2.solvePnPRansac(obj_point, image_point, K, dist_coeff, cv2.SOLVEPNP_ITERATIVE)
64
+ # Converts a rotation matrix to a rotation vector or vice versa
65
+ rot_matrix, _ = cv2.Rodrigues(rot_vector_calc)
66
+
67
+ if inlier is not None:
68
+ image_point = image_point[inlier[:, 0]]
69
+ obj_point = obj_point[inlier[:, 0]]
70
+ rot_vector = rot_vector[inlier[:, 0]]
71
+ return rot_matrix, tran_vector, image_point, obj_point, rot_vector
72
+
73
+ def reprojection_error(self, obj_points, image_points, transform_matrix, K, homogenity) ->tuple:
74
+ '''
75
+ Calculates the reprojection error ie the distance between the projected points and the actual points.
76
+ returns total error, object points
77
+ '''
78
+ rot_matrix = transform_matrix[:3, :3]
79
+ tran_vector = transform_matrix[:3, 3]
80
+ rot_vector, _ = cv2.Rodrigues(rot_matrix)
81
+ if homogenity == 1:
82
+ obj_points = cv2.convertPointsFromHomogeneous(obj_points.T)
83
+ image_points_calc, _ = cv2.projectPoints(obj_points, rot_vector, tran_vector, K, None)
84
+ image_points_calc = np.float32(image_points_calc[:, 0, :])
85
+ total_error = cv2.norm(image_points_calc, np.float32(image_points.T) if homogenity == 1 else np.float32(image_points), cv2.NORM_L2)
86
+ return total_error / len(image_points_calc), obj_points
87
+
88
+ def optimal_reprojection_error(self, obj_points) -> np.array:
89
+ '''
90
+ calculates of the reprojection error during bundle adjustment
91
+ returns error
92
+ '''
93
+ transform_matrix = obj_points[0:12].reshape((3,4))
94
+ K = obj_points[12:21].reshape((3,3))
95
+ rest = int(len(obj_points[21:]) * 0.4)
96
+ p = obj_points[21:21 + rest].reshape((2, int(rest/2))).T
97
+ obj_points = obj_points[21 + rest:].reshape((int(len(obj_points[21 + rest:])/3), 3))
98
+ rot_matrix = transform_matrix[:3, :3]
99
+ tran_vector = transform_matrix[:3, 3]
100
+ rot_vector, _ = cv2.Rodrigues(rot_matrix)
101
+ image_points, _ = cv2.projectPoints(obj_points, rot_vector, tran_vector, K, None)
102
+ image_points = image_points[:, 0, :]
103
+ error = [ (p[idx] - image_points[idx])**2 for idx in range(len(p))]
104
+ return np.array(error).ravel()/len(p)
105
+
106
+ def bundle_adjustment(self, _3d_point, opt, transform_matrix_new, K, r_error) -> tuple:
107
+ '''
108
+ Bundle adjustment for the image and object points
109
+ returns object points, image points, transformation matrix
110
+ '''
111
+ opt_variables = np.hstack((transform_matrix_new.ravel(), K.ravel()))
112
+ opt_variables = np.hstack((opt_variables, opt.ravel()))
113
+ opt_variables = np.hstack((opt_variables, _3d_point.ravel()))
114
+
115
+ values_corrected = least_squares(self.optimal_reprojection_error, opt_variables, gtol = r_error).x
116
+ K = values_corrected[12:21].reshape((3,3))
117
+ rest = int(len(values_corrected[21:]) * 0.4)
118
+ return values_corrected[21 + rest:].reshape((int(len(values_corrected[21 + rest:])/3), 3)), values_corrected[21:21 + rest].reshape((2, int(rest/2))).T, values_corrected[0:12].reshape((3,4))
119
+
120
+ def to_ply(self, path, point_cloud, colors) -> None:
121
+ '''
122
+ Generates the .ply which can be used to open the point cloud
123
+ '''
124
+ out_points = point_cloud.reshape(-1, 3) * 200
125
+ out_colors = colors.reshape(-1, 3)
126
+ print(out_colors.shape, out_points.shape)
127
+ verts = np.hstack([out_points, out_colors])
128
+
129
+
130
+ mean = np.mean(verts[:, :3], axis=0)
131
+ scaled_verts = verts[:, :3] - mean
132
+ dist = np.sqrt(scaled_verts[:, 0] ** 2 + scaled_verts[:, 1] ** 2 + scaled_verts[:, 2] ** 2)
133
+ indx = np.where(dist < np.mean(dist) + 300)
134
+ verts = verts[indx]
135
+ ply_header = '''ply
136
+ format ascii 1.0
137
+ element vertex %(vert_num)d
138
+ property float x
139
+ property float y
140
+ property float z
141
+ property uchar blue
142
+ property uchar green
143
+ property uchar red
144
+ end_header
145
+ '''
146
+ with open(path + '\\res\\' + self.img_obj.image_list[0].split('\\')[-2] + '.ply', 'w') as f:
147
+ f.write(ply_header % dict(vert_num=len(verts)))
148
+ np.savetxt(f, verts, '%f %f %f %d %d %d')
149
+
150
+
151
+ def common_points(self, image_points_1, image_points_2, image_points_3) -> tuple:
152
+ '''
153
+ Finds the common points between image 1 and 2 , image 2 and 3
154
+ returns common points of image 1-2, common points of image 2-3, mask of common points 1-2 , mask for common points 2-3
155
+ '''
156
+ cm_points_1 = []
157
+ cm_points_2 = []
158
+ for i in range(image_points_1.shape[0]):
159
+ a = np.where(image_points_2 == image_points_1[i, :])
160
+ if a[0].size != 0:
161
+ cm_points_1.append(i)
162
+ cm_points_2.append(a[0][0])
163
+
164
+ mask_array_1 = np.ma.array(image_points_2, mask=False)
165
+ mask_array_1.mask[cm_points_2] = True
166
+ mask_array_1 = mask_array_1.compressed()
167
+ mask_array_1 = mask_array_1.reshape(int(mask_array_1.shape[0] / 2), 2)
168
+
169
+ mask_array_2 = np.ma.array(image_points_3, mask=False)
170
+ mask_array_2.mask[cm_points_2] = True
171
+ mask_array_2 = mask_array_2.compressed()
172
+ mask_array_2 = mask_array_2.reshape(int(mask_array_2.shape[0] / 2), 2)
173
+ print(" Shape New Array", mask_array_1.shape, mask_array_2.shape)
174
+ return np.array(cm_points_1), np.array(cm_points_2), mask_array_1, mask_array_2
175
+
176
+
177
+ def find_features(self, image_0, image_1) -> tuple:
178
+ '''
179
+ Feature detection using the sift algorithm and KNN
180
+ return keypoints(features) of image1 and image2
181
+ '''
182
+
183
+ sift = cv2.xfeatures2d.SIFT_create()
184
+ key_points_0, desc_0 = sift.detectAndCompute(cv2.cvtColor(image_0, cv2.COLOR_BGR2GRAY), None)
185
+ key_points_1, desc_1 = sift.detectAndCompute(cv2.cvtColor(image_1, cv2.COLOR_BGR2GRAY), None)
186
+
187
+ bf = cv2.BFMatcher()
188
+ matches = bf.knnMatch(desc_0, desc_1, k=2)
189
+ feature = []
190
+ for m, n in matches:
191
+ if m.distance < 0.70 * n.distance:
192
+ feature.append(m)
193
+
194
+ return np.float32([key_points_0[m.queryIdx].pt for m in feature]), np.float32([key_points_1[m.trainIdx].pt for m in feature])
195
+
196
+ def __call__(self, enable_bundle_adjustment:boolean=False):
197
+ cv2.namedWindow('image', cv2.WINDOW_NORMAL)
198
+ pose_array = self.img_obj.K.ravel()
199
+ transform_matrix_0 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0]])
200
+ transform_matrix_1 = np.empty((3, 4))
201
+
202
+ pose_0 = np.matmul(self.img_obj.K, transform_matrix_0)
203
+ pose_1 = np.empty((3, 4))
204
+ total_points = np.zeros((1, 3))
205
+ total_colors = np.zeros((1, 3))
206
+
207
+ image_0 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[0]))
208
+ image_1 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[1]))
209
+
210
+ feature_0, feature_1 = self.find_features(image_0, image_1)
211
+
212
+ # Essential matrix
213
+ essential_matrix, em_mask = cv2.findEssentialMat(feature_0, feature_1, self.img_obj.K, method=cv2.RANSAC, prob=0.999, threshold=0.4, mask=None)
214
+ feature_0 = feature_0[em_mask.ravel() == 1]
215
+ feature_1 = feature_1[em_mask.ravel() == 1]
216
+
217
+
218
+ _, rot_matrix, tran_matrix, em_mask = cv2.recoverPose(essential_matrix, feature_0, feature_1, self.img_obj.K)
219
+ feature_0 = feature_0[em_mask.ravel() > 0]
220
+ feature_1 = feature_1[em_mask.ravel() > 0]
221
+ transform_matrix_1[:3, :3] = np.matmul(rot_matrix, transform_matrix_0[:3, :3])
222
+ transform_matrix_1[:3, 3] = transform_matrix_0[:3, 3] + np.matmul(transform_matrix_0[:3, :3], tran_matrix.ravel())
223
+
224
+ pose_1 = np.matmul(self.img_obj.K, transform_matrix_1)
225
+
226
+ feature_0, feature_1, points_3d = self.triangulation(pose_0, pose_1, feature_0, feature_1)
227
+ error, points_3d = self.reprojection_error(points_3d, feature_1, transform_matrix_1, self.img_obj.K, homogenity = 1)
228
+ #ideally error < 1
229
+ print("REPROJECTION ERROR: ", error)
230
+ _, _, feature_1, points_3d, _ = self.PnP(points_3d, feature_1, self.img_obj.K, np.zeros((5, 1), dtype=np.float32), feature_0, initial=1)
231
+
232
+ total_images = len(self.img_obj.image_list) - 2
233
+ pose_array = np.hstack((np.hstack((pose_array, pose_0.ravel())), pose_1.ravel()))
234
+
235
+ threshold = 0.5
236
+ for i in tqdm(range(total_images)):
237
+ image_2 = self.img_obj.downscale_image(cv2.imread(self.img_obj.image_list[i + 2]))
238
+ features_cur, features_2 = self.find_features(image_1, image_2)
239
+
240
+ if i != 0:
241
+ feature_0, feature_1, points_3d = self.triangulation(pose_0, pose_1, feature_0, feature_1)
242
+ feature_1 = feature_1.T
243
+ points_3d = cv2.convertPointsFromHomogeneous(points_3d.T)
244
+ points_3d = points_3d[:, 0, :]
245
+
246
+
247
+ cm_points_0, cm_points_1, cm_mask_0, cm_mask_1 = self.common_points(feature_1, features_cur, features_2)
248
+ cm_points_2 = features_2[cm_points_1]
249
+ cm_points_cur = features_cur[cm_points_1]
250
+
251
+ rot_matrix, tran_matrix, cm_points_2, points_3d, cm_points_cur = self.PnP(points_3d[cm_points_0], cm_points_2, self.img_obj.K, np.zeros((5, 1), dtype=np.float32), cm_points_cur, initial = 0)
252
+ transform_matrix_1 = np.hstack((rot_matrix, tran_matrix))
253
+ pose_2 = np.matmul(self.img_obj.K, transform_matrix_1)
254
+
255
+ error, points_3d = self.reprojection_error(points_3d, cm_points_2, transform_matrix_1, self.img_obj.K, homogenity = 0)
256
+
257
+
258
+ cm_mask_0, cm_mask_1, points_3d = self.triangulation(pose_1, pose_2, cm_mask_0, cm_mask_1)
259
+ error, points_3d = self.reprojection_error(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, homogenity = 1)
260
+ print("Reprojection Error: ", error)
261
+ pose_array = np.hstack((pose_array, pose_2.ravel()))
262
+ # takes a long time to run
263
+ if enable_bundle_adjustment:
264
+ points_3d, cm_mask_1, transform_matrix_1 = self.bundle_adjustment(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, threshold)
265
+ pose_2 = np.matmul(self.img_obj.K, transform_matrix_1)
266
+ error, points_3d = self.reprojection_error(points_3d, cm_mask_1, transform_matrix_1, self.img_obj.K, homogenity = 0)
267
+ print("Bundle Adjusted error: ",error)
268
+ total_points = np.vstack((total_points, points_3d))
269
+ points_left = np.array(cm_mask_1, dtype=np.int32)
270
+ color_vector = np.array([image_2[l[1], l[0]] for l in points_left])
271
+ total_colors = np.vstack((total_colors, color_vector))
272
+ else:
273
+ total_points = np.vstack((total_points, points_3d[:, 0, :]))
274
+ points_left = np.array(cm_mask_1, dtype=np.int32)
275
+ color_vector = np.array([image_2[l[1], l[0]] for l in points_left.T])
276
+ total_colors = np.vstack((total_colors, color_vector))
277
+
278
+
279
+
280
+ transform_matrix_0 = np.copy(transform_matrix_1)
281
+ pose_0 = np.copy(pose_1)
282
+ plt.scatter(i, error)
283
+ plt.pause(0.05)
284
+
285
+ image_0 = np.copy(image_1)
286
+ image_1 = np.copy(image_2)
287
+ feature_0 = np.copy(features_cur)
288
+ feature_1 = np.copy(features_2)
289
+ pose_1 = np.copy(pose_2)
290
+ cv2.imshow(self.img_obj.image_list[0].split('\\')[-2], image_2)
291
+ if cv2.waitKey(1) & 0xff == ord('q'):
292
+ break
293
+ cv2.destroyAllWindows()
294
+
295
+ print("Printing to .ply file")
296
+ print(total_points.shape, total_colors.shape)
297
+ self.to_ply(self.img_obj.path, total_points, total_colors)
298
+ print("Completed Exiting ...")
299
+ np.savetxt(self.img_obj.path + '\\res\\' + self.img_obj.image_list[0].split('\\')[-2]+'_pose_array.csv', pose_array, delimiter = '\n')
300
+
301
+
302
+
303
def run_sfm(path):
    """Run the full incremental SfM pipeline on the image directory at *path*."""
    Sfm(path)()
space_carving.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import scipy.io
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import vtk

def run_space_carving():
    """Carve a voxel model from the uploaded dataset and write res_space/shape.vtr.

    Reads per-view 3x4 projection matrices from
    uploads_spacecarving/data/dino_Ps.mat and the .ppm views from the same
    folder, builds binary silhouettes, projects a 120^3 voxel grid into every
    view, and keeps voxels that land inside enough silhouettes.
    """
    # 'P' is a 1xN cell array holding one projection matrix per view.
    data = scipy.io.loadmat("uploads_spacecarving/data/dino_Ps.mat")["P"]
    projections = [data[0, i] for i in range(data.shape[1])]

    # === Load and preprocess images ===
    files = sorted(glob.glob("uploads_spacecarving/data/*.ppm"))
    images = []
    for f in files:
        im = cv2.imread(f, cv2.IMREAD_UNCHANGED).astype(float)
        im /= 255
        images.append(im[:, :, ::-1])  # BGR to RGB

    # === Create silhouettes ===
    imgH, imgW, _ = images[0].shape
    silhouettes = []
    for im in images:
        # Pixels close to the [0, 0, 0.75] background colour (presumably the
        # dataset's blue screen — confirm) are zeroed, leaving the foreground.
        mask = np.abs(im - [0.0, 0.0, 0.75])
        mask = np.sum(mask, axis=2)
        y, x = np.where(mask <= 1.1)
        im[y, x, :] = [0.0, 0.0, 0.0]
        im = im[:, :, 0]
        im[im > 0] = 1.0
        im = im.astype(np.uint8)
        # Morphological opening removes speckle noise from the binary mask.
        kernel = np.ones((5, 5), np.uint8)
        im = cv2.morphologyEx(im, cv2.MORPH_OPEN, kernel)
        silhouettes.append(im)

    # === Create voxel grid ===
    s = 120
    x, y, z = np.mgrid[:s, :s, :s]
    pts = np.vstack((x.flatten(), y.flatten(), z.flatten())).astype(float).T
    nb_points_init = pts.shape[0]

    # Normalize and center
    pts[:, 0] /= np.max(pts[:, 0])
    pts[:, 1] /= np.max(pts[:, 1])
    pts[:, 2] /= np.max(pts[:, 2])
    center = np.mean(pts, axis=0)
    pts -= center
    pts /= 5
    pts[:, 2] -= 0.62  # empirical vertical offset for this dataset — TODO confirm

    # Homogeneous coordinates
    pts_hom = np.vstack((pts.T, np.ones((1, nb_points_init))))

    # === Voxel carving: count silhouettes where voxel is occupied ===
    filled = []
    for P, sil in zip(projections, silhouettes):
        # Project every voxel centre into this view and round to pixel indices.
        uvs = P @ pts_hom
        uvs /= uvs[2, :]
        uvs = np.round(uvs).astype(int)
        # Only voxels projecting inside the image bounds can be tested.
        x_valid = np.logical_and(uvs[0, :] >= 0, uvs[0, :] < imgW)
        y_valid = np.logical_and(uvs[1, :] >= 0, uvs[1, :] < imgH)
        valid = np.logical_and(x_valid, y_valid)
        indices = np.where(valid)[0]
        fill = np.zeros(uvs.shape[1])
        sub_uvs = uvs[:2, indices]
        res = sil[sub_uvs[1, :], sub_uvs[0, :]]
        fill[indices] = res
        filled.append(fill)

    filled = np.vstack(filled)
    occupancy = np.sum(filled, axis=0)

    # === Save voxel grid as .vtr (only the voxels with occupancy > threshold) ===
    threshold = 25
    occupancy_mask = (occupancy > threshold).astype(np.float32)

    # Create grid coordinates: recover the distinct axis values from the
    # flattened grid via its x-major stride layout (x varies every s*s rows,
    # y every s rows, z every row).
    x_coords = sorted(list(set(np.round(pts[:, 0][::s*s], 6))))
    y_coords = sorted(list(set(np.round(pts[:, 1][:s*s:s], 6))))
    z_coords = sorted(list(set(np.round(pts[:, 2][:s], 6))))

    x_array = vtk.vtkFloatArray()
    y_array = vtk.vtkFloatArray()
    z_array = vtk.vtkFloatArray()

    for val in x_coords:
        x_array.InsertNextValue(val)
    for val in y_coords:
        y_array.InsertNextValue(val)
    for val in z_coords:
        z_array.InsertNextValue(val)

    # Only add occupancy values for retained voxels
    values = vtk.vtkFloatArray()
    values.SetName("Occupancy")
    for i in range(len(occupancy_mask)):
        values.InsertNextValue(occupancy_mask[i])

    # Create rectilinear grid
    rgrid = vtk.vtkRectilinearGrid()
    rgrid.SetDimensions(len(x_coords), len(y_coords), len(z_coords))
    rgrid.SetXCoordinates(x_array)
    rgrid.SetYCoordinates(y_array)
    rgrid.SetZCoordinates(z_array)
    rgrid.GetPointData().SetScalars(values)

    # Save to .vtr
    writer = vtk.vtkXMLRectilinearGridWriter()
    writer.SetFileName("res_space/shape.vtr")
    writer.SetInputData(rgrid)
    writer.Write()
111
+
temp_sfm.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c0b988e5c327e36cc6d0c637c0267ff36bffce869a7ef5564e37655e54d62f
3
+ size 33394335
temp_spacecarving.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:352e31109317c3ae5389c3ec5c307beca65e4ebccb1924796b1bb2a0c9b473cc
3
+ size 31514321