jbilcke-hf HF staff committed on
Commit
eecf990
·
verified ·
1 Parent(s): ab71c41

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +80 -836
gradio_app.py CHANGED
@@ -1,230 +1,43 @@
1
  import os
2
- import random
3
  import tempfile
4
- import time
5
- import zipfile
6
- from contextlib import nullcontext
7
- from functools import lru_cache
8
  from typing import Any
9
-
10
- import cv2
11
- import gradio as gr
12
- import numpy as np
13
  import torch
14
- import trimesh
15
- from gradio_litmodel3d import LitModel3D
16
- from gradio_pointcloudeditor import PointCloudEditor
17
  from PIL import Image
 
 
18
  from transparent_background import Remover
19
 
 
20
  os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
21
- os.system("pip install ./deps/pynim-0.0.3-cp310-cp310-linux_x86_64.whl")
22
-
23
  import spar3d.utils as spar3d_utils
24
- from spar3d.models.mesh import QUAD_REMESH_AVAILABLE, TRIANGLE_REMESH_AVAILABLE
25
  from spar3d.system import SPAR3D
26
 
27
- os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.environ.get("TMPDIR", "/tmp"), "gradio")
28
-
29
- bg_remover = Remover() # default setting
30
-
31
  COND_WIDTH = 512
32
  COND_HEIGHT = 512
33
  COND_DISTANCE = 2.2
34
  COND_FOVY = 0.591627
35
  BACKGROUND_COLOR = [0.5, 0.5, 0.5]
36
 
37
- # Cached. Doesn't change
38
- c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
39
- intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
40
- COND_FOVY, COND_HEIGHT, COND_WIDTH
41
- )
42
-
43
- generated_files = []
44
-
45
- # Delete previous gradio temp dir folder
46
- if os.path.exists(os.environ["GRADIO_TEMP_DIR"]):
47
- print(f"Deleting {os.environ['GRADIO_TEMP_DIR']}")
48
- import shutil
49
-
50
- shutil.rmtree(os.environ["GRADIO_TEMP_DIR"])
51
-
52
  device = spar3d_utils.get_device()
53
-
54
  model = SPAR3D.from_pretrained(
55
  "stabilityai/stable-point-aware-3d",
56
  config_name="config.yaml",
57
- weight_name="model.safetensors",
58
- )
59
- model.eval()
60
- model = model.to(device)
61
-
62
- example_files = [
63
- os.path.join("demo_files/examples", f) for f in os.listdir("demo_files/examples")
64
- ]
65
-
66
-
67
- def create_zip_file(glb_file, pc_file, illumination_file):
68
- if not all([glb_file, pc_file, illumination_file]):
69
- return None
70
-
71
- # Create a temporary zip file
72
- temp_dir = tempfile.mkdtemp()
73
- zip_path = os.path.join(temp_dir, "spar3d_output.zip")
74
-
75
- with zipfile.ZipFile(zip_path, "w") as zipf:
76
- zipf.write(glb_file, "mesh.glb")
77
- zipf.write(pc_file, "points.ply")
78
- zipf.write(illumination_file, "illumination.hdr")
79
-
80
- generated_files.append(zip_path)
81
- return zip_path
82
-
83
-
84
- def forward_model(
85
- batch,
86
- system,
87
- guidance_scale=3.0,
88
- seed=0,
89
- device="cuda",
90
- remesh_option="none",
91
- vertex_count=-1,
92
- texture_resolution=1024,
93
- ):
94
- batch_size = batch["rgb_cond"].shape[0]
95
-
96
- # prepare the condition for point cloud generation
97
- # set seed
98
- random.seed(seed)
99
- torch.manual_seed(seed)
100
- np.random.seed(seed)
101
- cond_tokens = system.forward_pdiff_cond(batch)
102
-
103
- if "pc_cond" not in batch:
104
- sample_iter = system.sampler.sample_batch_progressive(
105
- batch_size,
106
- cond_tokens,
107
- guidance_scale=guidance_scale,
108
- device=device,
109
- )
110
- for x in sample_iter:
111
- samples = x["xstart"]
112
- batch["pc_cond"] = samples.permute(0, 2, 1).float()
113
- batch["pc_cond"] = spar3d_utils.normalize_pc_bbox(batch["pc_cond"])
114
-
115
- # subsample to the 512 points
116
- batch["pc_cond"] = batch["pc_cond"][
117
- :, torch.randperm(batch["pc_cond"].shape[1])[:512]
118
- ]
119
-
120
- # get the point cloud
121
- xyz = batch["pc_cond"][0, :, :3].cpu().numpy()
122
- color_rgb = (batch["pc_cond"][0, :, 3:6] * 255).cpu().numpy().astype(np.uint8)
123
- pc_rgb_trimesh = trimesh.PointCloud(vertices=xyz, colors=color_rgb)
124
-
125
- # forward for the final mesh
126
- trimesh_mesh, _glob_dict = model.generate_mesh(
127
- batch,
128
- texture_resolution,
129
- remesh=remesh_option,
130
- vertex_count=vertex_count,
131
- estimate_illumination=True,
132
- )
133
- trimesh_mesh = trimesh_mesh[0]
134
- illumination = _glob_dict["illumination"]
135
-
136
- return trimesh_mesh, pc_rgb_trimesh, illumination.cpu().detach().numpy()[0]
137
-
138
-
139
- def run_model(
140
- input_image,
141
- guidance_scale,
142
- random_seed,
143
- pc_cond,
144
- remesh_option,
145
- vertex_count,
146
- texture_resolution,
147
- ):
148
- start = time.time()
149
- with torch.no_grad():
150
- with (
151
- torch.autocast(device_type=device, dtype=torch.bfloat16)
152
- if "cuda" in device
153
- else nullcontext()
154
- ):
155
- model_batch = create_batch(input_image)
156
- model_batch = {k: v.to(device) for k, v in model_batch.items()}
157
-
158
- if pc_cond is not None:
159
- # Check if pc_cond is a list
160
- if isinstance(pc_cond, list):
161
- cond_tensor = torch.tensor(pc_cond).float().cuda().view(-1, 6)
162
- xyz = cond_tensor[:, :3]
163
- color_rgb = cond_tensor[:, 3:]
164
- elif isinstance(pc_cond, dict):
165
- xyz = torch.tensor(pc_cond["positions"]).float().cuda()
166
- color_rgb = torch.tensor(pc_cond["colors"]).float().cuda()
167
- else:
168
- xyz = torch.tensor(pc_cond.vertices).float().cuda()
169
- color_rgb = (
170
- torch.tensor(pc_cond.colors[:, :3]).float().cuda() / 255.0
171
- )
172
- model_batch["pc_cond"] = torch.cat([xyz, color_rgb], dim=-1).unsqueeze(
173
- 0
174
- )
175
- # sub-sample the point cloud to the target number of points
176
- if model_batch["pc_cond"].shape[1] > 512:
177
- idx = torch.randperm(model_batch["pc_cond"].shape[1])[:512]
178
- model_batch["pc_cond"] = model_batch["pc_cond"][:, idx]
179
- elif model_batch["pc_cond"].shape[1] < 512:
180
- num_points = model_batch["pc_cond"].shape[1]
181
- gr.Warning(
182
- f"The uploaded point cloud should have at least 512 points. This point cloud only has {num_points}. Results may be worse."
183
- )
184
- pad = 512 - num_points
185
- sampled_idx = torch.randint(
186
- 0, model_batch["pc_cond"].shape[1], (pad,)
187
- )
188
- model_batch["pc_cond"] = torch.cat(
189
- [
190
- model_batch["pc_cond"],
191
- model_batch["pc_cond"][:, sampled_idx],
192
- ],
193
- dim=1,
194
- )
195
-
196
- trimesh_mesh, trimesh_pc, illumination_map = forward_model(
197
- model_batch,
198
- model,
199
- guidance_scale=guidance_scale,
200
- seed=random_seed,
201
- device="cuda",
202
- remesh_option=remesh_option.lower(),
203
- vertex_count=vertex_count,
204
- texture_resolution=texture_resolution,
205
- )
206
-
207
- # Create new tmp file
208
- temp_dir = tempfile.mkdtemp()
209
- tmp_file = os.path.join(temp_dir, "mesh.glb")
210
-
211
- trimesh_mesh.export(tmp_file, file_type="glb", include_normals=True)
212
- generated_files.append(tmp_file)
213
-
214
- tmp_file_pc = os.path.join(temp_dir, "points.ply")
215
- trimesh_pc.export(tmp_file_pc)
216
- generated_files.append(tmp_file_pc)
217
-
218
- tmp_file_illumination = os.path.join(temp_dir, "illumination.hdr")
219
- cv2.imwrite(tmp_file_illumination, illumination_map)
220
- generated_files.append(tmp_file_illumination)
221
-
222
- print("Generation took:", time.time() - start, "s")
223
-
224
- return tmp_file, tmp_file_pc, tmp_file_illumination, trimesh_pc
225
 
 
 
 
 
 
226
 
227
  def create_batch(input_image: Image) -> dict[str, Any]:
 
228
  img_cond = (
229
  torch.from_numpy(
230
  np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32)
@@ -238,644 +51,75 @@ def create_batch(input_image: Image) -> dict[str, Any]:
238
  torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
239
  )
240
 
241
- batch_elem = {
242
- "rgb_cond": rgb_cond,
243
- "mask_cond": mask_cond,
244
  "c2w_cond": c2w_cond.unsqueeze(0),
245
  "intrinsic_cond": intrinsic.unsqueeze(0),
246
  "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
247
  }
248
- # Add batch dim
249
- batched = {k: v.unsqueeze(0) for k, v in batch_elem.items()}
250
- return batched
251
-
252
-
253
- @lru_cache
254
- def checkerboard(squares: int, size: int, min_value: float = 0.5):
255
- base = np.zeros((squares, squares)) + min_value
256
- base[1::2, ::2] = 1
257
- base[::2, 1::2] = 1
258
-
259
- repeat_mult = size // squares
260
- return (
261
- base.repeat(repeat_mult, axis=0)
262
- .repeat(repeat_mult, axis=1)[:, :, None]
263
- .repeat(3, axis=-1)
264
- )
265
-
266
-
267
- def remove_background(input_image: Image) -> Image:
268
- return bg_remover.process(input_image.convert("RGB"))
269
-
270
-
271
- def show_mask_img(input_image: Image) -> Image:
272
- img_numpy = np.array(input_image)
273
- alpha = img_numpy[:, :, 3] / 255.0
274
- chkb = checkerboard(32, 512) * 255
275
- new_img = img_numpy[..., :3] * alpha[:, :, None] + chkb * (1 - alpha[:, :, None])
276
- return Image.fromarray(new_img.astype(np.uint8), mode="RGB")
277
-
278
-
279
- def process_model_run(
280
- background_state,
281
- guidance_scale,
282
- random_seed,
283
- pc_cond,
284
- remesh_option,
285
- vertex_count_type,
286
- vertex_count,
287
- texture_resolution,
288
- ):
289
- # Adjust vertex count based on selection
290
- final_vertex_count = (
291
- -1
292
- if vertex_count_type == "Keep Vertex Count"
293
- else (
294
- vertex_count // 2
295
- if vertex_count_type == "Target Face Count"
296
- else vertex_count
297
- )
298
- )
299
- print(
300
- f"Final vertex count: {final_vertex_count} with type {vertex_count_type} and vertex count {vertex_count}"
301
- )
302
-
303
- glb_file, pc_file, illumination_file, pc_plot = run_model(
304
- background_state,
305
- guidance_scale,
306
- random_seed,
307
- pc_cond,
308
- remesh_option,
309
- final_vertex_count,
310
- texture_resolution,
311
- )
312
- # Create a single float list of x y z r g b
313
- point_list = []
314
- for i in range(pc_plot.vertices.shape[0]):
315
- point_list.extend(
316
- [
317
- pc_plot.vertices[i, 0],
318
- pc_plot.vertices[i, 1],
319
- pc_plot.vertices[i, 2],
320
- pc_plot.colors[i, 0] / 255.0,
321
- pc_plot.colors[i, 1] / 255.0,
322
- pc_plot.colors[i, 2] / 255.0,
323
- ]
324
- )
325
-
326
- return glb_file, pc_file, illumination_file, point_list
327
-
328
-
329
- def regenerate_run(
330
- background_state,
331
- guidance_scale,
332
- random_seed,
333
- pc_cond,
334
- remesh_option,
335
- vertex_count_type,
336
- vertex_count,
337
- texture_resolution,
338
- ):
339
- glb_file, pc_file, illumination_file, point_list = process_model_run(
340
- background_state,
341
- guidance_scale,
342
- random_seed,
343
- pc_cond,
344
- remesh_option,
345
- vertex_count_type,
346
- vertex_count,
347
- texture_resolution,
348
- )
349
- zip_file = create_zip_file(glb_file, pc_file, illumination_file)
350
-
351
- return (
352
- gr.update(), # run_btn
353
- gr.update(), # img_proc_state
354
- gr.update(), # background_remove_state
355
- gr.update(), # preview_removal
356
- gr.update(value=glb_file, visible=True), # output_3d
357
- gr.update(visible=True), # hdr_row
358
- illumination_file, # hdr_file
359
- gr.update(visible=True), # point_cloud_row
360
- gr.update(value=point_list), # point_cloud_editor
361
- gr.update(value=pc_file), # pc_download
362
- gr.update(visible=False), # regenerate_btn
363
- gr.update(value=zip_file, visible=True), # download_all_btn
364
- )
365
-
366
-
367
- def run_button(
368
- run_btn,
369
- input_image,
370
- background_state,
371
- foreground_ratio,
372
- no_crop,
373
- guidance_scale,
374
- random_seed,
375
- pc_upload,
376
- pc_cond_file,
377
- remesh_option,
378
- vertex_count_type,
379
- vertex_count,
380
- texture_resolution,
381
- ):
382
- if run_btn == "Run":
383
- if torch.cuda.is_available():
384
- torch.cuda.reset_peak_memory_stats()
385
-
386
- if pc_upload:
387
- # make sure the pc_cond_file has been uploaded
388
- try:
389
- pc_cond = trimesh.load(pc_cond_file.name)
390
- except Exception:
391
- raise gr.Error(
392
- "Please upload a valid point cloud ply file as condition."
393
- )
394
- else:
395
- pc_cond = None
396
-
397
- glb_file, pc_file, illumination_file, pc_list = process_model_run(
398
- background_state,
399
- guidance_scale,
400
- random_seed,
401
- pc_cond,
402
- remesh_option,
403
- vertex_count_type,
404
- vertex_count,
405
- texture_resolution,
406
- )
407
-
408
- zip_file = create_zip_file(glb_file, pc_file, illumination_file)
409
-
410
- if torch.cuda.is_available():
411
- print("Peak Memory:", torch.cuda.max_memory_allocated() / 1024 / 1024, "MB")
412
- elif torch.backends.mps.is_available():
413
- print(
414
- "Peak Memory:", torch.mps.driver_allocated_memory() / 1024 / 1024, "MB"
415
- )
416
-
417
- return (
418
- gr.update(), # run_btn
419
- gr.update(), # img_proc_state
420
- gr.update(), # background_remove_state
421
- gr.update(), # preview_removal
422
- gr.update(value=glb_file, visible=True), # output_3d
423
- gr.update(visible=True), # hdr_row
424
- illumination_file, # hdr_file
425
- gr.update(visible=True), # point_cloud_row
426
- gr.update(value=pc_list), # point_cloud_editor
427
- gr.update(value=pc_file), # pc_download
428
- gr.update(visible=False), # regenerate_btn
429
- gr.update(value=zip_file, visible=True), # download_all_btn
430
- )
431
-
432
- elif run_btn == "Remove Background":
433
- rem_removed = remove_background(input_image)
434
-
435
- fr_res = spar3d_utils.foreground_crop(
436
- rem_removed,
437
- crop_ratio=foreground_ratio,
438
  newsize=(COND_WIDTH, COND_HEIGHT),
439
- no_crop=no_crop,
440
- )
441
-
442
- return (
443
- gr.update(value="Run", visible=True), # run_btn
444
- rem_removed, # img_proc_state,
445
- fr_res, # background_remove_state
446
- gr.update(value=show_mask_img(fr_res), visible=True), # preview_removal
447
- gr.update(value=None, visible=False), # output_3d
448
- gr.update(visible=False), # hdr_row
449
- None, # hdr_file
450
- gr.update(visible=False), # point_cloud_row
451
- gr.update(value=None), # point_cloud_editor
452
- gr.update(value=None), # pc_download
453
- gr.update(visible=False), # regenerate_btn
454
- gr.update(value=None, visible=False), # download_all_btn
455
- )
456
-
457
-
458
- def requires_bg_remove(image, fr, no_crop):
459
- if image is None:
460
- return (
461
- gr.update(visible=False, value="Run"), # run_Btn
462
- None, # img_proc_state
463
- None, # background_remove_state
464
- gr.update(value=None, visible=False), # preview_removal
465
- gr.update(value=None, visible=False), # output_3d
466
- gr.update(value=None, visible=False), # hdr_row
467
- None, # hdr_file
468
- gr.update(visible=False), # point_cloud_row
469
- gr.update(value=None), # point_cloud_editor
470
- gr.update(value=None), # pc_download
471
- gr.update(visible=False), # regenerate_btn
472
- gr.update(value=None, visible=False), # download_all_btn
473
  )
474
- alpha_channel = np.array(image.getchannel("A"))
475
- min_alpha = alpha_channel.min()
476
-
477
- if min_alpha == 0:
478
- print("Already has alpha")
479
- fr_res = spar3d_utils.foreground_crop(
480
- image, fr, newsize=(COND_WIDTH, COND_HEIGHT), no_crop=no_crop
481
- )
482
- return (
483
- gr.update(value="Run", visible=True), # run_Btn
484
- image, # img_proc_state
485
- fr_res, # background_remove_state
486
- gr.update(value=show_mask_img(fr_res), visible=True), # preview_removal
487
- gr.update(value=None, visible=False), # output_3d
488
- gr.update(visible=False), # hdr_row
489
- None, # hdr_file
490
- gr.update(visible=False), # point_cloud_row
491
- gr.update(value=None), # point_cloud_editor
492
- gr.update(value=None), # pc_download
493
- gr.update(visible=False), # regenerate_btn
494
- gr.update(value=None, visible=False), # download_all_btn
495
- )
496
- return (
497
- gr.update(value="Remove Background", visible=True), # run_Btn
498
- None, # img_proc_state
499
- None, # background_remove_state
500
- gr.update(value=None, visible=False), # preview_removal
501
- gr.update(value=None, visible=False), # output_3d
502
- gr.update(visible=False), # hdr_row
503
- None, # hdr_file
504
- gr.update(visible=False), # point_cloud_row
505
- gr.update(value=None), # point_cloud_editor
506
- gr.update(value=None), # pc_download
507
- gr.update(visible=False), # regenerate_btn
508
- gr.update(value=None, visible=False), # download_all_btn
509
- )
510
-
511
-
512
- def update_foreground_ratio(img_proc, fr, no_crop):
513
- foreground_res = spar3d_utils.foreground_crop(
514
- img_proc, fr, newsize=(COND_WIDTH, COND_HEIGHT), no_crop=no_crop
515
- )
516
- return (
517
- foreground_res,
518
- gr.update(value=show_mask_img(foreground_res)),
519
- )
520
-
521
-
522
- def update_resolution_controls(remesh_choice, vertex_count_type):
523
- show_controls = remesh_choice.lower() != "none"
524
- show_vertex_count = vertex_count_type != "Keep Vertex Count"
525
- return (
526
- gr.update(visible=show_controls), # vertex_count_type
527
- gr.update(visible=show_controls and show_vertex_count), # vertex_count_slider
528
- )
529
-
530
-
531
- with gr.Blocks() as demo:
532
- img_proc_state = gr.State()
533
- background_remove_state = gr.State()
534
- hdr_illumination_file_state = gr.State()
535
- gr.Markdown(
536
- """
537
- # SPAR3D: Stable Point-Aware Reconstruction of 3D Objects from Single Images
538
-
539
- <a href="https://arxiv.org/abs/2501.04689"><img src="https://img.shields.io/badge/Arxiv-2501.04689-B31B1B.svg"></a><a href="https://huggingface.co/stabilityai/stable-point-aware-3d"><img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange"></a>
540
- <br>
541
- # [Project Page](https://spar3d.github.io/)
542
-
543
- SPAR3D is a state-of-the-art method for 3D mesh reconstruction from a single image. This demo allows you to upload an image and generate a 3D mesh model from it. A feature of SPAR3D is it generates point clouds as intermediate representation before producing the mesh. You can edit the point cloud to adjust the final mesh. We provide a simple point cloud editor in this demo, where you can drag, recolor and rescale the point clouds. If you have more advanced editing needs (e.g. box selection, duplication, local streching, etc.), you can download the point cloud and edit it in softwares such as MeshLab or Blender. The edited point cloud can then be uploaded to this demo to generate a new 3D model by checking the "Point cloud upload" box.
544
-
545
- **Tips**
546
-
547
- 1. If the image does not have a valid alpha channel, it will go through the background removal step. Our built-in background removal can be inaccurate sometimes, which will result in poor mesh quality. In such cases, you can use external background removal tools to obtain a RGBA image before uploading here.
548
- 2. You can adjust the foreground ratio to control the size of the foreground object. This may have major impact on the final mesh.
549
- 3. Guidance scale controls the strength of the image condition in the point cloud generation process. A higher value may result in higher mesh fidelity, but the variability by changing the random seed will be lower. Note that the guidance scale and the seed are not effective when the point cloud is manually uploaded.
550
- 4. Our online editor supports multi-selection by holding down the shift key. This allows you to recolor multiple points at once.
551
- 5. The editing should mainly alter the unseen parts of the object. Visible parts can be edited, but the edits should be consistent with the image. Editing the visible parts in a way that contradicts the image may result in poor mesh quality.
552
- 6. You can upload your own HDR environment map to light the 3D model.
553
- """
554
- )
555
- with gr.Row(variant="panel"):
556
- with gr.Column():
557
- with gr.Row():
558
- input_img = gr.Image(
559
- type="pil", label="Input Image", sources="upload", image_mode="RGBA"
560
- )
561
- preview_removal = gr.Image(
562
- label="Preview Background Removal",
563
- type="pil",
564
- image_mode="RGB",
565
- interactive=False,
566
- visible=False,
567
- )
568
-
569
- gr.Markdown("### Input Controls")
570
- with gr.Group():
571
- with gr.Row():
572
- no_crop = gr.Checkbox(label="No cropping", value=False)
573
- pc_upload = gr.Checkbox(label="Point cloud upload", value=False)
574
-
575
- pc_cond_file = gr.File(
576
- label="Point Cloud Upload",
577
- file_types=[".ply"],
578
- file_count="single",
579
- visible=False,
580
- )
581
-
582
- foreground_ratio = gr.Slider(
583
- label="Padding Ratio",
584
- minimum=1.0,
585
- maximum=2.0,
586
- value=1.3,
587
- step=0.05,
588
- )
589
-
590
- pc_upload.change(
591
- lambda x: gr.update(visible=x),
592
- inputs=pc_upload,
593
- outputs=[pc_cond_file],
594
- )
595
-
596
- no_crop.change(
597
- update_foreground_ratio,
598
- inputs=[img_proc_state, foreground_ratio, no_crop],
599
- outputs=[background_remove_state, preview_removal],
600
- )
601
-
602
- foreground_ratio.change(
603
- update_foreground_ratio,
604
- inputs=[img_proc_state, foreground_ratio, no_crop],
605
- outputs=[background_remove_state, preview_removal],
606
- )
607
-
608
- gr.Markdown("### Point Diffusion Controls")
609
- with gr.Group():
610
- guidance_scale = gr.Slider(
611
- label="Guidance Scale",
612
- minimum=1.0,
613
- maximum=10.0,
614
- value=3.0,
615
- step=1.0,
616
- )
617
 
618
- random_seed = gr.Slider(
619
- label="Seed",
620
- minimum=0,
621
- maximum=10000,
622
- value=0,
623
- step=1,
624
- )
625
-
626
- no_remesh = not TRIANGLE_REMESH_AVAILABLE and not QUAD_REMESH_AVAILABLE
627
- gr.Markdown(
628
- "### Texture Controls"
629
- if no_remesh
630
- else "### Meshing and Texture Controls"
631
- )
632
- with gr.Group():
633
- remesh_choices = ["None"]
634
- if TRIANGLE_REMESH_AVAILABLE:
635
- remesh_choices.append("Triangle")
636
- if QUAD_REMESH_AVAILABLE:
637
- remesh_choices.append("Quad")
638
-
639
- remesh_option = gr.Radio(
640
- choices=remesh_choices,
641
- label="Remeshing",
642
- value="None",
643
- visible=not no_remesh,
644
- )
645
-
646
- vertex_count_type = gr.Radio(
647
- choices=[
648
- "Keep Vertex Count",
649
- "Target Vertex Count",
650
- "Target Face Count",
651
- ],
652
- label="Mesh Resolution Control",
653
- value="Keep Vertex Count",
654
- visible=False,
655
- )
656
-
657
- vertex_count_slider = gr.Slider(
658
- label="Target Count",
659
- minimum=0,
660
- maximum=20000,
661
- value=2000,
662
- visible=False,
663
- )
664
-
665
- texture_size = gr.Slider(
666
- label="Texture Size",
667
- minimum=512,
668
- maximum=2048,
669
- value=1024,
670
- step=256,
671
- visible=True,
672
- )
673
-
674
- remesh_option.change(
675
- update_resolution_controls,
676
- inputs=[remesh_option, vertex_count_type],
677
- outputs=[vertex_count_type, vertex_count_slider],
678
- )
679
-
680
- vertex_count_type.change(
681
- update_resolution_controls,
682
- inputs=[remesh_option, vertex_count_type],
683
- outputs=[vertex_count_type, vertex_count_slider],
684
- )
685
-
686
- run_btn = gr.Button("Run", variant="primary", visible=False)
687
-
688
- with gr.Column():
689
- with gr.Group(visible=False) as point_cloud_row:
690
- point_size_slider = gr.Slider(
691
- label="Point Size",
692
- minimum=0.01,
693
- maximum=1.0,
694
- value=0.2,
695
- step=0.01,
696
- )
697
- point_cloud_editor = PointCloudEditor(
698
- up_axis="Z",
699
- forward_axis="X",
700
- lock_scale_z=True,
701
- lock_scale_y=True,
702
- visible=True,
703
- )
704
-
705
- pc_download = gr.File(
706
- label="Point Cloud Download",
707
- file_types=[".ply"],
708
- file_count="single",
709
- )
710
- point_size_slider.change(
711
- fn=lambda x: gr.update(point_size=x),
712
- inputs=point_size_slider,
713
- outputs=point_cloud_editor,
714
- )
715
-
716
- regenerate_btn = gr.Button(
717
- "Re-run with point cloud", variant="primary", visible=False
718
- )
719
-
720
- output_3d = LitModel3D(
721
- label="3D Model",
722
- visible=False,
723
- clear_color=[0.0, 0.0, 0.0, 0.0],
724
- tonemapping="aces",
725
- contrast=1.0,
726
- scale=1.0,
727
- )
728
- with gr.Column(visible=False, scale=1.0) as hdr_row:
729
- gr.Markdown(
730
- """## HDR Environment Map
731
-
732
- Select an HDR environment map to light the 3D model. You can also upload your own HDR environment maps.
733
- """
734
- )
735
-
736
- with gr.Row():
737
- hdr_illumination_file = gr.File(
738
- label="HDR Env Map",
739
- file_types=[".hdr"],
740
- file_count="single",
741
- )
742
- example_hdris = [
743
- os.path.join("demo_files/hdri", f)
744
- for f in os.listdir("demo_files/hdri")
745
- ]
746
- hdr_illumination_example = gr.Examples(
747
- examples=example_hdris,
748
- inputs=hdr_illumination_file,
749
- )
750
-
751
- def update_hdr_illumination_file(state, cur_update):
752
- # If the current value of hdr_illumination_file is the same as cur_update, then we don't need to update
753
- if (
754
- hdr_illumination_file.value is not None
755
- and hdr_illumination_file.value == cur_update
756
- ):
757
- return (
758
- gr.update(),
759
- gr.update(),
760
- )
761
- update_value = cur_update if cur_update is not None else state
762
- if update_value is not None:
763
- return (
764
- gr.update(value=update_value),
765
- gr.update(
766
- env_map=(
767
- update_value.name
768
- if isinstance(update_value, gr.File)
769
- else update_value
770
- )
771
- ),
772
- )
773
- return (gr.update(value=None), gr.update(env_map=None))
774
-
775
- hdr_illumination_file.change(
776
- update_hdr_illumination_file,
777
- inputs=[hdr_illumination_file_state, hdr_illumination_file],
778
- outputs=[hdr_illumination_file, output_3d],
779
- )
780
-
781
- download_all_btn = gr.File(
782
- label="Download All Files (ZIP)", file_count="single", visible=False
783
- )
784
-
785
- hdr_illumination_file_state.change(
786
- fn=lambda x: gr.update(value=x),
787
- inputs=hdr_illumination_file_state,
788
- outputs=hdr_illumination_file,
789
- )
790
-
791
- examples = gr.Examples(
792
- examples=example_files, inputs=input_img, examples_per_page=11
793
- )
794
-
795
- input_img.change(
796
- requires_bg_remove,
797
- inputs=[input_img, foreground_ratio, no_crop],
798
- outputs=[
799
- run_btn,
800
- img_proc_state,
801
- background_remove_state,
802
- preview_removal,
803
- output_3d,
804
- hdr_row,
805
- hdr_illumination_file_state,
806
- point_cloud_row,
807
- point_cloud_editor,
808
- pc_download,
809
- regenerate_btn,
810
- download_all_btn,
811
- ],
812
- )
813
-
814
- point_cloud_editor.edit(
815
- fn=lambda _x: gr.update(visible=True),
816
- inputs=point_cloud_editor,
817
- outputs=regenerate_btn,
818
- )
819
-
820
- regenerate_btn.click(
821
- regenerate_run,
822
- inputs=[
823
- background_remove_state,
824
- guidance_scale,
825
- random_seed,
826
- point_cloud_editor,
827
- remesh_option,
828
- vertex_count_type,
829
- vertex_count_slider,
830
- texture_size,
831
- ],
832
- outputs=[
833
- run_btn,
834
- img_proc_state,
835
- background_remove_state,
836
- preview_removal,
837
- output_3d,
838
- hdr_row,
839
- hdr_illumination_file_state,
840
- point_cloud_row,
841
- point_cloud_editor,
842
- pc_download,
843
- regenerate_btn,
844
- download_all_btn,
845
- ],
846
- )
847
-
848
- run_btn.click(
849
- run_button,
850
- inputs=[
851
- run_btn,
852
- input_img,
853
- background_remove_state,
854
- foreground_ratio,
855
- no_crop,
856
- guidance_scale,
857
- random_seed,
858
- pc_upload,
859
- pc_cond_file,
860
- remesh_option,
861
- vertex_count_type,
862
- vertex_count_slider,
863
- texture_size,
864
- ],
865
- outputs=[
866
- run_btn,
867
- img_proc_state,
868
- background_remove_state,
869
- preview_removal,
870
- output_3d,
871
- hdr_row,
872
- hdr_illumination_file_state,
873
- point_cloud_row,
874
- point_cloud_editor,
875
- pc_download,
876
- regenerate_btn,
877
- download_all_btn,
878
- ],
879
- )
880
 
881
- demo.queue().launch(share=False)
 
 
1
  import os
2
+ import base64
3
  import tempfile
 
 
 
 
4
  from typing import Any
 
 
 
 
5
  import torch
6
+ import numpy as np
 
 
7
  from PIL import Image
8
+ import gradio as gr
9
+ import trimesh
10
  from transparent_background import Remover
11
 
12
+ # Import and setup SPAR3D
13
  os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
 
 
14
  import spar3d.utils as spar3d_utils
 
15
  from spar3d.system import SPAR3D
16
 
17
+ # Constants
 
 
 
18
# Conditioning-view settings consumed by create_batch and the camera setup below.
COND_WIDTH = 512
COND_HEIGHT = 512
COND_DISTANCE = 2.2
COND_FOVY = 0.591627
BACKGROUND_COLOR = [0.5, 0.5, 0.5]

# Everything below runs once at import time and is shared by all requests.
device = spar3d_utils.get_device()

# Background remover built with its default settings.
bg_remover = Remover()

# Load the pretrained SPAR3D weights, switch to eval mode and move to `device`.
model = SPAR3D.from_pretrained(
    "stabilityai/stable-point-aware-3d",
    config_name="config.yaml",
    weight_name="model.safetensors",
)
model = model.eval().to(device)

# Camera parameters of the conditioning view. They depend only on the
# constants above, so they are computed once and reused by create_batch.
c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
    COND_FOVY,
    COND_HEIGHT,
    COND_WIDTH,
)
38
 
39
  def create_batch(input_image: Image) -> dict[str, Any]:
40
+ """Prepare image batch for model input."""
41
  img_cond = (
42
  torch.from_numpy(
43
  np.asarray(input_image.resize((COND_WIDTH, COND_HEIGHT))).astype(np.float32)
 
51
  torch.tensor(BACKGROUND_COLOR)[None, None, :], img_cond[:, :, :3], mask_cond
52
  )
53
 
54
+ batch = {
55
+ "rgb_cond": rgb_cond.unsqueeze(0),
56
+ "mask_cond": mask_cond.unsqueeze(0),
57
  "c2w_cond": c2w_cond.unsqueeze(0),
58
  "intrinsic_cond": intrinsic.unsqueeze(0),
59
  "intrinsic_normed_cond": intrinsic_normed_cond.unsqueeze(0),
60
  }
61
+ return batch
62
+
63
def process_image(image_base64: str) -> str:
    """Turn a base64-encoded image into a base64-encoded GLB mesh.

    Pipeline: decode the image, remove the background when the image is not
    already RGBA, crop around the foreground, build the model batch, run
    ``model.generate_mesh`` and export the first mesh as GLB.

    Args:
        image_base64: The image file contents, base64-encoded. A leading
            ``data:<mime>;base64,`` data-URL prefix is accepted and stripped.

    Returns:
        The GLB file contents as a base64 string. If any step fails, the
        exception message is returned instead (this mirrors the text-in /
        text-out contract of the Gradio interface, which has no separate
        error channel here).
    """
    # Imported locally so this function does not rely on the module header,
    # which does not import either name.
    import io
    from contextlib import nullcontext

    try:
        # Tolerate data URLs: keep only the payload after the first comma.
        if image_base64.startswith("data:"):
            image_base64 = image_base64.partition(",")[2]

        # Parse the decoded bytes straight from memory.
        image_data = base64.b64decode(image_base64)
        input_image = Image.open(io.BytesIO(image_data))

        # Remove background if needed
        if input_image.mode != "RGBA":
            input_image = bg_remover.process(input_image.convert("RGB"))

        # Auto crop around the foreground using the default padding ratio.
        input_image = spar3d_utils.foreground_crop(
            input_image,
            crop_ratio=1.3,
            newsize=(COND_WIDTH, COND_HEIGHT),
            no_crop=False,
        )

        # Prepare batch and move every tensor to the model's device.
        batch = create_batch(input_image)
        batch = {k: v.to(device) for k, v in batch.items()}

        # bfloat16 autocast only applies on CUDA; elsewhere run without it.
        autocast_ctx = (
            torch.autocast(device_type=device, dtype=torch.bfloat16)
            if "cuda" in device
            else nullcontext()
        )

        # Generate mesh
        with torch.no_grad():
            with autocast_ctx:
                meshes, _ = model.generate_mesh(
                    batch,
                    texture_resolution=1024,
                    remesh="none",
                    vertex_count=-1,
                    estimate_illumination=False,
                )
        trimesh_mesh = meshes[0]

        # Export to GLB inside a temporary directory so the file is removed
        # even when the export or the read-back raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            glb_path = os.path.join(tmp_dir, "mesh.glb")
            trimesh_mesh.export(glb_path, file_type="glb", include_normals=True)
            with open(glb_path, "rb") as f:
                glb_base64 = base64.b64encode(f.read()).decode("utf-8")

        return glb_base64

    except Exception as e:
        # NOTE(review): callers cannot tell an error message from a GLB
        # payload; kept for backward compatibility with the existing API.
        return str(e)
114
+
115
+ # Create Gradio interface
116
# Text-in / text-out Gradio front end around process_image: the request
# carries a base64 image and the response carries the base64 GLB.
demo = gr.Interface(
    process_image,
    gr.Text(label="Base64 Image"),
    gr.Text(label="Base64 GLB"),
    title="SPAR3D Image to GLB Converter",
    description="Upload a base64-encoded image and get back a base64-encoded GLB file",
)

# Start the server only when executed as a script, without a public share link.
if __name__ == "__main__":
    demo.launch(share=False)