Update

- app_image_to_3d.py  +8 -17
- app_text_to_3d.py   +2 -14
- model.py            +29 -43
- requirements.txt    +1 -1
- style.css           +0 -8
app_image_to_3d.py
CHANGED

@@ -1,5 +1,6 @@
 #!/usr/bin/env python

+import pathlib
 import shlex
 import subprocess

@@ -11,14 +12,15 @@ from utils import randomize_seed_fn


 def create_demo(model: Model) -> gr.Blocks:
-    subprocess.run(
-        shlex.split(
-            'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
-        ))
+    if not pathlib.Path('corgi.png').exists():
+        subprocess.run(
+            shlex.split(
+                'wget https://raw.githubusercontent.com/openai/shap-e/d99cedaea18e0989e340163dbaeb4b109fa9e8ec/shap_e/examples/example_data/corgi.png -O corgi.png'
+            ))
     examples = ['corgi.png']

     def process_example_fn(image_path: str) -> str:
-        return model.run_image(image_path
+        return model.run_image(image_path)

     with gr.Blocks() as demo:
         with gr.Box():
@@ -26,7 +28,7 @@ def create_demo(model: Model) -> gr.Blocks:
                              show_label=False,
                              type='filepath')
             run_button = gr.Button('Run')
-            result = gr.
+            result = gr.Model3D(label='Result', show_label=False)
             with gr.Accordion('Advanced options', open=False):
                 seed = gr.Slider(label='Seed',
                                  minimum=0,
@@ -46,15 +48,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                 maximum=100,
                                                 step=1,
                                                 value=64)
-                image_size = gr.Slider(label='Image size',
-                                       minimum=64,
-                                       maximum=256,
-                                       step=64,
-                                       value=128)
-                render_mode = gr.Dropdown(label='Render mode',
-                                          choices=['nerf', 'stf'],
-                                          value='nerf',
-                                          visible=False)

         gr.Examples(examples=examples,
                     inputs=image,
@@ -67,8 +60,6 @@ def create_demo(model: Model) -> gr.Blocks:
             seed,
             guidance_scale,
             num_inference_steps,
-            image_size,
-            render_mode,
         ]

         run_button.click(
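The output component here changes from a video player to gr.Model3D, which renders whatever mesh file path the click handler returns. Below is a minimal sketch of the new wiring, assuming only what is visible in this diff; the component labels are illustrative, and it omits the advanced-option sliders, the randomize_seed_fn chaining, and the example caching that the real app keeps.

import gradio as gr

from model import Model  # Model.run_image now returns the path to a .glb file

model = Model()

with gr.Blocks() as demo:
    image = gr.Image(label='Input image', show_label=False, type='filepath')
    run_button = gr.Button('Run')
    # gr.Model3D shows a mesh file (.glb/.gltf/.obj) in an interactive viewer,
    # replacing the gr.Video turntable output used before this commit.
    result = gr.Model3D(label='Result', show_label=False)
    run_button.click(fn=model.run_image, inputs=image, outputs=result)

if __name__ == '__main__':
    demo.queue().launch()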
app_text_to_3d.py
CHANGED

@@ -21,7 +21,7 @@ def create_demo(model: Model) -> gr.Blocks:
     ]

     def process_example_fn(prompt: str) -> str:
-        return model.run_text(prompt
+        return model.run_text(prompt)

     with gr.Blocks() as demo:
         with gr.Box():
@@ -32,7 +32,7 @@ def create_demo(model: Model) -> gr.Blocks:
                     max_lines=1,
                     placeholder='Enter your prompt').style(container=False)
                 run_button = gr.Button('Run').style(full_width=False)
-            result = gr.
+            result = gr.Model3D(label='Result', show_label=False)
             with gr.Accordion('Advanced options', open=False):
                 seed = gr.Slider(label='Seed',
                                  minimum=0,
@@ -52,15 +52,6 @@ def create_demo(model: Model) -> gr.Blocks:
                                                 maximum=100,
                                                 step=1,
                                                 value=64)
-                image_size = gr.Slider(label='Image size',
-                                       minimum=64,
-                                       maximum=256,
-                                       step=64,
-                                       value=128)
-                render_mode = gr.Dropdown(label='Render mode',
-                                          choices=['nerf', 'stf'],
-                                          value='nerf',
-                                          visible=False)

         gr.Examples(examples=examples,
                     inputs=prompt,
@@ -73,8 +64,6 @@ def create_demo(model: Model) -> gr.Blocks:
             seed,
             guidance_scale,
             num_inference_steps,
-            image_size,
-            render_mode,
         ]
         prompt.submit(
             fn=randomize_seed_fn,
@@ -86,7 +75,6 @@ def create_demo(model: Model) -> gr.Blocks:
             inputs=inputs,
             outputs=result,
         )
-
         run_button.click(
             fn=randomize_seed_fn,
             inputs=[seed, randomize_seed],
model.py
CHANGED

@@ -1,15 +1,15 @@
 import tempfile

-import imageio
 import numpy as np
-import PIL.Image
 import torch
+import trimesh
 from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
 from shap_e.diffusion.sample import sample_latents
 from shap_e.models.download import load_config, load_model
 from shap_e.models.nn.camera import (DifferentiableCameraBatch,
                                      DifferentiableProjectiveCamera)
 from shap_e.models.transmitter.base import Transmitter, VectorDecoder
+from shap_e.rendering.torch_mesh import TorchMesh
 from shap_e.util.collections import AttrDict
 from shap_e.util.image_util import load_image

@@ -47,23 +47,20 @@ def create_pan_cameras(size: int,
     )


-# Copied from https://github.com/openai/shap-e/blob/
+# Copied from https://github.com/openai/shap-e/blob/8625e7c15526d8510a2292f92165979268d0e945/shap_e/util/notebooks.py#LL64C1-L76C33
 @torch.no_grad()
-def decode_latent_images(
+def decode_latent_mesh(
     xm: Transmitter | VectorDecoder,
     latent: torch.Tensor,
-    cameras: DifferentiableCameraBatch,
-    rendering_mode: str = 'stf',
-):
+) -> TorchMesh:
     decoded = xm.renderer.render_views(
-        AttrDict(cameras=cameras),
+        AttrDict(cameras=create_pan_cameras(
+            2, latent.device)),  # lowest resolution possible
         params=(xm.encoder if isinstance(xm, Transmitter) else
                 xm).bottleneck_to_params(latent[None]),
-        options=AttrDict(rendering_mode=rendering_mode,
-                         render_with_direction=False),
+        options=AttrDict(rendering_mode='stf', render_with_direction=False),
     )
-    arr = decoded.channels.clamp(0, 255).to(torch.uint8)[0].cpu().numpy()
-    return [PIL.Image.fromarray(x) for x in arr]
+    return decoded.raw_meshes[0]


 class Model:
@@ -82,24 +79,29 @@ class Model:
         self.model = load_model(model_name, device=self.device)
         self.model_name = model_name

-
-
-
-
-
-
-
-
+    def to_glb(self, latent: torch.Tensor) -> str:
+        ply_path = tempfile.NamedTemporaryFile(suffix='.ply',
+                                               delete=False,
+                                               mode='w+b')
+        decode_latent_mesh(self.xm, latent).tri_mesh().write_ply(ply_path)
+
+        mesh = trimesh.load(ply_path.name)
+        rot = trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0])
+        mesh = mesh.apply_transform(rot)
+        rot = trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0])
+        mesh = mesh.apply_transform(rot)
+
+        mesh_path = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
+        mesh.export(mesh_path.name, file_type='glb')
+
+        return mesh_path.name

     def run_text(self,
                  prompt: str,
                  seed: int = 0,
                  guidance_scale: float = 15.0,
-                 num_steps: int = 64,
-                 output_image_size: int = 64,
-                 render_mode: str = 'nerf') -> str:
+                 num_steps: int = 64) -> str:
         self.load_model('text300M')
-
         torch.manual_seed(seed)

         latents = sample_latents(
@@ -117,27 +119,17 @@ class Model:
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])

     def run_image(self,
                   image_path: str,
                   seed: int = 0,
                   guidance_scale: float = 3.0,
-                  num_steps: int = 64,
-                  output_image_size: int = 64,
-                  render_mode: str = 'nerf') -> str:
+                  num_steps: int = 64) -> str:
         self.load_model('image300M')
-
         torch.manual_seed(seed)

         image = load_image(image_path)
-
         latents = sample_latents(
             batch_size=1,
             model=self.model,
@@ -153,10 +145,4 @@ class Model:
             sigma_max=160,
             s_churn=0,
         )
-
-        cameras = create_pan_cameras(output_image_size, self.device)
-        frames = decode_latent_images(self.xm,
-                                      latents[0],
-                                      cameras,
-                                      rendering_mode=render_mode)
-        return self.to_video(frames)
+        return self.to_glb(latents[0])
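The model no longer renders a turntable video: the sampled latent is decoded to a raw mesh at the cheapest camera resolution, written to a temporary PLY, and re-exported with trimesh as .glb after two rotations (a -pi/2 rotation about x, then pi about y), presumably to move the mesh from Shap-E's z-up frame into the y-up orientation glTF viewers expect. Below is a standalone sketch of that conversion step mirroring Model.to_glb above; the helper name ply_to_glb is illustrative, and the input PLY is assumed to come from decode_latent_mesh(...).tri_mesh().write_ply(...).

import tempfile

import numpy as np
import trimesh


def ply_to_glb(ply_path: str) -> str:
    # Load the mesh previously written out as PLY.
    mesh = trimesh.load(ply_path)

    # Reorient: rotate -90 degrees about x (z-up -> y-up), then 180 about y,
    # matching the transforms applied in Model.to_glb.
    mesh.apply_transform(
        trimesh.transformations.rotation_matrix(-np.pi / 2, [1, 0, 0]))
    mesh.apply_transform(
        trimesh.transformations.rotation_matrix(np.pi, [0, 1, 0]))

    # Export to a temporary .glb that a gr.Model3D component can display.
    out_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
    mesh.export(out_file.name, file_type='glb')
    return out_file.name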
requirements.txt
CHANGED

@@ -1,5 +1,5 @@
 git+https://github.com/openai/shap-e@8625e7c
 gradio==3.28.3
-imageio[ffmpeg]==2.28.1
 torch==2.0.0
 torchvision==0.15.1
+trimesh==3.21.5
style.css
CHANGED

@@ -8,14 +8,6 @@ h1 {
   padding-top: 1.5rem;
 }

-#result-1 video {
-  object-fit: scale-down;
-}
-
-#result-2 video {
-  object-fit: scale-down;
-}
-
 #prompt-container {
   gap: 0;
 }