eliphatfs committed · Commit 50b3aa3
Parent(s): 4dbdaec

Update support library.

Files changed:
- openshape/demo/caption.py +1 -0
- openshape/demo/classification.py +9 -0
- openshape/demo/misc_utils.py +1 -1
- openshape/demo/sd_pc2img.py +12 -4
openshape/demo/caption.py
CHANGED
@@ -148,6 +148,7 @@ def pc_caption(pc_encoder: torch.nn.Module, pc, cond_scale):
     ref_dev = next(pc_encoder.parameters()).device
     prefix = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
     prefix = prefix.float() * cond_scale
+    prefix = prefix.to(next(model.parameters()).device)
     prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
     text = generate2(model, tokenizer, embed=prefix_embed)
     return text
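The single added line moves the encoder output onto whatever device the captioning model lives on before `clip_project` consumes it, avoiding a device-mismatch error when the point-cloud encoder and the caption model sit on different devices. A minimal sketch of the same pattern, with hypothetical stand-in modules (the real `pc_encoder` and `model` are OpenShape components not shown here):

    import torch

    # Hypothetical stand-ins for pc_encoder and model.clip_project.
    encoder = torch.nn.Linear(6, 1280)            # stays on CPU
    projector = torch.nn.Linear(1280, 768)
    if torch.cuda.is_available():
        projector = projector.cuda()              # may live on GPU

    prefix = encoder(torch.randn(1, 6))
    # Without this transfer, projector(prefix) raises a RuntimeError
    # whenever the two modules are on different devices.
    prefix = prefix.to(next(projector.parameters()).device)
    prefix_embed = projector(prefix)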
openshape/demo/classification.py
CHANGED
@@ -11,3 +11,12 @@ def pred_lvis_sims(pc_encoder: torch.nn.Module, pc):
     sim = torch.matmul(F.normalize(lvis.feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
     argsort = torch.argsort(sim, descending=True)
     return OrderedDict((lvis.categories[i], sim[i]) for i in argsort if i < len(lvis.categories))
+
+
+@torch.no_grad()
+def pred_custom_sims(pc_encoder: torch.nn.Module, pc, cats, feats):
+    ref_dev = next(pc_encoder.parameters()).device
+    enc = pc_encoder(torch.tensor(pc[:, [0, 2, 1, 3, 4, 5]].T[None], device=ref_dev)).cpu()
+    sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
+    argsort = torch.argsort(sim, descending=True)
+    return OrderedDict((cats[i], sim[i]) for i in argsort if i < len(cats))
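The new `pred_custom_sims` generalizes the LVIS classifier: instead of the fixed `lvis.categories`/`lvis.feats` pair, it ranks caller-supplied labels against their precomputed text features, reordering the point columns (`[0, 2, 1, 3, 4, 5]`, a Y/Z swap on XYZRGB points) before encoding. A rough usage sketch with dummy tensors standing in for the real encoder output and per-label text features:

    import torch
    import torch.nn.functional as F
    from collections import OrderedDict

    cats = ["chair", "table", "lamp"]     # hypothetical custom labels
    feats = torch.randn(len(cats), 1280)  # assumed: one text feature per label
    enc = torch.randn(1, 1280)            # assumed: shape embedding from the encoder

    # Same ranking logic as pred_custom_sims: cosine similarity, best match first.
    sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
    argsort = torch.argsort(sim, descending=True)
    print(OrderedDict((cats[i], float(sim[i])) for i in argsort))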
openshape/demo/misc_utils.py
CHANGED
@@ -109,7 +109,7 @@ def input_3d_shape(key=None):
     objaid = st.text_input("Enter an Objaverse ID", key=objaid_key)
     model = st.file_uploader("Or upload a model (.glb/.obj/.ply)", key=model_key)
     npy = st.file_uploader("Or upload a point cloud numpy array (.npy of Nx3 XYZ or Nx6 XYZRGB)", key=npy_key)
-    swap_yz_axes = st.
+    swap_yz_axes = st.radio("Gravity", ["Y is up (for most Objaverse shapes)", "Z is up"], key=swap_key) == "Z is up"
     f32 = numpy.float32
 
     def load_data(prog):
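The `st.radio` control lets the user declare which axis is gravity-aligned; `swap_yz_axes` is True only for the "Z is up" choice and presumably drives a Y/Z column swap on the loaded cloud further down in `load_data` (outside this hunk), matching the `[0, 2, 1, 3, 4, 5]` reorder seen in classification.py. A minimal sketch of such a swap, with a hypothetical helper name:

    import numpy

    def maybe_swap_yz(pc: numpy.ndarray, swap_yz_axes: bool) -> numpy.ndarray:
        # Hypothetical helper: exchange the Y and Z columns of an Nx3 XYZ
        # or Nx6 XYZRGB array when the shape was authored Z-up.
        if swap_yz_axes:
            pc = pc.copy()
            pc[:, [1, 2]] = pc[:, [2, 1]]
        return pc

    cloud = numpy.random.rand(4, 6).astype(numpy.float32)  # dummy Nx6 XYZRGB
    print(maybe_swap_yz(cloud, True)[:, :3])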
openshape/demo/sd_pc2img.py
CHANGED
@@ -13,25 +13,33 @@ class Wrapper(transformers.modeling_utils.PreTrainedModel):
         return rst.ObjectProxy(image_embeds=x)
 
 
-half = torch.float16 if torch.cuda.is_available() else torch.bfloat16
 pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
     "diffusers/stable-diffusion-2-1-unclip-i2i-l",
-
+    # variant="fp16",
     image_encoder = Wrapper()
 )
+# pe = pipe.text_encoder.text_model.embeddings
+# pe.position_ids = torch.arange(pe.position_ids.shape[-1]).expand((1, -1)).to(pe.position_ids)  # workaround
 if torch.cuda.is_available():
     pipe = pipe.to('cuda:' + str(torch.cuda.current_device()))
     pipe.enable_model_cpu_offload(torch.cuda.current_device())
+    pipe.enable_attention_slicing()
+    pipe.enable_vae_slicing()
 
 
 @torch.no_grad()
 def pc_to_image(pc_encoder: torch.nn.Module, pc, prompt, noise_level, width, height, cfg_scale, num_steps, callback):
     ref_dev = next(pc_encoder.parameters()).device
     enc = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
+    enc = torch.nn.functional.normalize(enc, dim=-1) * (768 ** 0.5) / 2
+    if torch.cuda.is_available():
+        enc = enc.to('cuda:' + str(torch.cuda.current_device()))
+    # enc = enc.type(half)
+    # with torch.autocast("cuda"):
     return pipe(
-        prompt=', '.join(["best quality"
+        prompt=', '.join(["best quality"] + ([prompt] if prompt else [])),
         negative_prompt="cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry",
-        image=
+        image=enc,
         width=width, height=height,
         guidance_scale=cfg_scale,
         noise_level=noise_level,
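Beyond the prompt and `image=enc` plumbing, the notable addition is the rescale of the shape embedding before it reaches the unCLIP pipeline: `normalize(enc, dim=-1) * (768 ** 0.5) / 2` forces every embedding to an L2 norm of sqrt(768)/2 ≈ 13.86, presumably matching the magnitude the pipeline expects from its CLIP image encoder. A quick check of that arithmetic:

    import torch
    import torch.nn.functional as F

    enc = torch.randn(1, 768) * 5.0                    # dummy embedding, arbitrary scale
    enc = F.normalize(enc, dim=-1) * (768 ** 0.5) / 2  # same rescale as the diff
    print(enc.norm(dim=-1))                            # tensor([13.8564]) == sqrt(768) / 2

`enable_attention_slicing()` and `enable_vae_slicing()` are standard diffusers memory savers that trade some speed for lower peak VRAM, complementing the existing `enable_model_cpu_offload` call; the commented-out `position_ids` lines are kept as a documented workaround rather than active code.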