eliphatfs committed · Commit 50b3aa3
Parent(s): 4dbdaec

Update support library.

Files changed:
- openshape/demo/caption.py +1 -0
- openshape/demo/classification.py +9 -0
- openshape/demo/misc_utils.py +1 -1
- openshape/demo/sd_pc2img.py +12 -4
openshape/demo/caption.py
CHANGED
@@ -148,6 +148,7 @@ def pc_caption(pc_encoder: torch.nn.Module, pc, cond_scale):
     ref_dev = next(pc_encoder.parameters()).device
     prefix = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
     prefix = prefix.float() * cond_scale
+    prefix = prefix.to(next(model.parameters()).device)
     prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
     text = generate2(model, tokenizer, embed=prefix_embed)
     return text
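The single added line moves the encoder output onto whatever device the captioning model lives on before `clip_project` consumes it, avoiding a device-mismatch error when the point-cloud encoder and the caption model sit on different devices. A minimal sketch of the same pattern, with hypothetical stand-in modules (the real `pc_encoder` and `model` are OpenShape components not shown here):

    import torch

    # Hypothetical stand-ins for pc_encoder and model.clip_project.
    encoder = torch.nn.Linear(6, 1280)            # stays on CPU
    projector = torch.nn.Linear(1280, 768)
    if torch.cuda.is_available():
        projector = projector.cuda()              # may live on GPU

    prefix = encoder(torch.randn(1, 6))
    # Without this transfer, projector(prefix) raises a RuntimeError
    # whenever the two modules are on different devices.
    prefix = prefix.to(next(projector.parameters()).device)
    prefix_embed = projector(prefix)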
openshape/demo/classification.py
CHANGED
@@ -11,3 +11,12 @@ def pred_lvis_sims(pc_encoder: torch.nn.Module, pc):
     sim = torch.matmul(F.normalize(lvis.feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
     argsort = torch.argsort(sim, descending=True)
     return OrderedDict((lvis.categories[i], sim[i]) for i in argsort if i < len(lvis.categories))
+
+
+@torch.no_grad()
+def pred_custom_sims(pc_encoder: torch.nn.Module, pc, cats, feats):
+    ref_dev = next(pc_encoder.parameters()).device
+    enc = pc_encoder(torch.tensor(pc[:, [0, 2, 1, 3, 4, 5]].T[None], device=ref_dev)).cpu()
+    sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
+    argsort = torch.argsort(sim, descending=True)
+    return OrderedDict((cats[i], sim[i]) for i in argsort if i < len(cats))
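The new `pred_custom_sims` generalizes the LVIS classifier: instead of the fixed `lvis.categories`/`lvis.feats` pair, it ranks caller-supplied labels against their precomputed text features, reordering the point columns (`[0, 2, 1, 3, 4, 5]`, a Y/Z swap on XYZRGB points) before encoding. A rough usage sketch with dummy tensors standing in for the real encoder output and per-label text features:

    import torch
    import torch.nn.functional as F
    from collections import OrderedDict

    cats = ["chair", "table", "lamp"]     # hypothetical custom labels
    feats = torch.randn(len(cats), 1280)  # assumed: one text feature per label
    enc = torch.randn(1, 1280)            # assumed: shape embedding from the encoder

    # Same ranking logic as pred_custom_sims: cosine similarity, best match first.
    sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
    argsort = torch.argsort(sim, descending=True)
    print(OrderedDict((cats[i], float(sim[i])) for i in argsort))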
openshape/demo/misc_utils.py
CHANGED
@@ -109,7 +109,7 @@ def input_3d_shape(key=None):
     objaid = st.text_input("Enter an Objaverse ID", key=objaid_key)
     model = st.file_uploader("Or upload a model (.glb/.obj/.ply)", key=model_key)
     npy = st.file_uploader("Or upload a point cloud numpy array (.npy of Nx3 XYZ or Nx6 XYZRGB)", key=npy_key)
-    swap_yz_axes = st.
+    swap_yz_axes = st.radio("Gravity", ["Y is up (for most Objaverse shapes)", "Z is up"], key=swap_key) == "Z is up"
     f32 = numpy.float32
 
     def load_data(prog):
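The `st.radio` control lets the user declare which axis is gravity-aligned; `swap_yz_axes` is True only for the "Z is up" choice and presumably drives a Y/Z column swap on the loaded cloud further down in `load_data` (outside this hunk), matching the `[0, 2, 1, 3, 4, 5]` reorder seen in classification.py. A minimal sketch of such a swap, with a hypothetical helper name:

    import numpy

    def maybe_swap_yz(pc: numpy.ndarray, swap_yz_axes: bool) -> numpy.ndarray:
        # Hypothetical helper: exchange the Y and Z columns of an Nx3 XYZ
        # or Nx6 XYZRGB array when the shape was authored Z-up.
        if swap_yz_axes:
            pc = pc.copy()
            pc[:, [1, 2]] = pc[:, [2, 1]]
        return pc

    cloud = numpy.random.rand(4, 6).astype(numpy.float32)  # dummy Nx6 XYZRGB
    print(maybe_swap_yz(cloud, True)[:, :3])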
openshape/demo/sd_pc2img.py
CHANGED
@@ -13,25 +13,33 @@ class Wrapper(transformers.modeling_utils.PreTrainedModel):
         return rst.ObjectProxy(image_embeds=x)
 
 
-half = torch.float16 if torch.cuda.is_available() else torch.bfloat16
 pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
     "diffusers/stable-diffusion-2-1-unclip-i2i-l",
-
+    # variant="fp16",
     image_encoder = Wrapper()
 )
+# pe = pipe.text_encoder.text_model.embeddings
+# pe.position_ids = torch.arange(pe.position_ids.shape[-1]).expand((1, -1)).to(pe.position_ids)  # workaround
 if torch.cuda.is_available():
     pipe = pipe.to('cuda:' + str(torch.cuda.current_device()))
     pipe.enable_model_cpu_offload(torch.cuda.current_device())
+    pipe.enable_attention_slicing()
+    pipe.enable_vae_slicing()
 
 
 @torch.no_grad()
 def pc_to_image(pc_encoder: torch.nn.Module, pc, prompt, noise_level, width, height, cfg_scale, num_steps, callback):
     ref_dev = next(pc_encoder.parameters()).device
     enc = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
+    enc = torch.nn.functional.normalize(enc, dim=-1) * (768 ** 0.5) / 2
+    if torch.cuda.is_available():
+        enc = enc.to('cuda:' + str(torch.cuda.current_device()))
+    # enc = enc.type(half)
+    # with torch.autocast("cuda"):
     return pipe(
-        prompt=', '.join(["best quality"
+        prompt=', '.join(["best quality"] + ([prompt] if prompt else [])),
         negative_prompt="cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry",
-        image=
+        image=enc,
         width=width, height=height,
         guidance_scale=cfg_scale,
         noise_level=noise_level,
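Beyond the prompt and `image=enc` plumbing, the notable addition is the rescale of the shape embedding before it reaches the unCLIP pipeline: `normalize(enc, dim=-1) * (768 ** 0.5) / 2` forces every embedding to an L2 norm of sqrt(768)/2 ≈ 13.86, presumably matching the magnitude the pipeline expects from its CLIP image encoder. A quick check of that arithmetic:

    import torch
    import torch.nn.functional as F

    enc = torch.randn(1, 768) * 5.0                    # dummy embedding, arbitrary scale
    enc = F.normalize(enc, dim=-1) * (768 ** 0.5) / 2  # same rescale as the diff
    print(enc.norm(dim=-1))                            # tensor([13.8564]) == sqrt(768) / 2

`enable_attention_slicing()` and `enable_vae_slicing()` are standard diffusers memory savers that trade some speed for lower peak VRAM, complementing the existing `enable_model_cpu_offload` call; the commented-out `position_ids` lines are kept as a documented workaround rather than active code.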