Lotus_Depth

App Files Community

ghostsInTheMachine committed 1 day ago

Commit

693892f

•

1 Parent(s): 6a4fc34

Update infer.py

Browse files

Files changed (1) hide show

infer.py +18 -25

infer.py CHANGED Viewed

@@ -33,7 +33,7 @@ def load_models(task_name, device):
     logging.info(f"Successfully loaded pipelines from {model_g} and {model_d}.")
     return pipe_g, pipe_d
-def infer_pipe(pipe, images_batch, task_name, seed, device):
     if seed is None:
         generator = None
     else:
@@ -46,44 +46,37 @@ def infer_pipe(pipe, images_batch, task_name, seed, device):
     with torch.no_grad():
         with autocast_ctx:
-            # Convert list of images to tensor
-            images = [np.array(img.convert('RGB')).astype(np.float32) for img in images_batch]
-            test_images = torch.stack([torch.tensor(img).permute(2, 0, 1) for img in images])
-            test_images = test_images / 127.5 - 1.0
-            test_images = test_images.to(device).type(torch.float16)
-            # Ensure task_emb matches expected dimensions
-            batch_size = test_images.shape[0]
             task_emb = torch.tensor([1, 0], device=device, dtype=torch.float16).unsqueeze(0)
             task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)], dim=-1)
-            task_emb = task_emb.expand(batch_size, -1)
             # Run inference
-            preds = pipe(
-                rgb_in=test_images,
                 prompt='',
                 num_inference_steps=1,
                 generator=generator,
                 output_type='np',
                 timesteps=[999],
                 task_emb=task_emb,
-            ).images
-            # Post-process predictions
-            outputs = []
             if task_name == 'depth':
-                for p in preds:
-                    output_npy = p.mean(axis=-1)
-                    output_color = colorize_depth_map(output_npy)
-                    outputs.append(output_color)
             else:
-                for p in preds:
-                    output_npy = p
-                    output_color = Image.fromarray((output_npy * 255).astype(np.uint8))
-                    outputs.append(output_color)
-    return outputs
-def lotus(images_batch, task_name, seed, device, pipe_g, pipe_d):
-    output_d = infer_pipe(pipe_d, images_batch, task_name, seed, device)
     return output_d  # Only returning depth outputs for this application

     logging.info(f"Successfully loaded pipelines from {model_g} and {model_d}.")
     return pipe_g, pipe_d
+def infer_pipe(pipe, image, task_name, seed, device):
     if seed is None:
         generator = None
     else:
     with torch.no_grad():
         with autocast_ctx:
+            # Convert image to tensor
+            img = np.array(image.convert('RGB')).astype(np.float32)
+            test_image = torch.tensor(img).permute(2, 0, 1).unsqueeze(0)
+            test_image = test_image / 127.5 - 1.0
+            test_image = test_image.to(device).type(torch.float16)
+            # Create task_emb
             task_emb = torch.tensor([1, 0], device=device, dtype=torch.float16).unsqueeze(0)
             task_emb = torch.cat([torch.sin(task_emb), torch.cos(task_emb)], dim=-1)
             # Run inference
+            pred = pipe(
+                rgb_in=test_image,
                 prompt='',
                 num_inference_steps=1,
                 generator=generator,
                 output_type='np',
                 timesteps=[999],
                 task_emb=task_emb,
+            ).images[0]
+            # Post-process prediction
             if task_name == 'depth':
+                output_npy = pred.mean(axis=-1)
+                output_color = colorize_depth_map(output_npy)
             else:
+                output_npy = pred
+                output_color = Image.fromarray((output_npy * 255).astype(np.uint8))
+    return output_color
+def lotus(image, task_name, seed, device, pipe_g, pipe_d):
+    output_d = infer_pipe(pipe_d, image, task_name, seed, device)
     return output_d  # Only returning depth outputs for this application