Commit b05da2d · joselobenitezg committed
Parent(s): d04cd0a

add pose inference

Files changed:
- app.py (+10 -3)
- inference/pose.py (+6 -2)
- load_and_test.ipynb (+23 -21)
app.py CHANGED

@@ -5,19 +5,26 @@ from PIL import Image
 import cv2
 import spaces
 
-from inference.seg import process_image_or_video
+from inference.seg import process_image_or_video as process_seg
+from inference.pose import process_image_or_video as process_pose
 from config import SAPIENS_LITE_MODELS_PATH
 
 def update_model_choices(task):
     model_choices = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
     return gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
 
-@spaces.GPU(
+@spaces.GPU()
 def process_image(input_image, task, version):
     if isinstance(input_image, np.ndarray):
         input_image = Image.fromarray(input_image)
 
-
+    if task.lower() == 'seg':
+        result = process_seg(input_image, task=task.lower(), version=version)
+    elif task.lower() == 'pose':
+        result = process_pose(input_image, task=task.lower(), version=version)
+    else:
+        result = None
+        print(f"Tarea no soportada: {task}")
 
     return result
 
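Note on the app.py change above: process_image now dispatches on the task string, so the Gradio UI only needs to pass the selected task and model version through. The following is a minimal wiring sketch, not code from this commit; the component names (task_dd, version_dd, input_img, output_img, run_btn) are assumptions, and update_model_choices / process_image are the functions defined in app.py above.

import gradio as gr

# update_model_choices and process_image are assumed to be defined earlier in app.py.
with gr.Blocks() as demo:
    task_dd = gr.Dropdown(choices=["seg", "pose"], value="seg", label="Task")
    version_dd = gr.Dropdown(choices=[], label="Model version")
    input_img = gr.Image(type="numpy", label="Input image")
    output_img = gr.Image(label="Result")
    run_btn = gr.Button("Run")

    # Refresh the available model versions whenever the task changes.
    task_dd.change(update_model_choices, inputs=task_dd, outputs=version_dd)
    # process_image routes to process_seg or process_pose based on the selected task.
    run_btn.click(process_image, inputs=[input_img, task_dd, version_dd], outputs=output_img)

demo.launch()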
inference/pose.py CHANGED

@@ -90,6 +90,9 @@ def load_model(task, version):
     try:
         model_path = SAPIENS_LITE_MODELS_PATH[task][version]
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+            torch.backends.cuda.matmul.allow_tf32 = True
+            torch.backends.cudnn.allow_tf32 = True
         model = torch.jit.load(model_path)
         model.eval()
         model.to(device)
@@ -109,6 +112,7 @@ def preprocess_image(image, input_shape):
     return img.unsqueeze(0)
 
 def udp_decode(heatmap, img_size, heatmap_size):
+    # This is a simplified version. You might need to implement the full UDP decode logic
     h, w = heatmap_size
     keypoints = np.zeros((heatmap.shape[0], 2))
     keypoint_scores = np.zeros(heatmap.shape[0])
@@ -133,8 +137,6 @@ def process_image_or_video(input_data, task='pose', version='sapiens_1b'):
     if model is None or device is None:
         return None
 
-    input_shape = (3, 1024, 768)
-
     def process_frame(frame):
         if isinstance(frame, np.ndarray):
             frame = Image.fromarray(frame)
@@ -142,6 +144,8 @@ def process_image_or_video(input_data, task='pose', version='sapiens_1b'):
         if frame.mode == 'RGBA':
             frame = frame.convert('RGB')
 
+        input_shape = (3, frame.height, frame.width)
+
         img = preprocess_image(frame, input_shape)
 
         with torch.no_grad():
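Note on the inference/pose.py change above: the TF32 lines added to load_model are a performance toggle; on GPUs with compute capability >= 8 (Ampere or newer) they let matmul and cuDNN kernels use TensorFloat-32, trading a little precision for speed. The new comment in udp_decode flags that function as a simplified decoder, and this diff only shows its first few lines, so the repository's actual logic is not visible here. As a rough illustration of what a simplified, argmax-based heatmap decode with that signature can look like (full UDP decoding would add sub-pixel refinement), consider:

import numpy as np

# Hedged sketch only: one plausible "simplified" decoder consistent with the
# udp_decode signature shown above, not the repository's actual implementation.
def simple_heatmap_decode(heatmap, img_size, heatmap_size):
    # heatmap: (K, h, w) array of per-keypoint heatmaps
    # img_size: (H, W) of the input image; heatmap_size: (h, w)
    h, w = heatmap_size
    num_keypoints = heatmap.shape[0]
    keypoints = np.zeros((num_keypoints, 2))
    keypoint_scores = np.zeros(num_keypoints)
    for k in range(num_keypoints):
        # Take the location of the peak response for keypoint k.
        idx = np.argmax(heatmap[k])
        y, x = np.unravel_index(idx, (h, w))
        # Map heatmap coordinates back to image coordinates.
        keypoints[k] = [x * img_size[1] / w, y * img_size[0] / h]
        keypoint_scores[k] = heatmap[k, y, x]
    return keypoints, keypoint_scores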
load_and_test.ipynb CHANGED

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
@@ -32,7 +32,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -77,7 +77,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -829,7 +829,7 @@
       ")"
      ]
     },
-    "execution_count":
+    "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -842,7 +842,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 6,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -856,7 +856,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -871,7 +871,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -888,7 +888,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -899,7 +899,7 @@
       "<PIL.Image.Image image mode=RGB size=640x480>"
      ]
     },
-    "execution_count":
+    "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -917,7 +917,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -926,7 +926,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -937,7 +937,7 @@
       "<PIL.Image.Image image mode=RGB size=1024x1024>"
      ]
     },
-    "execution_count":
+    "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -955,7 +955,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
@@ -977,7 +977,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 14,
    "metadata": {},
    "outputs": [
     {
@@ -2188,7 +2188,7 @@
       ")"
      ]
     },
-    "execution_count":
+    "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -2230,7 +2230,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 15,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2292,7 +2292,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
@@ -2303,7 +2303,7 @@
       "<PIL.Image.Image image mode=RGB size=640x480>"
      ]
     },
-    "execution_count":
+    "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -2321,16 +2321,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
+    "from PIL import Image, ImageDraw\n",
+    "\n",
    "output_pose = get_pose(resized_pil_image, model)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
@@ -2341,7 +2343,7 @@
       "<PIL.Image.Image image mode=RGB size=640x480>"
      ]
     },
-    "execution_count":
+    "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
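Note on the notebook change above: it adds from PIL import Image, ImageDraw before the get_pose call, presumably so the predicted keypoints can be drawn onto the image. The drawing code itself is not part of this diff; as a hypothetical illustration (the (x, y, score) keypoint format is an assumption), an overlay helper could look like:

from PIL import ImageDraw

# Illustrative only: assumes `keypoints` is an iterable of (x, y, score) tuples
# in image coordinates, e.g. as produced by a decode step like the sketch above.
def draw_keypoints(image, keypoints, radius=3, threshold=0.3):
    # Return a copy of `image` with a dot at each sufficiently confident keypoint.
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)
    for x, y, score in keypoints:
        if score < threshold:
            continue  # skip low-confidence detections
        draw.ellipse((x - radius, y - radius, x + radius, y + radius), fill=(255, 0, 0))
    return annotated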