# internals/pipelines/pose_detector.py
from pathlib import Path
from typing import Optional, Union
from PIL import Image, ImageDraw
from torch import ge
from internals.util.commons import download_file, download_image, safe_index
from internals.util.config import get_root_dir
from models.pose.body import Body
class PoseDetector:
    """Detect human body keypoints in an image and render an OpenPose-style skeleton.

    The body-pose model checkpoint is downloaded once into ``~/.cache`` and the
    network is loaded lazily on first use.
    """

    # Remote checkpoint for the body-pose estimation network.
    __pose_model = (
        "https://comic-assets.s3.ap-south-1.amazonaws.com/models/body_pose_model.pth"
    )
    __loaded = False

    def load(self) -> None:
        """Download the checkpoint if missing and load the model. Idempotent."""
        if self.__loaded:
            return
        pose_path = Path.home() / ".cache" / self.__pose_model.split("/")[-1]
        download_file(self.__pose_model, pose_path)
        self.body_estimation = Body(str(pose_path))
        self.__loaded = True

    def transform(
        self,
        image: Union[str, Image.Image],
        width: int,
        height: int,
        client_coordinates: Optional[dict],
    ) -> Image.Image:
        """Infer pose coordinates from *image*, optionally align client-supplied
        coordinates onto the inferred pose, and draw the resulting skeleton.

        :param image: source image, or a URL string to download it from
        :param width: output canvas width in pixels
        :param height: output canvas height in pixels
        :param client_coordinates: optional ``{"candidate": ..., "subset": ...}``
            dict expressed in a 384x384 space; when present, its points are
            scaled to (width, height) and translated onto the inferred pose
        :raises IndexError: if no person is detected in the image
        """
        self.load()
        if isinstance(image, str):
            image = download_image(image)
        infer_coordinates = self.infer(image, width, height)
        candidate_list = self.make_pose_from_subset(
            infer_coordinates["candidate"], infer_coordinates["subset"]
        )
        # Hard check: only a single person is supported; indexing raises
        # IndexError when nobody was detected.
        infer_coordinates["candidate"] = candidate_list[0]
        if client_coordinates and client_coordinates["candidate"]:
            client_coordinates = self.resize_coordinates(
                client_coordinates, 384, 384, width, height
            )
            infer_coordinates = self.map_coordinates(
                client_coordinates, infer_coordinates
            )
        return self.create_pose(infer_coordinates, width, height)

    def resize_coordinates(
        self, data: dict, ori_width, ori_height, new_width, new_height
    ) -> dict:
        """Uniformly scale ``data["candidate"]`` points from the original canvas
        size to the new one (single scale factor, aspect ratio preserved).

        NOTE(review): landscape targets scale by the *shorter* sides while
        portrait/square targets scale by the *longer* sides — preserved as-is;
        confirm this asymmetry is intended.
        """
        points = data["candidate"]
        if new_width > new_height:
            ori_ref = min(ori_width, ori_height)
            new_ref = min(new_width, new_height)
        else:
            ori_ref = max(ori_width, ori_height)
            new_ref = max(new_width, new_height)
        # Keep the original multiply-then-divide order for bit-identical floats.
        new_points = [[p[0] * new_ref / ori_ref, p[1] * new_ref / ori_ref] for p in points]
        return {"candidate": new_points, "subset": data["subset"]}

    def create_pose(self, data: dict, width: int, height: int) -> Image.Image:
        """Render the pose skeleton (limbs, then joint dots) on a black RGBA canvas.

        Entries in ``data["candidate"]`` may be ``None`` for undetected
        keypoints; segments touching a missing point are skipped.
        """
        image = Image.new("RGBA", (width, height), "black")
        draw = ImageDraw.Draw(image)
        points: list = data["candidate"]
        # The limb map is 1-indexed; the points list is 0-indexed.
        for start, end, color in self.__pose_logical_map:
            p0 = safe_index(points, start - 1)
            p1 = safe_index(points, end - 1)
            if p0 is not None and p1 is not None:
                draw.line(
                    (p0[0], p0[1], p1[0], p1[1]),
                    fill=color,
                    width=4,
                )
        for i, point in enumerate(points):
            x = safe_index(point, 0)
            y = safe_index(point, 1)
            # BUG FIX: compare against None instead of truthiness — a keypoint
            # lying exactly on coordinate 0 is valid and must still be drawn.
            if x is not None and y is not None:
                draw.ellipse((x - 3, y - 3, x + 3, y + 3), fill=self.__points_color[i])
        return image

    def infer(self, image: Union[str, Image.Image], width, height) -> dict:
        """Run the body-pose model and return its raw output.

        :returns: ``{"candidate": [[x, y], ...], "subset": [...]}`` — candidate
            entries are trimmed to the first two values (x, y).
        """
        self.load()
        if isinstance(image, str):
            image = download_image(image)
        image = image.resize((width, height))
        candidate, subset = self.body_estimation(image)
        # Keep only (x, y) per detected keypoint.
        candidate = [item[:2] for item in candidate.tolist()]
        return {"candidate": candidate, "subset": subset.tolist()}

    def map_coordinates(
        self, client_coordinates: dict, infer_coordinates: dict
    ) -> dict:
        """Translate the client pose so its neck (index 1) coincides with the
        inferred neck, treating the client pose shape as the source of truth.
        """
        client_points = client_coordinates["candidate"]
        infer_points = infer_coordinates["candidate"]
        # Offset between the two neck keypoints (index 1).
        dx = infer_points[1][0] - client_points[1][0]
        dy = infer_points[1][1] - client_points[1][1]
        # BUG FIX: the original indexed with `i - 1`, silently wrapping through
        # -1; plain enumerate writes each translated point to its own slot.
        for i, point in enumerate(client_points):
            infer_points[i] = [point[0] + dx, point[1] + dy]
        return {"candidate": infer_points, "subset": infer_coordinates["subset"]}

    def make_pose_from_subset(self, candidate, subset):
        """For every person in *subset*, build an 18-entry pose list of
        ``[x, y]`` points, with ``None`` where a keypoint is missing.
        """

        def make_pose_from_subset_item(candidate, subset_item):
            pose = []
            for j in range(18):  # 18 body keypoints per person
                i = int(subset_item[j])
                if i < 0 or not safe_index(candidate, i):
                    pose.append(None)
                else:
                    # Shallow copy so later edits don't alias the raw candidate.
                    pose.append(list(candidate[i]))
            return pose

        return [
            make_pose_from_subset_item(candidate, subset_item)
            for subset_item in subset
        ]

    def __convert_keypoints(self, keypoints):
        """Reorder *keypoints* according to the fixed index map ``__kim``."""
        return [keypoints[i] for i in self.__kim]

    # Index permutation applied by __convert_keypoints.
    __kim = [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]

    # Limb segments as (start, end, RGBA fill), 1-indexed into the 18 keypoints.
    __pose_logical_map = [
        [2, 3, (255, 0, 0, 153)],
        [3, 4, (255, 85, 0, 153)],
        [4, 5, (255, 170, 0, 153)],
        [2, 6, (255, 255, 0, 153)],
        [6, 7, (170, 255, 0, 153)],
        [7, 8, (85, 255, 0, 153)],
        [2, 9, (0, 255, 0, 153)],
        [9, 10, (0, 255, 85, 153)],
        [10, 11, (0, 255, 170, 153)],
        [2, 12, (0, 255, 255, 153)],
        [12, 13, (0, 170, 255, 153)],
        [13, 14, (0, 85, 255, 153)],
        [2, 1, (0, 0, 255, 153)],
        [1, 15, (85, 0, 255, 153)],
        [15, 17, (170, 0, 255, 153)],
        [1, 16, (255, 0, 255, 153)],
        [16, 18, (255, 0, 170, 153)],
    ]

    # Joint dot color per keypoint index.
    __points_color = [
        "#ff0000",
        "#ff5600",
        "#ffaa01",
        "#ffff00",
        "#aaff03",
        "#53ff00",
        "#03ff00",
        "#03ff55",
        "#03ffaa",
        "#03ffff",
        "#05aaff",
        "#0055ff",
        "#0000ff",
        "#5500ff",
        "#aa00ff",
        "#ff00aa",
        "#ff00ff",
        "#ff0055",
    ]