from pathlib import Path
from typing import Optional, Union

from PIL import Image, ImageDraw
from torch import ge

from internals.util.commons import download_file, download_image, safe_index
from internals.util.config import get_root_dir
from models.pose.body import Body


class PoseDetector:
    """Detect a body pose in an image and render it as a coloured stick figure.

    A pose is a dict with two keys:

    * ``"candidate"`` - a list of ``[x, y]`` keypoints (``None`` marks a
      keypoint that is missing),
    * ``"subset"`` - the per-person keypoint index table returned by the
      body-estimation model.
    """

    __pose_model = (
        "https://comic-assets.s3.ap-south-1.amazonaws.com/models/body_pose_model.pth"
    )
    __loaded = False

    # Number of keypoints per person (size of the colour table below).
    __keypoint_count = 18

    # Client coordinates are rescaled from a canvas of this size (see transform()).
    __client_canvas_size = 384

    # Index permutation applied by __convert_keypoints.
    # NOTE(review): presumably maps another keypoint ordering onto this one - confirm.
    __kim = [0, 17, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3]

    # Limbs to draw: [start keypoint, end keypoint, RGBA colour].
    # Keypoint numbers are 1-based; create_pose() subtracts 1 before indexing.
    __pose_logical_map = [
        [2, 3, (255, 0, 0, 153)],
        [3, 4, (255, 85, 0, 153)],
        [4, 5, (255, 170, 0, 153)],
        [2, 6, (255, 255, 0, 153)],
        [6, 7, (170, 255, 0, 153)],
        [7, 8, (85, 255, 0, 153)],
        [2, 9, (0, 255, 0, 153)],
        [9, 10, (0, 255, 85, 153)],
        [10, 11, (0, 255, 170, 153)],
        [2, 12, (0, 255, 255, 153)],
        [12, 13, (0, 170, 255, 153)],
        [13, 14, (0, 85, 255, 153)],
        [2, 1, (0, 0, 255, 153)],
        [1, 15, (85, 0, 255, 153)],
        [15, 17, (170, 0, 255, 153)],
        [1, 16, (255, 0, 255, 153)],
        [16, 18, (255, 0, 170, 153)],
    ]

    # Fill colour of each keypoint dot, indexed by keypoint position.
    __points_color = [
        "#ff0000",
        "#ff5600",
        "#ffaa01",
        "#ffff00",
        "#aaff03",
        "#53ff00",
        "#03ff00",
        "#03ff55",
        "#03ffaa",
        "#03ffff",
        "#05aaff",
        "#0055ff",
        "#0000ff",
        "#5500ff",
        "#aa00ff",
        "#ff00aa",
        "#ff00ff",
        "#ff0055",
    ]

    def load(self):
        """Fetch the model weights and build the body estimator once per instance.

        The weights are stored under ``~/.cache`` using the file name taken
        from the model URL. Subsequent calls on the same instance are no-ops.
        """
        if self.__loaded:
            return
        pose_path = Path.home() / ".cache" / self.__pose_model.split("/")[-1]
        download_file(self.__pose_model, pose_path)
        self.body_estimation = Body(str(pose_path))
        self.__loaded = True

    def transform(
        self,
        image: Union[str, Image.Image],
        width: int,
        height: int,
        client_coordinates: Optional[dict],
    ) -> Image.Image:
        """Render the pose image for ``image``.

        The pose is inferred from the image. When ``client_coordinates`` has a
        non-empty ``"candidate"`` list, those points are rescaled from the
        384x384 client canvas to ``width`` x ``height`` and replace the
        inferred ones after being translated onto the inferred neck.

        Args:
            image: A PIL image, or a string that is passed to
                ``download_image`` to obtain one.
            width: Width of the output image.
            height: Height of the output image.
            client_coordinates: Optional pose dict supplied by the client.

        Returns:
            An RGBA image of size ``(width, height)`` with the pose drawn on a
            black background. If no person is detected and no client
            coordinates are given, the image is blank.
        """
        self.load()
        if isinstance(image, str):
            image = download_image(image)

        infer_coordinates = self.infer(image, width, height)
        poses = self.make_pose_from_subset(
            infer_coordinates["candidate"], infer_coordinates["subset"]
        )
        # Only the first detected person is used. When nobody is detected,
        # fall back to an empty skeleton instead of raising IndexError so the
        # client coordinates (if any) can still be rendered.
        infer_coordinates["candidate"] = (
            poses[0] if poses else [None] * self.__keypoint_count
        )

        if client_coordinates and client_coordinates.get("candidate"):
            canvas = self.__client_canvas_size
            client_coordinates = self.resize_coordinates(
                client_coordinates, canvas, canvas, width, height
            )
            infer_coordinates = self.map_coordinates(
                client_coordinates, infer_coordinates
            )

        return self.create_pose(infer_coordinates, width, height)

    def resize_coordinates(
        self, data: dict, ori_width, ori_height, new_width, new_height
    ):
        """Scale every keypoint of ``data`` from the original to the new canvas.

        One scale factor is applied to both axes. It is the ratio of the
        shorter sides when the new canvas is wider than tall, and the ratio of
        the longer sides otherwise.

        Args:
            data: Pose dict; ``data["candidate"]`` holds the points to scale.
            ori_width: Width of the canvas the points were expressed in.
            ori_height: Height of the canvas the points were expressed in.
            new_width: Width of the target canvas.
            new_height: Height of the target canvas.

        Returns:
            A new pose dict with the scaled points and the ``"subset"`` value
            of ``data`` (``None`` if ``data`` has no such key). Missing
            (``None``) points stay ``None``.
        """
        if new_width > new_height:
            ori_ref = min(ori_width, ori_height)
            new_ref = min(new_width, new_height)
        else:
            ori_ref = max(ori_width, ori_height)
            new_ref = max(new_width, new_height)

        new_points = [
            [point[0] * new_ref / ori_ref, point[1] * new_ref / ori_ref]
            if point
            else None
            for point in data["candidate"]
        ]
        return {"candidate": new_points, "subset": data.get("subset")}

    def create_pose(self, data: dict, width: int, height: int) -> Image.Image:
        """Draw the pose in ``data`` onto a new black RGBA canvas.

        Limbs listed in the limb table are drawn as 4px lines when both of
        their end points exist; every existing keypoint is drawn as a dot of
        radius 3.

        Args:
            data: Pose dict; ``data["candidate"]`` holds the keypoints.
            width: Width of the output image.
            height: Height of the output image.

        Returns:
            The rendered image.
        """
        image = Image.new("RGBA", (width, height), "black")
        draw = ImageDraw.Draw(image)
        points: list = data["candidate"]

        for start, end, color in self.__pose_logical_map:
            # The limb table is 1-based, the point list is 0-based.
            xy = safe_index(points, start - 1)
            x1y1 = safe_index(points, end - 1)
            if xy and x1y1:
                draw.line(
                    (xy[0], xy[1], x1y1[0], x1y1[1]),
                    fill=color,
                    width=4,
                )

        colors = self.__points_color
        for i, point in enumerate(points):
            if not point or len(point) < 2:
                continue
            x, y = point[0], point[1]
            # Compare against None explicitly: a coordinate of 0 is a valid
            # position on the image edge and must still be drawn.
            if x is None or y is None:
                continue
            draw.ellipse(
                (x - 3, y - 3, x + 3, y + 3),
                # Wrap around so poses with extra points cannot overflow the table.
                fill=colors[i % len(colors)],
            )
        return image

    def infer(self, image: Union[str, Image.Image], width, height) -> dict:
        """Run the body estimator on ``image`` resized to ``width`` x ``height``.

        Args:
            image: A PIL image, or a string that is passed to
                ``download_image`` to obtain one.
            width: Width the image is resized to before inference.
            height: Height the image is resized to before inference.

        Returns:
            A pose dict whose ``"candidate"`` entries are truncated to their
            first two values and whose ``"subset"`` is the estimator's subset
            table converted to nested lists.
        """
        self.load()
        if isinstance(image, str):
            image = download_image(image)
        image = image.resize((width, height))

        candidate, subset = self.body_estimation(image)
        # Keep only the first two components (x, y) of every candidate row.
        points = [item[:2] for item in candidate.tolist()]
        return {"candidate": points, "subset": subset.tolist()}

    def map_coordinates(
        self, client_coordinates: dict, infer_coordinates: dict
    ) -> dict:
        """Overlay the client's keypoints onto the inferred pose.

        The client points are treated as the source of truth: they are shifted
        so that the client neck (index 1) lands on the inferred neck, then
        each one replaces the inferred point at the same index.

        * If either neck is missing, no shift is applied.
        * Client points that are ``None`` leave the inferred point untouched.
        * Client points beyond the length of the inferred list are ignored.

        Args:
            client_coordinates: Pose dict supplied by the client.
            infer_coordinates: Pose dict inferred from the image.

        Returns:
            A new pose dict with the merged points and the inferred
            ``"subset"``. The input lists are not modified.
        """
        client_points = client_coordinates["candidate"]
        infer_points = infer_coordinates["candidate"]

        c_neck = client_points[1] if len(client_points) > 1 else None
        i_neck = infer_points[1] if len(infer_points) > 1 else None
        if c_neck and i_neck:
            dx = i_neck[0] - c_neck[0]
            dy = i_neck[1] - c_neck[1]
        else:
            # No anchor available: use the client points where they are.
            dx = dy = 0

        merged = list(infer_points)
        for i, point in enumerate(client_points[: len(merged)]):
            if point:
                merged[i] = [point[0] + dx, point[1] + dy]
        return {"candidate": merged, "subset": infer_coordinates["subset"]}

    def make_pose_from_subset(self, candidate, subset):
        """Resolve every row of ``subset`` into a list of 18 keypoints.

        Each of the first 18 values of a subset row is an index into
        ``candidate``. A negative index, or one for which
        ``safe_index(candidate, i)`` is falsy, yields ``None``.

        Args:
            candidate: List of keypoint coordinates.
            subset: One row of candidate indices per detected person.

        Returns:
            One list of 18 points (or ``None`` entries) per subset row.
        """

        def make_pose_from_subset_item(subset_item):
            pose = []
            for j in range(self.__keypoint_count):
                i = int(subset_item[j])
                if i < 0 or not safe_index(candidate, i):
                    pose.append(None)
                else:
                    # Copy so later edits to the pose do not touch `candidate`.
                    pose.append(list(candidate[i]))
            return pose

        return [make_pose_from_subset_item(subset_item) for subset_item in subset]

    def __convert_keypoints(self, keypoints):
        """Return ``keypoints`` reordered according to the ``__kim`` index table."""
        return [keypoints[i] for i in self.__kim]