|
import gradio as gr |
|
import cv2 |
|
import mediapipe as mp |
|
import numpy as np |
|
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation |
|
import torch |
|
|
|
|
|
# MediaPipe handles used by the rest of the module: the drawing helpers, the
# pose solution namespace, and one shared Pose estimator instance.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# The estimator is created once at import time and reused for every request.
pose = mp_pose.Pose(
    static_image_mode=True,
    model_complexity=2,
)
|
|
|
|
|
# The preprocessor and the network are both loaded from the same pretrained
# checkpoint, so the identifier is spelled out only once.
_SEGFORMER_CHECKPOINT = "nvidia/segformer-b0-finetuned-ade-512-512"

feature_extractor = SegformerFeatureExtractor.from_pretrained(_SEGFORMER_CHECKPOINT)
model = SegformerForSemanticSegmentation.from_pretrained(_SEGFORMER_CHECKPOINT)
|
|
|
|
|
# Colour painted onto the segmentation overlay for each body-part group.
# Each value is a 3-tuple written directly into the pixels of an image array,
# so the channel order is whatever order that array uses.
PART_COLORS = {
    "head": (0, 255, 0),
    "shoulders": (255, 0, 0),
    "upper_body": (0, 0, 255),
    "arms": (255, 255, 0),
    "lower_body": (255, 0, 255),
}
|
|
|
# Segmentation class indices that make up each body-part group. The keys
# mirror PART_COLORS; a pixel whose predicted class is in a group's list is
# painted with that group's colour.
#
# NOTE(review): the checkpoint loaded in this file is fine-tuned on ADE20K,
# a scene-parsing label set. Its low indices appear to be scene categories
# (wall, sky, floor, ...) rather than body parts -- verify these indices
# against `model.config.id2label` before relying on the overlay.
PART_LABELS = {
    "head": [0],
    "shoulders": [2],
    "upper_body": [3, 4],
    "arms": [5, 6],
    "lower_body": [7, 8],
}
|
|
|
def segment_image(image):
    """Colour-code an image by its predicted segmentation classes.

    The image is preprocessed with the module-level ``feature_extractor`` and
    run through the module-level ``model``. The per-pixel argmax class map is
    scaled back to the input resolution with nearest-neighbour interpolation,
    and every pixel whose class index is listed in ``PART_LABELS`` is painted
    with the matching ``PART_COLORS`` entry. Pixels whose class is not listed
    in any group stay zero (black).

    Args:
        image: Array indexed as (rows, columns, 3). ``shape[0]`` and
            ``shape[1]`` give the output size, and each painted pixel
            receives a 3-value colour.

    Returns:
        A new array with the same shape and dtype as ``image`` holding the
        colour overlay. ``image`` itself is not modified.
    """
    inputs = feature_extractor(images=image, return_tensors="pt")

    # Pure inference: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**inputs).logits

    # One image in, so take batch element 0 of the (batch, rows, cols) argmax
    # explicitly instead of relying on squeeze().
    class_map = torch.argmax(logits, dim=1)[0].cpu().numpy()

    # argmax yields int64, which cv2.resize does not accept (OpenCV 4.x has
    # no 64-bit integer matrix type). uint16 is supported and has ample range
    # for class indices.
    class_map = class_map.astype(np.uint16)

    height, width = image.shape[:2]
    # Nearest-neighbour keeps the values valid class indices (no blending).
    class_map = cv2.resize(class_map, (width, height), interpolation=cv2.INTER_NEAREST)

    segmented_image = np.zeros_like(image)
    for part, color in PART_COLORS.items():
        segmented_image[np.isin(class_map, PART_LABELS[part])] = color

    return segmented_image
|
|
|
|
|
def estimate_pose(image):
    """Draw pose landmarks on an image and build its segmentation overlay.

    This is the callback wired into the Gradio interface defined in this
    file, whose input is a numpy image component.

    Args:
        image: RGB image array as delivered by the Gradio image input, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(pose_image, segmented_image)`` tuple. ``pose_image`` is a copy
        of the input with the landmarks and connections drawn on it, or the
        untouched input when no pose was detected. ``segmented_image`` is the
        result of ``segment_image(image)``. Both are ``None`` when ``image``
        is ``None``.
    """
    if image is None:
        # Submitting the form without an image yields None; there is nothing
        # to process, so hand back empty outputs instead of crashing.
        return None, None

    # The overlay does not depend on the pose result, so compute it once.
    segmented_image = segment_image(image)

    # The Gradio numpy image is already RGB, which is the order MediaPipe
    # expects. A BGR->RGB swap here would feed it BGR data instead.
    results = pose.process(image)
    if not results.pose_landmarks:
        return image, segmented_image

    # Draw on a copy so the caller's array is left unmodified.
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(
            color=(0, 255, 0), thickness=2, circle_radius=2
        ),
        connection_drawing_spec=mp_drawing.DrawingSpec(
            color=(0, 0, 255), thickness=2, circle_radius=2
        ),
    )

    return annotated_image, segmented_image
|
|
|
|
|
# Gradio UI: one uploaded image in, two images out (the landmark drawing and
# the colour-coded segmentation), both produced by estimate_pose.
interface = gr.Interface(
    fn=estimate_pose,
    inputs=gr.Image(type="numpy", label="Upload an Image"),
    outputs=[
        gr.Image(type="numpy", label="Pose Landmarks Image"),
        gr.Image(type="numpy", label="Segmented Body Parts"),
    ],
    title="Human Pose Estimation and Segmentation",
    description=(
        "Upload an image to detect and visualize human pose landmarks and "
        "segment body parts (head, shoulders, upper body, arms, lower body) "
        "with different colors."
    ),
)


# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|
|