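"""Human Pose Estimation and Segmentation demo (Gradio).

Detects human pose landmarks with MediaPipe and colors coarse body-part
regions using a Segformer semantic-segmentation model.

Assumed dependencies (taken from the imports below): gradio, opencv-python,
mediapipe, numpy, transformers, torch.
"""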
import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
import torch

# Initialize MediaPipe Pose estimation (static image mode, model_complexity=2 for highest accuracy)
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, model_complexity=2)
mp_drawing = mp.solutions.drawing_utils

# Initialize Segformer model for segmentation
# Note: this checkpoint is trained on ADE20K scene parsing, so the body-part
# class indices used below must be checked against the checkpoint's label map.
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model.eval()  # inference only

# Define body-part colors (RGB tuples; Gradio passes and displays RGB arrays)
PART_COLORS = {
    "head": (0, 255, 0),
    "shoulders": (255, 0, 0),
    "upper_body": (0, 0, 255),
    "arms": (255, 255, 0),
    "lower_body": (255, 0, 255)
}

# Class indices assumed for each body part. These are placeholders: verify them
# against the checkpoint's label map (model.config.id2label) before relying on them.
PART_LABELS = {
    "head": [0],
    "shoulders": [2],
    "upper_body": [3, 4],
    "arms": [5, 6],
    "lower_body": [7, 8]
}
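# The ids above are assumptions, not verified labels for this ADE20K checkpoint.
# A minimal check is to print the model's label map once and adjust PART_LABELS:
# for idx, name in model.config.id2label.items():
#     print(idx, name)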

def segment_image(image):
    # Preprocess the image for Segformer and run inference
    inputs = feature_extractor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # Cast to uint8 because cv2.resize does not accept int64 arrays
    segmentation = torch.argmax(logits, dim=1).squeeze().cpu().numpy().astype(np.uint8)

    # Resize segmentation mask to match the original image size
    segmentation_resized = cv2.resize(segmentation, (image.shape[1], image.shape[0]), interpolation=cv2.INTER_NEAREST)

    # Create a blank mask image with the same size as the original image
    segmented_image = np.zeros_like(image)

    # Color each part with its unique color
    for part, color in PART_COLORS.items():
        mask = np.isin(segmentation_resized, PART_LABELS[part])
        segmented_image[mask] = color

    return segmented_image


def estimate_pose(image):
    # Gradio supplies an RGB numpy array, which is what MediaPipe expects,
    # so no BGR-to-RGB conversion is needed here.
    results = pose.process(image)

    if not results.pose_landmarks:
        # Return the original image and the segmented image if no pose is found
        return image, segment_image(image)

    # Draw pose landmarks on a copy of the image
    annotated_image = image.copy()
    mp_drawing.draw_landmarks(
        annotated_image,
        results.pose_landmarks,
        mp_pose.POSE_CONNECTIONS,
        landmark_drawing_spec=mp_drawing.DrawingSpec(color=(0, 255, 0), thickness=2, circle_radius=2),
        connection_drawing_spec=mp_drawing.DrawingSpec(color=(0, 0, 255), thickness=2, circle_radius=2),
    )

    return annotated_image, segment_image(image)
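# Quick standalone check (hypothetical path "sample.jpg"): cv2.imread returns
# BGR, so convert to RGB to mimic the array Gradio passes to estimate_pose.
# img = cv2.cvtColor(cv2.imread("sample.jpg"), cv2.COLOR_BGR2RGB)
# annotated, segmented = estimate_pose(img)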

# Gradio Interface
interface = gr.Interface(
    fn=estimate_pose,
    inputs=gr.Image(type="numpy", label="Upload an Image"),
    outputs=[
        gr.Image(type="numpy", label="Pose Landmarks Image"),
        gr.Image(type="numpy", label="Segmented Body Parts"),
    ],
    title="Human Pose Estimation and Segmentation",
    description="Upload an image to detect and visualize human pose landmarks and segment body parts (head, shoulders, upper body, arms, lower body) with different colors.",
)

# Launch the Gradio app
if __name__ == "__main__":
    interface.launch()