tanthinhdt committed on
Commit
f7d725a
·
verified ·
1 Parent(s): 6117d60

feat(utils): calculate arm angle to detect sign better

Browse files
Files changed (1) hide show
  1. utils.py +120 -10
utils.py CHANGED
@@ -1,9 +1,10 @@
1
  import cv2
2
- import numpy as np
3
  import torch
 
4
  from mediapipe.python.solutions import (drawing_styles, drawing_utils,
5
  holistic, pose)
6
  from torchvision.transforms.v2 import Compose, UniformTemporalSubsample
 
7
 
8
 
9
  def draw_skeleton_on_image(
@@ -11,7 +12,7 @@ def draw_skeleton_on_image(
11
  detection_results,
12
  resize_to: tuple[int, int] = None,
13
  ) -> np.ndarray:
14
- """
15
  Draw skeleton on the image.
16
 
17
  Parameters
@@ -27,7 +28,7 @@ def draw_skeleton_on_image(
27
  -------
28
  np.ndarray
29
  Annotated image with skeleton.
30
- """
31
  annotated_image = np.copy(image)
32
 
33
  # Draw pose connections
@@ -63,24 +64,69 @@ def draw_skeleton_on_image(
63
  return annotated_image
64
 
65
 
66
- def are_hands_down(pose_landmarks: list) -> bool:
67
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  Check if the hand is down.
69
 
70
  Parameters
71
  ----------
72
  hand_landmarks : list
73
  Hand landmarks.
 
 
74
 
75
  Returns
76
  -------
77
  bool
78
  True if the hand is down, False otherwise.
79
- """
80
  if pose_landmarks is None:
81
  return True
82
 
83
  landmarks = pose_landmarks.landmark
 
 
 
 
 
84
  left_elbow = [
85
  landmarks[pose.PoseLandmark.LEFT_ELBOW.value].x,
86
  landmarks[pose.PoseLandmark.LEFT_ELBOW.value].y,
@@ -91,6 +137,13 @@ def are_hands_down(pose_landmarks: list) -> bool:
91
  landmarks[pose.PoseLandmark.LEFT_WRIST.value].y,
92
  landmarks[pose.PoseLandmark.LEFT_SHOULDER.value].visibility,
93
  ]
 
 
 
 
 
 
 
94
  right_elbow = [
95
  landmarks[pose.PoseLandmark.RIGHT_ELBOW.value].x,
96
  landmarks[pose.PoseLandmark.RIGHT_ELBOW.value].y,
@@ -101,18 +154,50 @@ def are_hands_down(pose_landmarks: list) -> bool:
101
  landmarks[pose.PoseLandmark.RIGHT_WRIST.value].y,
102
  landmarks[pose.PoseLandmark.RIGHT_SHOULDER.value].visibility,
103
  ]
 
104
 
105
  is_visible = all(
106
- [left_elbow[2] > 0, left_wrist[2] > 0, right_elbow[2] > 0, right_wrist[2] > 0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  )
108
- return is_visible and left_wrist[1] > left_elbow[1] and right_wrist[1] > right_elbow[1]
109
 
110
 
111
  def get_predictions(
112
  inputs: dict,
113
- model,
114
  k: int = 3,
115
  ) -> list:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  if inputs is None:
117
  return []
118
 
@@ -143,6 +228,31 @@ def preprocess(
143
  device: str,
144
  transform: Compose,
145
  ) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  skeleton_video = []
147
  did_sample_start = False
148
 
@@ -165,7 +275,7 @@ def preprocess(
165
  skeleton_frame = transform(torch.tensor(skeleton_frame).permute(2, 0, 1))
166
 
167
  # Extract sign video.
168
- if not are_hands_down(detection_results.pose_landmarks):
169
  if not did_sample_start:
170
  did_sample_start = True
171
  elif did_sample_start:
 
1
  import cv2
 
2
  import torch
3
+ import numpy as np
4
  from mediapipe.python.solutions import (drawing_styles, drawing_utils,
5
  holistic, pose)
6
  from torchvision.transforms.v2 import Compose, UniformTemporalSubsample
7
+ from transformers import VideoMAEForVideoClassification
8
 
9
 
10
  def draw_skeleton_on_image(
 
12
  detection_results,
13
  resize_to: tuple[int, int] = None,
14
  ) -> np.ndarray:
15
+ '''
16
  Draw skeleton on the image.
17
 
18
  Parameters
 
28
  -------
29
  np.ndarray
30
  Annotated image with skeleton.
31
+ '''
32
  annotated_image = np.copy(image)
33
 
34
  # Draw pose connections
 
64
  return annotated_image
65
 
66
 
67
def calculate_angle(
    shoulder: list,
    elbow: list,
    wrist: list,
) -> float:
    '''
    Calculate the angle at the elbow between the shoulder and the wrist.

    The angle is measured in the 2D image plane between the segment
    elbow -> shoulder and the segment elbow -> wrist. Only the first two
    entries (x, y) of each point are read; any further entries (for
    example a visibility score) are ignored.

    Parameters
    ----------
    shoulder : list
        Shoulder coordinates, at least ``[x, y]``.
    elbow : list
        Elbow coordinates, at least ``[x, y]``.
    wrist : list
        Wrist coordinates, at least ``[x, y]``.

    Returns
    -------
    float
        Angle in degrees between the two segments, in the range [0, 180].
    '''
    shoulder = np.array(shoulder)
    elbow = np.array(elbow)
    wrist = np.array(wrist)

    # Direction of each segment as seen from the elbow.
    forearm_direction = np.arctan2(wrist[1] - elbow[1], wrist[0] - elbow[0])
    upper_arm_direction = np.arctan2(
        shoulder[1] - elbow[1],
        shoulder[0] - elbow[0],
    )

    radians = forearm_direction - upper_arm_direction
    angle = np.abs(radians * 180.0 / np.pi)

    # The difference of two arctan2 results can span up to 360 degrees;
    # fold it back so the smaller of the two possible angles is returned.
    if angle > 180.0:
        angle = 360 - angle
    return angle
100
+
101
+
102
+ def do_hands_relax(
103
+ pose_landmarks: list,
104
+ angle_threshold: float = 160.0,
105
+ ) -> bool:
106
+ '''
107
  Check if the hand is down.
108
 
109
  Parameters
110
  ----------
111
  pose_landmarks : list
112
  Pose landmarks.
113
+ angle_threshold : float, optional
114
+ Angle threshold, by default 160.0.
115
 
116
  Returns
117
  -------
118
  bool
119
  True if the hand is down, False otherwise.
120
+ '''
121
  if pose_landmarks is None:
122
  return True
123
 
124
  landmarks = pose_landmarks.landmark
125
+ left_shoulder = [
126
+ landmarks[pose.PoseLandmark.LEFT_SHOULDER.value].x,
127
+ landmarks[pose.PoseLandmark.LEFT_SHOULDER.value].y,
128
+ landmarks[pose.PoseLandmark.LEFT_SHOULDER.value].visibility,
129
+ ]
130
  left_elbow = [
131
  landmarks[pose.PoseLandmark.LEFT_ELBOW.value].x,
132
  landmarks[pose.PoseLandmark.LEFT_ELBOW.value].y,
 
137
  landmarks[pose.PoseLandmark.LEFT_WRIST.value].y,
138
  landmarks[pose.PoseLandmark.LEFT_SHOULDER.value].visibility,
139
  ]
140
+ left_angle = calculate_angle(left_shoulder, left_elbow, left_wrist)
141
+
142
+ right_shoulder = [
143
+ landmarks[pose.PoseLandmark.RIGHT_SHOULDER.value].x,
144
+ landmarks[pose.PoseLandmark.RIGHT_SHOULDER.value].y,
145
+ landmarks[pose.PoseLandmark.RIGHT_SHOULDER.value].visibility,
146
+ ]
147
  right_elbow = [
148
  landmarks[pose.PoseLandmark.RIGHT_ELBOW.value].x,
149
  landmarks[pose.PoseLandmark.RIGHT_ELBOW.value].y,
 
154
  landmarks[pose.PoseLandmark.RIGHT_WRIST.value].y,
155
  landmarks[pose.PoseLandmark.RIGHT_SHOULDER.value].visibility,
156
  ]
157
+ right_angle = calculate_angle(right_shoulder, right_elbow, right_wrist)
158
 
159
  is_visible = all(
160
+ [
161
+ left_shoulder[2] > 0,
162
+ left_elbow[2] > 0,
163
+ left_wrist[2] > 0,
164
+ right_shoulder[2] > 0,
165
+ right_elbow[2] > 0,
166
+ right_wrist[2] > 0,
167
+ ]
168
+ )
169
+
170
+ return all(
171
+ [
172
+ is_visible,
173
+ left_angle < angle_threshold,
174
+ right_angle < angle_threshold,
175
+ ]
176
  )
 
177
 
178
 
179
  def get_predictions(
180
  inputs: dict,
181
+ model: VideoMAEForVideoClassification,
182
  k: int = 3,
183
  ) -> list:
184
+ '''
185
+ Get the top-k predictions.
186
+
187
+ Parameters
188
+ ----------
189
+ inputs : dict
190
+ Model inputs.
191
+ model : VideoMAEForVideoClassification
192
+ Model to get predictions from.
193
+ k : int, optional
194
+ Number of predictions to return, by default 3.
195
+
196
+ Returns
197
+ -------
198
+ list
199
+ Top-k predictions.
200
+ '''
201
  if inputs is None:
202
  return []
203
 
 
228
  device: str,
229
  transform: Compose,
230
  ) -> dict:
231
+ '''
232
+ Preprocess the video.
233
+
234
+ Parameters
235
+ ----------
236
+ model_num_frames : int
237
+ Number of frames in the model.
238
+ keypoints_detector
239
+ Keypoints detector.
240
+ source : str
241
+ Video source.
242
+ model_input_height : int
243
+ Model input height.
244
+ model_input_width : int
245
+ Model input width.
246
+ device : str
247
+ Device to use.
248
+ transform : Compose
249
+ Transform to apply.
250
+
251
+ Returns
252
+ -------
253
+ dict
254
+ Model inputs.
255
+ '''
256
  skeleton_video = []
257
  did_sample_start = False
258
 
 
275
  skeleton_frame = transform(torch.tensor(skeleton_frame).permute(2, 0, 1))
276
 
277
  # Extract sign video.
278
+ if not do_hands_relax(detection_results.pose_landmarks):
279
  if not did_sample_start:
280
  did_sample_start = True
281
  elif did_sample_start: