ayushsaini1207 committed
Commit 7fd0993 · verified · 1 Parent(s): c1c7d45

Upload 2 files

Files changed (2):
1. app.py +378 -0
2. requirements.txt +13 -0
app.py ADDED
@@ -0,0 +1,378 @@
import streamlit as st
import cv2

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (LSTM, Dense, Dropout, Input, Flatten,
                                     Bidirectional, Permute, multiply)

import numpy as np
import mediapipe as mp
import math

from streamlit_webrtc import webrtc_streamer, WebRtcMode, RTCConfiguration
import av

## Build and Load Model
def attention_block(inputs, time_steps):
    """
    Attention layer for deep neural network
    """
    # Attention weights
    a = Permute((2, 1))(inputs)
    a = Dense(time_steps, activation='softmax')(a)

    # Attention vector
    a_probs = Permute((2, 1), name='attention_vec')(a)

    # Luong's multiplicative score
    output_attention_mul = multiply([inputs, a_probs], name='attention_mul')

    return output_attention_mul

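# A minimal sanity check of the attention block (illustrative only; not called by the app).
# With the defaults used below (sequence_length=30, HIDDEN_UNITS=256), the Bi-LSTM output has
# shape (batch, 30, 512); the block returns a tensor of the same shape, re-weighted across the
# 30 time steps by the learned softmax attention weights.
def _attention_block_shape_check(sequence_length=30, hidden_units=256):
    probe = Input(shape=(sequence_length, 2 * hidden_units))
    out = attention_block(probe, sequence_length)
    return Model(probe, out)  # .summary() shows an output shape of (None, 30, 512) for the defaults
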
@st.cache(allow_output_mutation=True)
def build_model(HIDDEN_UNITS=256, sequence_length=30, num_input_values=33*4, num_classes=3):
    """
    Function used to build the deep neural network model on startup

    Args:
        HIDDEN_UNITS (int, optional): Number of hidden units for each neural network hidden layer. Defaults to 256.
        sequence_length (int, optional): Input sequence length (i.e., number of frames). Defaults to 30.
        num_input_values (int, optional): Input size of the neural network model. Defaults to 33*4 (i.e., number of keypoints x number of metrics).
        num_classes (int, optional): Number of classification categories (i.e., model output size). Defaults to 3.

    Returns:
        keras model: neural network with pre-trained weights
    """
    # Input
    inputs = Input(shape=(sequence_length, num_input_values))
    # Bi-LSTM
    lstm_out = Bidirectional(LSTM(HIDDEN_UNITS, return_sequences=True))(inputs)
    # Attention
    attention_mul = attention_block(lstm_out, sequence_length)
    attention_mul = Flatten()(attention_mul)
    # Fully connected layer
    x = Dense(2*HIDDEN_UNITS, activation='relu')(attention_mul)
    x = Dropout(0.5)(x)
    # Output
    x = Dense(num_classes, activation='softmax')(x)
    # Bring it all together
    model = Model(inputs=[inputs], outputs=x)

    ## Load model weights
    load_dir = "./models/LSTM_Attention.h5"
    model.load_weights(load_dir)

    return model

HIDDEN_UNITS = 256
model = build_model(HIDDEN_UNITS)

## App
st.write("# AI Personal Fitness Trainer Web App")

st.markdown("❗❗ **Development Note** ❗❗")
st.markdown("Currently, the exercise recognition model uses the x, y, and z coordinates of each anatomical landmark from the MediaPipe Pose model. These coordinates are normalized with respect to the image frame (e.g., the top-left corner represents (x=0, y=0) and the bottom-right corner represents (x=1, y=1)).")
st.markdown("I'm currently developing and testing two new feature engineering strategies:")
st.markdown("- Normalizing coordinates by the detected bounding box of the user")
st.markdown("- Using joint angles rather than keypoint coordinates as features")
st.write("Stay Tuned!")

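# A minimal sketch of the first strategy described in the note above: rescaling each landmark
# relative to the person's detected bounding box rather than the full image frame. The helper
# name and exact formulation are illustrative; the deployed model still consumes
# frame-normalized coordinates.
def normalize_keypoints_to_bbox(pose_vector):
    """Rescales a flattened (33*4,) keypoint vector so x and y span the pose bounding box."""
    keypoints = np.asarray(pose_vector, dtype=np.float32).reshape(33, 4).copy()
    x_min, x_max = keypoints[:, 0].min(), keypoints[:, 0].max()
    y_min, y_max = keypoints[:, 1].min(), keypoints[:, 1].max()
    keypoints[:, 0] = (keypoints[:, 0] - x_min) / max(x_max - x_min, 1e-6)
    keypoints[:, 1] = (keypoints[:, 1] - y_min) / max(y_max - y_min, 1e-6)
    return keypoints.flatten()
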
st.write("## Settings")
threshold1 = st.slider("Minimum Keypoint Detection Confidence", 0.00, 1.00, 0.50)
threshold2 = st.slider("Minimum Tracking Confidence", 0.00, 1.00, 0.50)
threshold3 = st.slider("Minimum Activity Classification Confidence", 0.00, 1.00, 0.50)

st.write("## Activate the AI 🤖🏋️‍♂️")

## Mediapipe
mp_pose = mp.solutions.pose  # Pre-trained pose estimation model from Google Mediapipe
mp_drawing = mp.solutions.drawing_utils  # Supported Mediapipe visualization tools
pose = mp_pose.Pose(min_detection_confidence=threshold1, min_tracking_confidence=threshold2)  # Mediapipe pose model

## Real Time Machine Learning and Computer Vision Processes
class VideoProcessor:
    def __init__(self):
        # Parameters
        self.actions = np.array(['curl', 'press', 'squat'])
        self.sequence_length = 30
        self.colors = [(245,117,16), (117,245,16), (16,117,245)]
        self.threshold = threshold3

        # Detection variables
        self.sequence = []
        self.current_action = ''

        # Rep counter logic variables
        self.curl_counter = 0
        self.press_counter = 0
        self.squat_counter = 0
        self.curl_stage = None
        self.press_stage = None
        self.squat_stage = None

    def draw_landmarks(self, image, results):
        """
        This function draws keypoints and landmarks detected by the human pose estimation model
        """
        mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_pose.POSE_CONNECTIONS,
                                  mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=2),
                                  mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                                  )
        return

    def extract_keypoints(self, results):
        """
        Processes and organizes the keypoints detected from the pose estimation model
        to be used as inputs for the exercise decoder models
        """
        pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
        return pose

    def calculate_angle(self, a, b, c):
        """
        Computes the 2D joint angle inferred by 3 keypoints and their relative positions to one another
        """
        a = np.array(a)  # First
        b = np.array(b)  # Mid
        c = np.array(c)  # End

        radians = np.arctan2(c[1]-b[1], c[0]-b[0]) - np.arctan2(a[1]-b[1], a[0]-b[0])
        angle = np.abs(radians*180.0/np.pi)

        if angle > 180.0:
            angle = 360 - angle

        return angle

    def get_coordinates(self, landmarks, mp_pose, side, joint):
        """
        Retrieves the x and y coordinates of a particular keypoint from the pose estimation model

        Args:
            landmarks: processed keypoints from the pose estimation model
            mp_pose: Mediapipe pose estimation model
            side: 'left' or 'right'. Denotes the side of the body of the landmark of interest.
            joint: 'shoulder', 'elbow', 'wrist', 'hip', 'knee', or 'ankle'. Denotes which body joint is associated with the landmark of interest.
        """
        coord = getattr(mp_pose.PoseLandmark, side.upper() + "_" + joint.upper())
        x_coord_val = landmarks[coord.value].x
        y_coord_val = landmarks[coord.value].y
        return [x_coord_val, y_coord_val]

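    # A minimal sketch of the second strategy from the Development Note above: summarizing each
    # frame with a handful of joint angles instead of raw coordinates. The joint triplets chosen
    # here are illustrative, and this method is not used by the current model.
    def extract_angle_features(self, landmarks, mp_pose):
        triplets = [('shoulder', 'elbow', 'wrist'),   # elbow angle
                    ('hip', 'knee', 'ankle'),         # knee angle
                    ('shoulder', 'hip', 'knee')]      # hip angle
        features = []
        for side in ('left', 'right'):
            for a, b, c in triplets:
                features.append(self.calculate_angle(
                    self.get_coordinates(landmarks, mp_pose, side, a),
                    self.get_coordinates(landmarks, mp_pose, side, b),
                    self.get_coordinates(landmarks, mp_pose, side, c)))
        return np.array(features)
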
    def viz_joint_angle(self, image, angle, joint):
        """
        Displays the joint angle value near the joint within the image frame
        """
        cv2.putText(image, str(int(angle)),
                    tuple(np.multiply(joint, [640, 480]).astype(int)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2, cv2.LINE_AA
                    )
        return

    def count_reps(self, image, landmarks, mp_pose):
        """
        Counts repetitions of each exercise. Global count and stage (i.e., state) variables are updated within this function.
        """
        if self.current_action == 'curl':
            # Get coords
            shoulder = self.get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
            elbow = self.get_coordinates(landmarks, mp_pose, 'left', 'elbow')
            wrist = self.get_coordinates(landmarks, mp_pose, 'left', 'wrist')

            # Calculate elbow angle
            angle = self.calculate_angle(shoulder, elbow, wrist)

            # Curl counter logic
            if angle < 30:
                self.curl_stage = "up"
            if angle > 140 and self.curl_stage == 'up':
                self.curl_stage = "down"
                self.curl_counter += 1
                self.press_stage = None
                self.squat_stage = None

            # Viz joint angle
            self.viz_joint_angle(image, angle, elbow)

        elif self.current_action == 'press':
            # Get coords
            shoulder = self.get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
            elbow = self.get_coordinates(landmarks, mp_pose, 'left', 'elbow')
            wrist = self.get_coordinates(landmarks, mp_pose, 'left', 'wrist')

            # Calculate elbow angle
            elbow_angle = self.calculate_angle(shoulder, elbow, wrist)

            # Compute distances between joints
            shoulder2elbow_dist = abs(math.dist(shoulder, elbow))
            shoulder2wrist_dist = abs(math.dist(shoulder, wrist))

            # Press counter logic
            if (elbow_angle > 130) and (shoulder2elbow_dist < shoulder2wrist_dist):
                self.press_stage = "up"
            if (elbow_angle < 50) and (shoulder2elbow_dist > shoulder2wrist_dist) and (self.press_stage == 'up'):
                self.press_stage = 'down'
                self.press_counter += 1
                self.curl_stage = None
                self.squat_stage = None

            # Viz joint angle
            self.viz_joint_angle(image, elbow_angle, elbow)

        elif self.current_action == 'squat':
            # Get coords
            # Left side
            left_shoulder = self.get_coordinates(landmarks, mp_pose, 'left', 'shoulder')
            left_hip = self.get_coordinates(landmarks, mp_pose, 'left', 'hip')
            left_knee = self.get_coordinates(landmarks, mp_pose, 'left', 'knee')
            left_ankle = self.get_coordinates(landmarks, mp_pose, 'left', 'ankle')
            # Right side
            right_shoulder = self.get_coordinates(landmarks, mp_pose, 'right', 'shoulder')
            right_hip = self.get_coordinates(landmarks, mp_pose, 'right', 'hip')
            right_knee = self.get_coordinates(landmarks, mp_pose, 'right', 'knee')
            right_ankle = self.get_coordinates(landmarks, mp_pose, 'right', 'ankle')

            # Calculate knee angles
            left_knee_angle = self.calculate_angle(left_hip, left_knee, left_ankle)
            right_knee_angle = self.calculate_angle(right_hip, right_knee, right_ankle)

            # Calculate hip angles
            left_hip_angle = self.calculate_angle(left_shoulder, left_hip, left_knee)
            right_hip_angle = self.calculate_angle(right_shoulder, right_hip, right_knee)

            # Squat counter logic
            thr = 165
            if (left_knee_angle < thr) and (right_knee_angle < thr) and (left_hip_angle < thr) and (right_hip_angle < thr):
                self.squat_stage = "down"
            if (left_knee_angle > thr) and (right_knee_angle > thr) and (left_hip_angle > thr) and (right_hip_angle > thr) and (self.squat_stage == 'down'):
                self.squat_stage = 'up'
                self.squat_counter += 1
                self.curl_stage = None
                self.press_stage = None

            # Viz joint angles
            self.viz_joint_angle(image, left_knee_angle, left_knee)
            self.viz_joint_angle(image, left_hip_angle, left_hip)

        else:
            pass

        return

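    # Worked example of the curl logic above (illustrative numbers): an elbow-angle trace of
    # 170° -> 25° -> 150° sets curl_stage to "up" when the angle drops below 30°, then counts
    # one repetition and flips the stage to "down" once the angle rises back past 140°.
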
    def prob_viz(self, res, input_frame):
        """
        This function displays the model prediction probability distribution over the set of exercise classes
        as a horizontal bar graph
        """
        output_frame = input_frame.copy()
        for num, prob in enumerate(res):
            cv2.rectangle(output_frame, (0, 60+num*40), (int(prob*100), 90+num*40), self.colors[num], -1)
            cv2.putText(output_frame, self.actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

        return output_frame

    def process(self, image):
        """
        Function to process the video frame from the user's webcam and run the fitness trainer AI

        Args:
            image (numpy array): input image from the webcam

        Returns:
            numpy array: processed image with keypoint detection and fitness activity classification visualized
        """
        # Pose detection model
        image.flags.writeable = False
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = pose.process(image)

        # Draw the pose annotations on the image
        image.flags.writeable = True
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        self.draw_landmarks(image, results)

        # Prediction logic
        keypoints = self.extract_keypoints(results)
        self.sequence.append(keypoints.astype('float32', casting='same_kind'))
        self.sequence = self.sequence[-self.sequence_length:]

        if len(self.sequence) == self.sequence_length:
            res = model.predict(np.expand_dims(self.sequence, axis=0), verbose=0)[0]
            # interpreter.set_tensor(self.input_details[0]['index'], np.expand_dims(self.sequence, axis=0))
            # interpreter.invoke()
            # res = interpreter.get_tensor(self.output_details[0]['index'])

            self.current_action = self.actions[np.argmax(res)]
            confidence = np.max(res)

            # Erase current action variable if no probability is above threshold
            if confidence < self.threshold:
                self.current_action = ''

            # Viz probabilities
            image = self.prob_viz(res, image)

            # Count reps
            try:
                landmarks = results.pose_landmarks.landmark
                self.count_reps(
                    image, landmarks, mp_pose)
            except:
                pass

            # Display graphical information
            cv2.rectangle(image, (0,0), (640, 40), self.colors[np.argmax(res)], -1)
            cv2.putText(image, 'curl ' + str(self.curl_counter), (3,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(image, 'press ' + str(self.press_counter), (240,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
            cv2.putText(image, 'squat ' + str(self.squat_counter), (490,30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)

        # return cv2.flip(image, 1)
        return image

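    # Note on the prediction step above: self.sequence holds the most recent 30 keypoint vectors,
    # each of length 33*4 = 132, so np.expand_dims(self.sequence, axis=0) yields the (1, 30, 132)
    # batch expected by the LSTM-attention model.
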
    def recv(self, frame):
        """
        Receive and process video stream from webcam

        Args:
            frame: current video frame

        Returns:
            av.VideoFrame: processed video frame
        """
        img = frame.to_ndarray(format="bgr24")
        img = self.process(img)
        return av.VideoFrame.from_ndarray(img, format="bgr24")

## Stream Webcam Video and Run Model
# Options
RTC_CONFIGURATION = RTCConfiguration(
    {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
)
# Streamer
webrtc_ctx = webrtc_streamer(
    key="AI trainer",
    mode=WebRtcMode.SENDRECV,
    rtc_configuration=RTC_CONFIGURATION,
    media_stream_constraints={"video": True, "audio": False},
    video_processor_factory=VideoProcessor,
    async_processing=True,
)
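
# To try the app locally (assuming the requirements below are installed and the pre-trained
# weights are available at ./models/LSTM_Attention.h5):
#   pip install -r requirements.txt
#   streamlit run app.py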
requirements.txt ADDED
@@ -0,0 +1,13 @@
streamlit
streamlit_webrtc
keras==2.10.0
notebook==6.4.11
numpy==1.23.0
Markdown==3.3.7
ipykernel==6.9.1
ipython==8.3.0
mediapipe==0.9.1.0
pillow==9.1.1
opencv-python==4.6.0.66
opencv-contrib-python==4.6.0.66
tensorflow==2.10.0