# distraction_detection/pose_estimation.py
"""Estimate head pose according to the facial landmarks"""
import cv2
import numpy as np
class PoseEstimator:
"""Estimate head pose according to the facial landmarks"""
def __init__(self, image_width, image_height):
"""Init a pose estimator.
Args:
image_width (int): input image width
image_height (int): input image height
"""
        self.size = (image_height, image_width)  # stored as (rows, cols)
self.model_points_68 = self._get_full_model_points()
        # Camera internals: approximate the focal length with the image
        # width and put the principal point at the image centre (a common
        # pinhole approximation for an uncalibrated camera).
        self.focal_length = self.size[1]
        self.camera_center = (self.size[1] / 2, self.size[0] / 2)
        self.camera_matrix = np.array(
            [[self.focal_length, 0, self.camera_center[0]],
             [0, self.focal_length, self.camera_center[1]],
             [0, 0, 1]], dtype="double")
        # Assuming no lens distortion.
        self.dist_coeffs = np.zeros((4, 1))
        # Initial guess for the extrinsics (rotation and translation
        # vectors), used to warm-start solvePnP in solve().
        self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
        self.t_vec = np.array(
            [[-14.97821226], [-10.62040383], [-2053.03596872]])

    def _get_full_model_points(self, filename='assets/model.txt'):
        """Get all 68 3D model points from file.

        The file is expected to hold 204 floats, one per line: the 68 X
        values, then the 68 Y values, then the 68 Z values.
        """
        raw_value = []
        with open(filename) as file:
            for line in file:
                raw_value.append(float(line))
        model_points = np.array(raw_value, dtype=np.float32)
        model_points = np.reshape(model_points, (3, -1)).T
        # Flip the Z axis to transform the model into a front view.
        model_points[:, 2] *= -1
        return model_points

    def solve(self, points):
        """Solve the head pose from all 68 image points.

        Args:
            points (np.ndarray): (68, 2) array of landmark coordinates on
                the image, ordered to match the 3D model points.

        Returns:
            Tuple: (rotation_vector, translation_vector) as pose.
        """
        # First solve without a prior (only reached if the initial guess
        # set in __init__ has been cleared).
        if self.r_vec is None:
            (_, rotation_vector, translation_vector) = cv2.solvePnP(
                self.model_points_68, points, self.camera_matrix,
                self.dist_coeffs)
            self.r_vec = rotation_vector
            self.t_vec = translation_vector
        # Refine with the previous pose as an extrinsic guess, which keeps
        # the iterative solver temporally stable across video frames.
        (_, rotation_vector, translation_vector) = cv2.solvePnP(
            self.model_points_68,
            points,
            self.camera_matrix,
            self.dist_coeffs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)
        return (rotation_vector, translation_vector)

    def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
        """Draw a 3D box as an annotation of the pose."""
rotation_vector, translation_vector = pose
        point_3d = []
        rear_size = 75
        rear_depth = 0
        # Rear face of the box (closed by repeating the first corner).
        point_3d.append((-rear_size, -rear_size, rear_depth))
        point_3d.append((-rear_size, rear_size, rear_depth))
        point_3d.append((rear_size, rear_size, rear_depth))
        point_3d.append((rear_size, -rear_size, rear_depth))
        point_3d.append((-rear_size, -rear_size, rear_depth))
        front_size = 100
        front_depth = 100
        # Front face of the box.
        point_3d.append((-front_size, -front_size, front_depth))
        point_3d.append((-front_size, front_size, front_depth))
        point_3d.append((front_size, front_size, front_depth))
        point_3d.append((front_size, -front_size, front_depth))
        point_3d.append((-front_size, -front_size, front_depth))
        point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3)
        # Map the 3D box to 2D image points.
        (point_2d, _) = cv2.projectPoints(point_3d,
                                          rotation_vector,
                                          translation_vector,
                                          self.camera_matrix,
                                          self.dist_coeffs)
        point_2d = np.int32(point_2d.reshape(-1, 2))
        # Draw all the lines.
        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[1]), tuple(point_2d[6]),
                 color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[2]), tuple(point_2d[7]),
                 color, line_width, cv2.LINE_AA)
        cv2.line(image, tuple(point_2d[3]), tuple(point_2d[8]),
                 color, line_width, cv2.LINE_AA)

    def draw_axes(self, img, pose):
        """Draw the pose axes (30 model units long) on the image."""
        R, t = pose
        cv2.drawFrameAxes(img, self.camera_matrix, self.dist_coeffs, R, t, 30)

    def show_3d_model(self):
        from matplotlib import pyplot

        fig = pyplot.figure()
        ax = fig.add_subplot(projection='3d')
        x = self.model_points_68[:, 0]
        y = self.model_points_68[:, 1]
        z = self.model_points_68[:, 2]
        ax.scatter(x, y, z)
        # Scale the axes to the data so the model is not distorted
        # (ax.axis('square') is not supported on 3D axes).
        ax.set_box_aspect((np.ptp(x), np.ptp(y), np.ptp(z)))
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        pyplot.show()

    ###
    # Distraction-detection helpers (yhm; drafted with ChatGPT).
    ###
    def rotation_matrix_to_angles(self, rotation_vector):
        """Convert a rotation vector to pitch, yaw, and roll in degrees.

        The vector is expanded to a rotation matrix with Rodrigues' formula
        and then decomposed into Euler angles.
        """
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        sy = np.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)
        # Near gimbal lock (sy ~ 0) the decomposition is ill-conditioned,
        # so fall back to a reduced form with roll fixed at 0.
        singular = sy < 1e-6
if not singular:
pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
yaw = np.arctan2(-rotation_matrix[2, 0], sy)
roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
else:
pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
yaw = np.arctan2(-rotation_matrix[2, 0], sy)
roll = 0
return np.degrees(pitch), np.degrees(yaw), np.degrees(roll)
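
    # Quick sanity check for rotation_matrix_to_angles (illustrative): a
    # zero rotation vector is the identity rotation, so
    # rotation_matrix_to_angles(np.zeros((3, 1))) returns
    # pitch = yaw = roll = 0 degrees.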

    def is_distracted(self, rotation_vector):
        """Determine whether the user is distracted from head pose angles."""
        pitch, yaw, roll = self.rotation_matrix_to_angles(rotation_vector)
        # Thresholds for a head that still counts as 'facing forward'
        # (tune these with further testing).
        pitch_threshold = (-20, 40)   # allow some variability in pitch
        yaw_threshold = (-35, 30)     # reasonable range for yaw
        roll_threshold = (-180, 180)  # effectively unconstrained; roll does
                                      # not flag distraction here
        focus_pitch = pitch_threshold[0] < pitch < pitch_threshold[1]
        focus_yaw = yaw_threshold[0] < yaw < yaw_threshold[1]
        focus_roll = roll_threshold[0] < roll < roll_threshold[1]
        return not (focus_pitch and focus_yaw and focus_roll)
# """Determine if the user is distracted based on head pose angles."""
# pitch, yaw, roll = self.rotation_matrix_to_angles(rotation_vector)
# print("pitch, yaw, roll", pitch, yaw, roll)
# # Define thresholds (you may need to adjust these based on testing)
# pitch_threshold = 15 # Up/Down threshold
# yaw_threshold = 20 # Left/Right threshold
# roll_threshold = 10 # Tilt threshold
# # Check if head is facing roughly forward
# if abs(pitch) < pitch_threshold and abs(yaw) < yaw_threshold and abs(roll) < roll_threshold:
# return False # Focused
# else:
# return True # Distracted

    def detect_distraction(self, points):
        """Solve the pose and report the distraction status based on it."""
        rotation_vector, translation_vector = self.solve(points)
        distraction_status = self.is_distracted(rotation_vector)
        return distraction_status, (rotation_vector, translation_vector)
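

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the detection pipeline).
# It assumes `assets/model.txt` is present and that a (68, 2) array of image
# landmarks is available, e.g. from dlib's 68-point shape predictor; random
# points stand in for a real detector here, so the reported pose is
# meaningless and only demonstrates the call sequence.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    estimator = PoseEstimator(image_width=640, image_height=480)

    # Placeholder landmarks; replace with real detector output.
    landmarks = np.random.uniform(100, 400, size=(68, 2))

    distracted, (r_vec, t_vec) = estimator.detect_distraction(landmarks)
    pitch, yaw, roll = estimator.rotation_matrix_to_angles(r_vec)
    print(f"distracted={distracted}, "
          f"pitch={pitch:.1f}, yaw={yaw:.1f}, roll={roll:.1f}")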