Spaces:

yehtutmaung
/

distractionTest

Sleeping

File size: 9,685 Bytes

b38122e

"""Estimate head pose according to the facial landmarks"""
import cv2
import numpy as np


class PoseEstimator:
    """Estimate head pose from 68 facial landmarks and flag distraction.

    The estimator owns a 3D reference face model (loaded from a text file),
    an approximate pinhole camera model derived from the image size, and a
    hard-coded starting pose that is handed to ``cv2.solvePnP`` as the
    initial extrinsic guess.
    """

    # Angle ranges (degrees) inside which the head counts as facing forward.
    # Pitch and yaw bounds are exclusive; adjust based on further testing.
    PITCH_RANGE = (-15, 10)
    YAW_RANGE = (-20, 16)
    # arctan2 yields roll in [-180, 180], so this range accepts every roll
    # value; its bounds are therefore checked inclusively.
    ROLL_RANGE = (-180, 180)

    def __init__(self, image_width, image_height):
        """Init a pose estimator.

        Args:
            image_width (int): input image width
            image_height (int): input image height
        """
        # Stored as (height, width).
        self.size = (image_height, image_width)
        self.model_points_68 = self._get_full_model_points()

        # Camera internals: the focal length is approximated by the image
        # width and the optical center by the image center.
        self.focal_length = self.size[1]
        self.camera_center = (self.size[1] / 2, self.size[0] / 2)
        self.camera_matrix = np.array(
            [[self.focal_length, 0, self.camera_center[0]],
             [0, self.focal_length, self.camera_center[1]],
             [0, 0, 1]], dtype="double")

        # Assuming no lens distortion. The attribute keeps its historical
        # spelling ("coeefs") so existing callers keep working.
        self.dist_coeefs = np.zeros((4, 1))

        # Rotation vector and translation vector used as the initial guess
        # when solving the pose (see solve()).
        self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
        self.t_vec = np.array(
            [[-14.97821226], [-10.62040383], [-2053.03596872]])

    def _get_full_model_points(self, filename='assets/model.txt'):
        """Get all 68 3D model points from file.

        The file holds one number per line: all x values first, then all y
        values, then all z values. Blank lines are ignored.

        Args:
            filename (str): path of the model file.

        Returns:
            np.ndarray: float32 array of shape (N, 3) with the z axis
                negated.

        Raises:
            ValueError: if a non-blank line is not a number, or the number
                of values is not a multiple of three.
        """
        with open(filename, encoding="utf-8") as file:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing on the float conversion.
            raw_value = [float(line) for line in file if line.strip()]

        model_points = np.array(raw_value, dtype=np.float32)
        model_points = np.reshape(model_points, (3, -1)).T

        # Transform the model into a front view.
        model_points[:, 2] *= -1

        return model_points

    def solve(self, points):
        """Solve pose with all the 68 image points.

        Args:
            points (np.ndarray): points on image.

        Returns:
            Tuple: (rotation_vector, translation_vector) as pose.
        """
        if self.r_vec is None:
            # No starting pose available: solve once without a guess and
            # keep the result as the guess for the refinement below.
            (_, self.r_vec, self.t_vec) = cv2.solvePnP(
                self.model_points_68, points,
                self.camera_matrix, self.dist_coeefs)

        # NOTE(review): OpenCV may write the refined pose into the arrays
        # passed as rvec/tvec - confirm whether the stored guess is meant to
        # follow the pose from call to call.
        (_, rotation_vector, translation_vector) = cv2.solvePnP(
            self.model_points_68,
            points,
            self.camera_matrix,
            self.dist_coeefs,
            rvec=self.r_vec,
            tvec=self.t_vec,
            useExtrinsicGuess=True)

        return (rotation_vector, translation_vector)

    @staticmethod
    def _box_outline(half_size, depth):
        """Return the five corners of a closed square outline at `depth`."""
        return [(-half_size, -half_size, depth),
                (-half_size, half_size, depth),
                (half_size, half_size, depth),
                (half_size, -half_size, depth),
                (-half_size, -half_size, depth)]

    def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
        """Draw a 3D box as annotation of pose.

        Args:
            image (np.ndarray): image to draw on.
            pose (Tuple): (rotation_vector, translation_vector).
            color (Tuple): line color passed to OpenCV.
            line_width (int): line thickness passed to OpenCV.
        """
        rotation_vector, translation_vector = pose

        # Rear face (size 75 at depth 0) followed by the front face
        # (size 100 at depth 100).
        point_3d = self._box_outline(75, 0) + self._box_outline(100, 100)
        point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3)

        # Map to 2d image points
        (point_2d, _) = cv2.projectPoints(point_3d,
                                          rotation_vector,
                                          translation_vector,
                                          self.camera_matrix,
                                          self.dist_coeefs)
        point_2d = np.int32(point_2d.reshape(-1, 2))

        # Draw all the lines: both outlines first, then the edges joining
        # rear corners 1..3 to front corners 6..8.
        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
        for rear, front in ((1, 6), (2, 7), (3, 8)):
            # Plain Python ints: some OpenCV builds reject NumPy scalars
            # inside point tuples.
            start = tuple(int(v) for v in point_2d[rear])
            end = tuple(int(v) for v in point_2d[front])
            cv2.line(image, start, end, color, line_width, cv2.LINE_AA)

    def draw_axes(self, img, pose):
        """Draw the pose's coordinate axes (length 30) on `img`.

        Args:
            img (np.ndarray): image to draw on; nothing is returned, so the
                caller relies on OpenCV drawing directly into this array.
            pose (Tuple): (rotation_vector, translation_vector).
        """
        R, t = pose
        cv2.drawFrameAxes(img, self.camera_matrix,
                          self.dist_coeefs, R, t, 30)

    def show_3d_model(self):
        """Show the 3D model points as a scatter plot (blocks until closed)."""
        from matplotlib import pyplot
        # Imported for its side effect: older Matplotlib versions only know
        # the '3d' projection after this module has been imported.
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

        fig = pyplot.figure()
        # Axes3D(fig) no longer attaches the axes to the figure on current
        # Matplotlib, which leaves the window empty.
        ax = fig.add_subplot(111, projection='3d')

        x = self.model_points_68[:, 0]
        y = self.model_points_68[:, 1]
        z = self.model_points_68[:, 2]

        ax.scatter(x, y, z)
        ax.axis('square')
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        pyplot.show()

    ###
    # yhm : distraction detection helpers
    ###
    def rotation_matrix_to_angles(self, rotation_vector):
        """Convert rotation vector to pitch, yaw, and roll angles.

        The vector is expanded to a rotation matrix R with cv2.Rodrigues and
        decomposed as R = Rz(roll) * Ry(yaw) * Rx(pitch).

        Args:
            rotation_vector (np.ndarray): rotation vector as returned by
                solve().

        Returns:
            Tuple: (pitch, yaw, roll) in degrees.
        """
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        sy = np.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)

        # Yaw uses the same formula in both cases.
        yaw = np.arctan2(-rotation_matrix[2, 0], sy)
        if sy < 1e-6:
            # Singular case (yaw close to +/-90 degrees): pitch and roll are
            # no longer independent, so roll is pinned to zero.
            pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
            roll = 0
        else:
            pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
            roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])

        return np.degrees(pitch), np.degrees(yaw), np.degrees(roll)

    def _angles_in_focus(self, pitch, yaw, roll):
        """Return True when all three angles (degrees) are inside the ranges.

        Pitch and yaw must lie strictly inside PITCH_RANGE / YAW_RANGE. Roll
        is compared inclusively so that a roll of exactly +/-180 degrees,
        which arctan2 can produce, is not rejected.
        """
        pitch_lo, pitch_hi = self.PITCH_RANGE
        yaw_lo, yaw_hi = self.YAW_RANGE
        roll_lo, roll_hi = self.ROLL_RANGE

        return bool(pitch_lo < pitch < pitch_hi
                    and yaw_lo < yaw < yaw_hi
                    and roll_lo <= roll <= roll_hi)

    def is_distracted(self, rotation_vector):
        """Determine if the user is distracted based on head pose angles.

        Args:
            rotation_vector (np.ndarray): rotation vector as returned by
                solve().

        Returns:
            bool: True when the head is not roughly facing forward.
        """
        pitch, yaw, roll = self.rotation_matrix_to_angles(rotation_vector)
        return not self._angles_in_focus(pitch, yaw, roll)

    def detect_distraction(self, points):
        """Solve pose and detect distraction status based on pose.

        Args:
            points (np.ndarray): points on image.

        Returns:
            Tuple: (distraction_status,
                (rotation_vector, translation_vector)).
        """
        rotation_vector, translation_vector = self.solve(points)
        distraction_status = self.is_distracted(rotation_vector)
        return distraction_status, (rotation_vector, translation_vector)


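    # Second part: commented-out alternative versions of the distraction
    # helpers, using symmetric abs() thresholds for pitch, yaw and roll.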

    # def rotation_matrix_to_angles(self, rotation_vector):
    #     """Convert rotation vector to pitch, yaw, and roll angles."""
    #     # Convert the rotation vector into a rotation matrix
    #     rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
        
    #     # Ensure no division by zero
    #     sy = np.sqrt(rotation_matrix[0, 0]**2 + rotation_matrix[1, 0]**2)
    #     singular = sy < 1e-6

    #     if not singular:
    #         pitch = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
    #         yaw = np.arctan2(-rotation_matrix[2, 0], sy)
    #         roll = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
    #     else:
    #         pitch = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
    #         yaw = np.arctan2(-rotation_matrix[2, 0], sy)
    #         roll = 0

    #     # Return converted angles in degrees
    #     return np.degrees(pitch), np.degrees(yaw), np.degrees(roll)

    # def is_distracted(self, rotation_vector):
    #     """Determine if the user is distracted based on head pose angles."""
    #     pitch, yaw, roll = self.rotation_matrix_to_angles(rotation_vector)
        
    #     # Test different thresholds based on specific requirements
    #     pitch_threshold = 15    # Up/Down 
    #     yaw_threshold = 20      # Left/Right
    #     roll_threshold = 10     # Tilt

    #     # Determine distraction status
    #     return not (abs(pitch) < pitch_threshold and abs(yaw) < yaw_threshold and abs(roll) < roll_threshold)

    # def detect_distraction(self, points):
    #     """Solve pose and detect distraction status based on pose."""
    #     rotation_vector, translation_vector = self.solve(points)
    #     distraction_status = self.is_distracted(rotation_vector)
    #     return distraction_status, (rotation_vector, translation_vector)