import cv2
import streamlit as st
import tempfile
import numpy as np
from face_detection import FaceDetector
from mark_detection import MarkDetector
from pose_estimation import PoseEstimator
from utils import refine
from PIL import Image
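
# Pipeline overview: an ONNX face detector finds a face box, the box is
# refined to a square crop, a 68-point landmark model runs on the crop, and
# the landmarks feed a pose solver (per the upstream repo, cv2.solvePnP
# against a 3D face model) that draws the head orientation on the frame.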

st.title("Head Pose Estimation")
st.text(
    "Just a heads up (pun intended)... The code used for this Space is "
    "largely borrowed from https://github.com/yinguobing/head-pose-estimation, "
    "slightly altered to support image input and to run on Hugging Face."
)

# Choose between Image or Video file upload
file_type = st.selectbox("Choose the type of file you want to upload", ("Image", "Video"))
uploaded_file = st.file_uploader(
    "Upload an image or video file of your face",
    type=["jpg", "jpeg", "png", "mp4", "mov", "avi", "mkv"],
)

# Display placeholder for real-time video output
FRAME_WINDOW = st.image([])
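# Note: st.image([]) creates an empty image element whose contents can be
# swapped in place; calling FRAME_WINDOW.image(frame) inside the video loop
# replaces the previous frame rather than appending a new image each time.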

if uploaded_file is not None:
    # Video processing
    if file_type == "Video":
        # Write the upload to a named temporary file and flush it so the
        # bytes are on disk before OpenCV opens the path
        tfile = tempfile.NamedTemporaryFile(delete=False)
        tfile.write(uploaded_file.read())
        tfile.flush()
        cap = cv2.VideoCapture(tfile.name)
        st.write(f"Video source: {tfile.name}")
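        # OpenCV's VideoCapture reads from a filesystem path (or URL/device),
        # not from an in-memory upload, hence the temporary file above.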

        # Getting frame sizes
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Initialize face detection, landmark detection, and pose estimation models
        face_detector = FaceDetector("assets/face_detector.onnx")
        mark_detector = MarkDetector("assets/face_landmarks.onnx")
        pose_estimator = PoseEstimator(frame_width, frame_height)
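        # PoseEstimator takes the frame size, presumably to build an
        # approximate camera matrix for the PnP solve (see the sketch at the
        # bottom of this file).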

        # Process each frame
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Step 1: Detect faces in the frame (0.7 is the confidence threshold)
            faces, _ = face_detector.detect(frame, 0.7)

            # If a face is detected, proceed with pose estimation
            if len(faces) > 0:
                # Step 2: Refine the box (pad and square it) and take the first face
                face = refine(faces, frame_width, frame_height, 0.15)[0]
                x1, y1, x2, y2 = face[:4].astype(int)
                patch = frame[y1:y2, x1:x2]

                # Step 3: Run landmark detection on the crop, then map the
                # marks from patch-local to global image coordinates
                marks = mark_detector.detect([patch])[0].reshape([68, 2])
                marks *= (x2 - x1)
                marks[:, 0] += x1
                marks[:, 1] += y1
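                # The scaling above implies the landmark model returns
                # coordinates normalized to [0, 1] within the square patch.
                # For example, for a patch from (100, 100) to (300, 300), a
                # normalized mark of (0.5, 0.25) maps to
                # (100 + 0.5 * 200, 100 + 0.25 * 200) = (200, 150).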

                # Step 4: Pose estimation with the detected landmarks
                pose = pose_estimator.solve(marks)

                # Draw the pose on the frame
                pose_estimator.visualize(frame, pose, color=(0, 255, 0))

            # Convert frame to RGB for Streamlit display
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            FRAME_WINDOW.image(frame_rgb)

        cap.release()

    # Image processing
    elif file_type == "Image":
        # Load the uploaded image and convert it to BGR so the OpenCV
        # pipeline below sees the channel order it expects
        image = np.array(Image.open(uploaded_file).convert("RGB"))
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        frame_height, frame_width, _ = image.shape

        # Initialize models for detection and pose estimation
        face_detector = FaceDetector("assets/face_detector.onnx")
        mark_detector = MarkDetector("assets/face_landmarks.onnx")
        pose_estimator = PoseEstimator(frame_width, frame_height)

        # Detect face and landmarks
        faces, _ = face_detector.detect(image, 0.7)
        if len(faces) > 0:
            face = refine(faces, frame_width, frame_height, 0.15)[0]
            x1, y1, x2, y2 = face[:4].astype(int)
            patch = image[y1:y2, x1:x2]

            # Detect landmarks and map them to global image coordinates
            marks = mark_detector.detect([patch])[0].reshape([68, 2])
            marks *= (x2 - x1)
            marks[:, 0] += x1
            marks[:, 1] += y1

            # Estimate pose and visualize on the image
            pose = pose_estimator.solve(marks)
            pose_estimator.visualize(image, pose, color=(0, 255, 0))

        # Convert back to RGB and display in Streamlit
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        st.image(image_rgb, caption="Pose Estimated Image", use_column_width=True)
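
# Illustrative sketch (not used by the app): per the upstream repo,
# PoseEstimator.solve fits the 68 detected landmarks to a 3D face model with
# cv2.solvePnP. A minimal standalone equivalent, assuming a hypothetical
# `model_points_68` array of 3D reference landmarks and an approximate
# pinhole camera centered on the image, could look roughly like this:
def solve_pose_sketch(marks, frame_width, frame_height, model_points_68):
    focal_length = frame_width  # rough but common focal-length guess
    camera_matrix = np.array(
        [[focal_length, 0, frame_width / 2],
         [0, focal_length, frame_height / 2],
         [0, 0, 1]], dtype=np.float64)
    dist_coeffs = np.zeros((4, 1))  # assume no lens distortion
    _, rvec, tvec = cv2.solvePnP(
        model_points_68, marks.astype(np.float64), camera_matrix, dist_coeffs)
    return rvec, tvec  # head rotation and translation vectors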