Spaces:

eusholli
/

computer-vision-playground

Sleeping

File size: 4,739 Bytes

3d3f535

import os
os.environ['OPENCV_AVFOUNDATION_SKIP_AUTH'] = '1'

import streamlit as st
import cv2
import numpy as np
from transformers import pipeline
from PIL import Image, ImageDraw

# Initialize the Hugging Face pipeline for facial emotion detection using the "trpakov/vit-face-expression" model
emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")

# Function to analyze sentiment
def analyze_sentiment(face):
    """Classify the dominant facial emotion in a face image.

    Args:
        face: Image array in OpenCV's BGR channel order (it is converted
            with ``cv2.COLOR_BGR2RGB`` before being classified).

    Returns:
        The ``label`` of the highest-scoring entry returned by
        ``emotion_pipeline``.
    """
    # The pipeline is fed a PIL image, so go BGR -> RGB -> PIL in one step.
    face_image = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
    predictions = emotion_pipeline(face_image)

    # Keep only the prediction the model is most confident in.
    top_prediction = max(predictions, key=lambda prediction: prediction['score'])
    return top_prediction['label']

# NOTE(review): not referenced by any code visible in this file; presumably a
# leftover label-text scale setting -- confirm before removing.
TEXT_SIZE = 3

# Function to detect faces, analyze sentiment, and draw a red box around them
def _get_face_detector():
    """Return a cached OpenCV Haar-cascade frontal-face detector.

    The cascade is loaded on first use and stored on the function object so
    the XML file is not re-parsed for every frame.

    Raises:
        RuntimeError: If the cascade file bundled with OpenCV cannot be loaded.
    """
    detector = getattr(_get_face_detector, "_detector", None)
    if detector is None:
        cascade_path = os.path.join(
            cv2.data.haarcascades, "haarcascade_frontalface_default.xml"
        )
        detector = cv2.CascadeClassifier(cascade_path)
        if detector.empty():
            raise RuntimeError(f"Could not load face cascade from {cascade_path}")
        _get_face_detector._detector = detector
    return detector


def _measure_label(draw, text):
    """Return ``(width, height)`` needed to render *text* with the default font.

    ``ImageDraw.textsize`` was removed in Pillow 10, so ``textbbox`` is
    preferred and ``textsize`` is only used on Pillow versions without it.
    """
    if hasattr(draw, "textbbox"):
        # Measure from the anchor so any glyph offset is part of the extent.
        _, _, right, bottom = draw.textbbox((0, 0), text)
        return right, bottom
    return draw.textsize(text)


def detect_and_draw_faces(frame):
    """Annotate every detected face in a frame with a box and emotion label.

    The emotion model is an image *classifier*: it returns ``label``/``score``
    entries and no bounding boxes. Faces are therefore located with OpenCV's
    Haar cascade first, and each face crop is classified separately with
    ``analyze_sentiment``.

    Args:
        frame: Image array in OpenCV's BGR channel order.

    Returns:
        A BGR image array with a red rectangle around each detected face and
        the emotion label drawn in white on a black background above it.
        When no face is found, the result has the same pixels as the input.
    """
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = _get_face_detector().detectMultiScale(
        gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )

    # Drawing is done with PIL, which works on RGB data.
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    for face_box in faces:
        # detectMultiScale yields NumPy integers; PIL expects plain ints.
        x, y, w, h = (int(value) for value in face_box)
        sentiment = analyze_sentiment(frame[y:y + h, x:x + w])

        draw.rectangle(((x, y), (x + w, y + h)), outline="red", width=3)

        # Put the label just above the box, clamped so it stays inside the
        # image when the face touches the top edge.
        text_width, text_height = _measure_label(draw, sentiment)
        label_top = max(y - text_height - 5, 0)
        draw.rectangle(
            [(x, label_top), (x + text_width, label_top + text_height + 5)],
            fill="black",
        )
        draw.text((x, label_top + 5), sentiment, fill="white")

    # Convert back to OpenCV format
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

# Function to capture video from webcam
def video_stream():
    """Yield frames from the default webcam (device index 0) until a read fails.

    Yields:
        Frames as returned by ``cv2.VideoCapture.read``.

    An error is reported through ``st.error`` when the device cannot be
    opened or a frame cannot be read; the generator then ends without
    raising.
    """
    video_capture = cv2.VideoCapture(0)
    try:
        if not video_capture.isOpened():
            st.error("Error: Could not open video capture device.")
            return

        while True:
            ret, frame = video_capture.read()
            if not ret:
                st.error("Error: Failed to read frame from video capture device.")
                break
            yield frame
    finally:
        # Runs on normal exhaustion, on early close of the generator, and on
        # errors raised by the consumer, so the camera is always freed.
        video_capture.release()

# Streamlit UI
# Page-wide CSS overrides: white background, red headings and red buttons.
_PAGE_CSS = """
    <style>
        .main {
            background-color: #FFFFFF;
        }
        .reportview-container .main .block-container{
            padding-top: 2rem;
        }
        h1 {
            color: #E60012;
            font-family: 'Arial Black', Gadget, sans-serif;
        }
        h2 {
            color: #E60012;
            font-family: 'Arial', sans-serif;
        }
        h3 {
            color: #333333;
            font-family: 'Arial', sans-serif;
        }
        .stButton button {
            background-color: #E60012;
            color: white;
            border-radius: 5px;
            font-size: 16px;
        }
    </style>
    """

# unsafe_allow_html is required for the raw <style> tag to be injected.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

st.title("Computer Vision Test Lab")
st.subheader("Facial Sentiment")

# Two side-by-side columns: webcam input on the left, analysis on the right.
# Each column holds an empty placeholder that is filled with a frame later.
col1, col2 = st.columns(2)

with col1:
    st.header("Input Stream")
    st.subheader("Webcam")
    video_placeholder = st.empty()

with col2:
    st.header("Output Stream")
    st.subheader("Analysis")
    output_placeholder = st.empty()

# Full-width placeholder below the two columns.
sentiment_placeholder = st.empty()

# Start video stream
# Read frames from the default webcam, annotate them, and show the result in
# both placeholders until a read fails or the loop is interrupted.
video_capture = cv2.VideoCapture(0)
try:
    if not video_capture.isOpened():
        st.error("Error: Could not open video capture device.")
    else:
        while True:
            ret, frame = video_capture.read()
            if not ret:
                st.error("Error: Failed to read frame from video capture device.")
                break

            # Detect faces, analyze sentiment, and draw red boxes with sentiment labels
            frame_with_boxes = detect_and_draw_faces(frame)

            # Display the input stream with the red box around the face
            video_placeholder.image(frame_with_boxes, channels="BGR")

            # Display the output stream (here it's the same as input, modify as needed)
            output_placeholder.image(frame_with_boxes, channels="BGR")

            # Add a short delay to control the frame rate
            # NOTE(review): no HighGUI window is created in this script, so
            # this call probably never sees a 'q' key press -- confirm and
            # consider a Streamlit control for stopping the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always free the camera, including when the loop ends through an
    # exception, so the device is not left locked.
    video_capture.release()