|
import os
from collections import Counter

import cv2
import joblib
import librosa
import numpy as np
import streamlit as st
from deepface import DeepFace
from moviepy import VideoFileClip  # MoviePy >= 2.0; on 1.x use `from moviepy.editor import VideoFileClip`
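
# Streamlit app: estimate the emotion in an uploaded video in two ways, with
# DeepFace run on sampled frames (facial expression) and a pre-trained SVM run
# on librosa audio features, then report whether the two estimates agree.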

# Label-to-index mapping for the six emotion classes. It is unused below, but
# it matches `emotion_labels` in main() and presumably records the encoding
# the audio SVM was trained with.
emotion_map = {
    'angry': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'neutral': 4,
    'sad': 5,
}


def split_video_into_frames_and_analyze_emotions(video_path, frame_interval=1):
    """Run DeepFace emotion analysis on every `frame_interval`-th frame and
    return the most frequently detected dominant emotion (None if no face
    was ever detected)."""
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        st.error("Error: Could not open video.")
        return None

    frame_count = 0
    success, frame = cap.read()
    emotion_counter = Counter()

    while success:
        if frame_count % frame_interval == 0:
            try:
                analysis = DeepFace.analyze(frame, actions=['emotion'])
                # Newer DeepFace versions return a list (one entry per face);
                # older versions return a single dict.
                if isinstance(analysis, list):
                    for result in analysis:
                        emotion_counter[result['dominant_emotion']] += 1
                else:
                    emotion_counter[analysis['dominant_emotion']] += 1
            except Exception:
                # DeepFace raises when no face is detected; skip such frames.
                pass

        success, frame = cap.read()
        frame_count += 1

    cap.release()

    if emotion_counter:
        return emotion_counter.most_common(1)[0][0]
    return None
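
# NOTE: DeepFace.analyze also accepts enforce_detection=False, which avoids
# the exception on frames without a detectable face; the try/except above
# keeps the simpler skip-on-failure behavior.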


def extract_audio_from_video(video_path):
    """Extract the audio track to a temporary WAV file and load it with librosa."""
    video_clip = VideoFileClip(video_path)
    if video_clip.audio is None:  # guard against videos with no audio track
        video_clip.close()
        st.error("Error: The video has no audio track.")
        return None, None
    audio_path = "temp_audio.wav"
    video_clip.audio.write_audiofile(audio_path)
    video_clip.close()
    audio_array, sr = librosa.load(audio_path, sr=None)  # sr=None keeps the native rate
    os.remove(audio_path)
    return audio_array, sr
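
# If the MoviePy progress bar clutters the Streamlit logs, write_audiofile
# also accepts logger=None to silence it.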


def extract_features(audio_array, sr, max_length=100):
    """Stack MFCC, chroma, and spectral-contrast features, then pad or trim
    the time axis to `max_length` frames so every clip yields a fixed-size
    matrix."""
    try:
        mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio_array, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_array, sr=sr)

        # Shape: (13 MFCC + 12 chroma + 7 contrast, n_frames) = (32, n_frames).
        features = np.vstack([mfccs, chroma, spectral_contrast])
        if features.shape[1] < max_length:
            features = np.pad(features, ((0, 0), (0, max_length - features.shape[1])), mode='constant')
        elif features.shape[1] > max_length:
            features = features[:, :max_length]
        return features.T  # (max_length, 32)
    except Exception as e:
        st.error(f"Error extracting features from audio: {str(e)}")
        return None
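
# NOTE: the returned matrix is (100, 32), which main() flattens to a single
# 3200-value row; scaler.pkl and the SVM are assumed to have been fit on
# vectors of exactly that length.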


def main():
    # Inject the app's custom stylesheet.
    with open("style.css") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
    st.title("Emotion Detection from Video")

    uploaded_file = st.file_uploader("Upload a video", type=["mp4"])
    if uploaded_file is not None:
        # Persist the upload so OpenCV and MoviePy can read it from disk.
        video_path = "uploaded_video.mp4"
        with open(video_path, "wb") as f:
            f.write(uploaded_file.read())

        st.write("Processing video... please wait")
        highest_emotion = split_video_into_frames_and_analyze_emotions(video_path)
        audio_array, sr = extract_audio_from_video(video_path)
        if audio_array is None:
            return

        # Load the pre-trained audio SVM and the scaler it was fitted with.
        model_path = "SVMexec_modeltesting113.pkl"
        svm_model = joblib.load(model_path)
        scaler = joblib.load('scaler.pkl')

        features = extract_features(audio_array, sr)
        if features is not None:
            # Flatten the (100, 32) feature matrix into a single sample row.
            features_2d = features.reshape(1, -1)
            features_normalized = scaler.transform(features_2d)

            predicted_class = svm_model.predict(features_normalized)[0]
            emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']
            predicted_emotion = emotion_labels[predicted_class]

            if highest_emotion == predicted_emotion:
                st.write(f"The person in the video is {predicted_emotion}.")
            else:
                st.write(
                    f"The emotions from the frames and audio do not match: the facial "
                    f"expression seems to be {highest_emotion}, while the audio emotion "
                    f"seems to be {predicted_emotion}."
                )
        else:
            st.write("Failed to extract features from the audio file.")


if __name__ == "__main__":
    main()
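
# To try the app locally (assuming this script is saved as app.py, with
# style.css, SVMexec_modeltesting113.pkl, and scaler.pkl in the working
# directory):
#     streamlit run app.py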
|
|