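"""Streamlit app that predicts the dominant emotion in an uploaded video.

Two signals are combined: DeepFace analyzes sampled video frames for facial
emotion, and an SVM loaded from disk classifies audio features extracted
with librosa. The two predictions are then compared and reported.

Run with: streamlit run app.py
"""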
import os
from collections import Counter

import cv2
import joblib
import librosa
import numpy as np
import streamlit as st
from deepface import DeepFace
from moviepy import VideoFileClip  # moviepy >= 2.0; on 1.x use `from moviepy.editor import VideoFileClip`
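# Label encoding assumed to match the classes the SVM was trained on
# (alphabetical order, as produced by a typical LabelEncoder).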
emotion_map = {
    'angry': 0,
    'disgust': 1,
    'fear': 2,
    'happy': 3,
    'neutral': 4,
    'sad': 5,
}
def split_video_into_frames_and_analyze_emotions(video_path, frame_rate=1):
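    """Sample every `frame_rate`-th frame and return the most frequent
    dominant facial emotion found by DeepFace, or None if no face was
    detected in any sampled frame."""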
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        st.error("Error: Could not open video.")
        return None
    frame_count = 0
    success, frame = cap.read()
    emotion_counter = Counter()
    while success:
        # With the default frame_rate=1 every frame is analyzed; raise it
        # to skip frames and speed up processing.
        if frame_count % frame_rate == 0:
            try:
                analysis = DeepFace.analyze(frame, actions=['emotion'])
                # DeepFace returns a list of results (one per detected face)
                # in recent versions, or a single dict in older ones.
                if isinstance(analysis, list):
                    for result in analysis:
                        emotion_counter[result['dominant_emotion']] += 1
                else:
                    emotion_counter[analysis['dominant_emotion']] += 1
            except Exception:
                # Skip frames where DeepFace finds no face.
                pass
        success, frame = cap.read()
        frame_count += 1
    cap.release()
    if emotion_counter:
        return emotion_counter.most_common(1)[0][0]
    return None
def extract_audio_from_video(video_path):
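    """Write the video's audio track to a temporary WAV file, load it with
    librosa, and return the raw samples and sample rate."""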
    video_clip = VideoFileClip(video_path)
    if video_clip.audio is None:
        video_clip.close()
        raise ValueError("The uploaded video has no audio track.")
    audio_path = "temp_audio.wav"
    video_clip.audio.write_audiofile(audio_path)
    video_clip.close()
    audio_array, sr = librosa.load(audio_path, sr=None)
    os.remove(audio_path)
    return audio_array, sr
def extract_features(audio_array, sr, max_length=100):
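    """Stack MFCC, chroma, and spectral-contrast features, then pad or
    truncate along the time axis to `max_length` frames. Returns an array
    of shape (max_length, n_features), or None on failure."""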
    try:
        mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=audio_array, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=audio_array, sr=sr)
        features = np.vstack([mfccs, chroma, spectral_contrast])
        # Pad with zeros or truncate so every clip yields a fixed-size input.
        if features.shape[1] < max_length:
            features = np.pad(features, ((0, 0), (0, max_length - features.shape[1])), mode='constant')
        elif features.shape[1] > max_length:
            features = features[:, :max_length]
        return features.T
    except Exception as e:
        st.error(f"Error extracting features from audio: {e}")
        return None
def main():
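    """Streamlit entry point: upload a video, run both emotion pipelines,
    and report whether the facial and audio predictions agree."""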
with open("style.css") as f:
st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)
st.title("Emotion Detection from Video")
uploaded_file = st.file_uploader("Upload a video", type=["mp4"])
if uploaded_file is not None:
video_path = "uploaded_video.mp4"
with open(video_path, "wb") as f:
f.write(uploaded_file.read())
st.write("Processing video...please wait")
highest_emotion = split_video_into_frames_and_analyze_emotions(video_path)
audio_array, sr = extract_audio_from_video(video_path)
model_path = "SVMexec_modeltesting113.pkl"
svm_model = joblib.load(model_path)
scaler = joblib.load('scaler.pkl')
features = extract_features(audio_array, sr)
if features is not None:
features_2d = features.reshape(1, -1)
features_normalized = scaler.transform(features_2d)
predicted_class = svm_model.predict(features_normalized)[0]
emotion_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad']
predicted_emotion = emotion_labels[predicted_class]
if highest_emotion == predicted_emotion:
st.write(f"The person in the video is {predicted_emotion}.")
else:
st.write(f"The emotions from the frames and audio do not match, but the facial expression seems to be {highest_emotion}, while the audio emotion seems to be {predicted_emotion}.")
else:
st.write("Failed to extract features from the audio file.")
if __name__ == "__main__":
    main()