import cv2
import torch
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import timm
from tqdm import tqdm
import torch.nn as nn
import os
import matplotlib.pyplot as plt
import torch.nn.functional as F
import dlib
import pandas as pd

# dnn_net = cv2.dnn.readNetFromCaffe("models/deploy.prototxt", "models/res10_300x300_ssd_iter_140000.caffemodel")
# # Initialize dlib's facial landmark predictor
# predictor = dlib.shape_predictor("models/shape_predictor_68_face_landmarks.dat")


def extract_face(image, net, predictor):
    # Prepare the image for DNN face detection
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()
    # Loop over the detections
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # Filter out weak detections
        if confidence > 0.5:
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # Convert bounding box to dlib rectangle format and locate the facial landmarks
            dlib_rect = dlib.rectangle(int(startX), int(startY), int(endX), int(endY))
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            landmarks = predictor(gray, dlib_rect)
            landmarks_np = np.array([[p.x, p.y] for p in landmarks.parts()])
            # Bounding box around the landmarks, padded by 25 px on each side
            x, y, w, h = cv2.boundingRect(landmarks_np)
            x -= 25
            y -= 25
            w += 50
            h += 50
            # Clamp the padded box to the image boundaries
            x = max(0, x)
            y = max(0, y)
            w = min(w, image.shape[1] - x)
            h = min(h, image.shape[0] - y)
            # Crop and resize the face; fall back to the full frame if the crop fails
            face_crop = image[y:y + h, x:x + w]
            try:
                face_crop = cv2.resize(face_crop, (224, 224))
            except Exception:
                face_crop = cv2.resize(image, (224, 224))
            return face_crop
    return None
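
# Example usage (sketch): the model paths mirror the commented-out loaders at the top of
# this file, and the input image path is an assumption, not a file shipped with this repo.
# dnn_net = cv2.dnn.readNetFromCaffe("models/deploy.prototxt", "models/res10_300x300_ssd_iter_140000.caffemodel")
# predictor = dlib.shape_predictor("models/shape_predictor_68_face_landmarks.dat")
# image = cv2.imread("sample.jpg")  # hypothetical BGR input frame
# face = extract_face(image, dnn_net, predictor)  # 224x224 crop, or None if no face is detected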


class Model:
    def __init__(self, fps, fer_model):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        # Build the EfficientNet-B0 backbone with a 7-class head; the checkpoint loaded
        # below is expected to hold a full serialized model, which replaces this architecture.
        self.fermodel = timm.create_model("tf_efficientnet_b0_ns", pretrained=False)
        self.fermodel.classifier = torch.nn.Identity()
        self.fermodel.classifier = nn.Sequential(
            nn.Linear(in_features=1280, out_features=7)
        )
        self.fermodel = torch.load(fer_model, map_location=self.device)
        self.fermodel.to(self.device)
        self.class_labels = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprised"]
        # Maps the model's output index to the ordering used downstream
        self.emotion_reorder = {
            0: 6,
            1: 5,
            2: 4,
            3: 1,
            4: 0,
            5: 2,
            6: 3,
        }
        self.label_dict = {
            0: "angry",
            1: "disgust",
            2: "fear",
            3: "happy",
            4: "neutral",
            5: "sad",
            6: "surprised",
        }
        self.class_wise_frame_count = None
        self.emotion_count = [0] * 7
        self.frame_count = 0
        self.fps = fps
        self.df = None
        self.faces_ = 0

    def predict(self, frames):
        emotion_list = []
        emt = []
        for frame in tqdm(frames):
            if frame is not None:
                frame = np.copy(frame)
                face_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                face_tensor = self.transform(face_pil).unsqueeze(0).to(self.device)
                with torch.no_grad():
                    output = self.fermodel(face_tensor)
                _, predicted = torch.max(output, 1)
                emotion = self.emotion_reorder[predicted.item()]
                if isinstance(emotion, np.ndarray):
                    emotion = (
                        emotion.astype(float).item()
                        if emotion.size == 1
                        else emotion.tolist()
                    )
                emotion = torch.tensor([emotion], dtype=torch.float32)  # Ensure it's a tensor
                emotion = emotion.to(self.device)
                emt.append(emotion)
                self.emotion_count[predicted.item()] += 1
                label = f"{self.label_dict[predicted.item()]}"
                emotion_list.append(label)
            else:
                emt.append('frame error')
                emotion_list.append('frame error')
        return emotion_list, emt

    def get_data(self, emotion_list, emt):
        self.class_wise_frame_count = dict(zip(self.class_labels, self.emotion_count))
        return emotion_list, self.class_wise_frame_count, emt


def fer_predict(video_frames, fps, model):
    emotion_list, emt = model.predict(video_frames)
    return model.get_data(emotion_list, emt)
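
# Example usage (sketch): the video path and checkpoint path below are assumptions;
# fer_predict expects a list of BGR frames, the video's fps, and an initialized Model.
# cap = cv2.VideoCapture("input_video.mp4")
# fps = cap.get(cv2.CAP_PROP_FPS)
# frames = []
# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break
#     frames.append(frame)
# cap.release()
# model = Model(fps=fps, fer_model="models/fer_model.pt")
# emotion_list, class_wise_frame_count, emt = fer_predict(frames, fps, model)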


def filter(list1, list2):
    # Keep only the positions where the corresponding entry in list2 is not flagged 'fnf'
    filtered_list1 = [x for i, x in enumerate(list1) if list2[i] != 'fnf']
    filtered_list2 = [x for x in list2 if x != 'fnf']
    return [filtered_list1, filtered_list2]


def plot_graph(x, y, var, path):
    # Non-numeric entries (e.g. error flags) are plotted as gaps
    y = [value if isinstance(value, (int, float)) else np.nan for value in y]
    print(len(y))
    plt.plot(range(len(x)), y, linestyle='-')
    plt.xlabel('Frame')
    plt.ylabel(var)
    plt.title(f'{var} Values vs Frame')
    plt.savefig(path)
    plt.clf()
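
# Example usage (sketch): the per-frame scores, label, and output path are placeholders, not real output.
# plot_graph(x=list(range(4)), y=[0.2, 'frame error', 0.5, 0.7], var="emotion score", path="plots/emotion_score.png")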


# def save_frames(frames, folder_path):
#     for i in tqdm(range(len(frames))):
#         frame_filename = os.path.join(folder_path, f'frame_{i+1:04d}.jpg')
#         # Save the frame as a .jpg file
#         frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2RGB)
#         cv2.imwrite(frame_filename, frame)