# speech-analyzer / app.py
# abrar-adnan's picture
# Update app.py
# 33bf45e
# raw
# history blame
# 4.7 kB
import base64
import os
import tempfile
import time

import gradio as gr
import cv2
import face_recognition
from fastai.vision.all import load_learner
import chardet
from deepface import DeepFace
# import pathlib
# temp = pathlib.PosixPath
# pathlib.PosixPath = pathlib.WindowsPath
# Face-detector backend names accepted by DeepFace's ``detector_backend``
# argument. video_processing selects one of them by index.
backends = [
    "opencv",
    "ssd",
    "dlib",
    "mtcnn",
    "retinaface",
    "mediapipe",
]
# Load the exported fastai learner once, at import time. video_processing calls
# model.predict() on 128x128 grayscale face crops and compares the predicted
# label with 'on_camera' / 'off_camera'.
model = load_learner("gaze-recognizer-v3.pkl")
# Frames consumed per sample; only the last frame of each batch is analysed.
# NOTE(review): 24 * 3 presumably means "one frame every 3 s of 24 fps video".
_FRAMES_PER_SAMPLE = 24 * 3

# DeepFace emotion key -> label used in the textual report.
_EMOTION_LABELS = {
    "angry": "total anger percentage",
    "disgust": "total disgust percentage",
    "fear": "total fear percentage",
    "happy": "total happy percentage",
    "sad": "total sad percentage",
    "surprise": "total surprise percentage",
    "neutral": "total neutral percentage",
}


def _read_sampled_frame(video_capture):
    """Read one batch of frames and return the last one, or None at end of video."""
    frame = None
    for _ in range(_FRAMES_PER_SAMPLE):
        ret, frame = video_capture.read()
        if not ret:
            # An incomplete trailing batch is discarded, as before.
            return None
    return frame


def _emotion_scores(face_image):
    """Return DeepFace's per-emotion scores for a face crop, or None on failure."""
    try:
        analysis = DeepFace.analyze(
            face_image,
            actions=['emotion'],
            detector_backend=backends[2],  # 2, 3, 4 works
            enforce_detection=False,
        )
    except Exception as exc:
        # Emotion analysis is best effort: report the problem and keep going
        # so the gaze statistics are still produced.
        print(f"Emotion analysis failed: {exc}")
        return None
    # Newer DeepFace releases return a list with one dict per detected face,
    # older ones return a single dict.
    if isinstance(analysis, list):
        if not analysis:
            return None
        analysis = analysis[0]
    return analysis.get("emotion")


def _format_report(gaze_percentage, emotion_totals, emotion_count):
    """Build the text report: the gaze value first, then one line per emotion."""
    lines = [str(gaze_percentage)]
    for name, label in _EMOTION_LABELS.items():
        # Without any successful emotion sample the average is reported as 0.0
        # instead of dividing by zero.
        average = emotion_totals[name] / emotion_count if emotion_count else 0.0
        lines.append(f"{label} = {average}")
    return "\n".join(lines)


def video_processing(video_file, encoded_video):
    """Estimate how often the speaker looks at the camera and their average emotions.

    One frame out of every ``_FRAMES_PER_SAMPLE`` is analysed. Each face found
    in a sampled frame is classified by the module-level ``model`` as
    'on_camera' or 'off_camera', and its colour crop is passed to DeepFace for
    emotion scores.

    Args:
        video_file: Path of the video to analyse. Ignored when
            ``encoded_video`` is non-empty.
        encoded_video: Base64-encoded video data, or an empty string/None to
            use ``video_file``.

    Returns:
        A multi-line string. The first line is the percentage of detected
        faces classified 'on_camera' (or a 'no face detected ...' message when
        no face was found); the following lines are the labelled emotion
        scores averaged over all successfully analysed faces.

    Raises:
        ValueError: If neither ``video_file`` nor ``encoded_video`` is given.
    """
    temp_path = None
    if encoded_video:
        decoded_file_data = base64.b64decode(encoded_video)
        # A unique temporary file keeps concurrent requests from overwriting
        # each other's upload.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            tmp.write(decoded_file_data)
        temp_path = tmp.name
        video_file = temp_path
    if not video_file:
        raise ValueError(
            "No video supplied: provide a video file or base64-encoded video data."
        )

    emotion_totals = dict.fromkeys(_EMOTION_LABELS, 0.0)
    emotion_count = 0
    on_camera = 0
    off_camera = 0
    total = 0

    start_time = time.time()
    video_capture = cv2.VideoCapture(video_file)
    try:
        while True:
            frame = _read_sampled_frame(video_capture)
            if frame is None:
                break

            # Face detection and the gaze model both work on the grayscale frame.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            for top, right, bottom, left in face_recognition.face_locations(gray):
                face_image = gray[top:bottom, left:right]
                color_image = frame[top:bottom, left:right]

                scores = _emotion_scores(color_image)
                if scores:
                    for name in emotion_totals:
                        emotion_totals[name] += float(scores.get(name, 0.0))
                    emotion_count += 1

                # The gaze model is fed 128x128 crops.
                resized_face_image = cv2.resize(face_image, (128, 128))
                result = model.predict(resized_face_image)
                print(result[0])
                if result[0] == 'on_camera':
                    on_camera += 1
                elif result[0] == 'off_camera':
                    off_camera += 1
                total += 1
    finally:
        # Always free the capture and the temporary upload, even on errors.
        video_capture.release()
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    print(f'Total = {total},on_camera = {on_camera},off_camera = {off_camera}')
    print(f'Time taken: {time.time() - start_time}')

    if total:
        gaze_percentage = on_camera / total * 100
    else:
        gaze_percentage = (
            f'no face detected Total = {total},'
            f'on_camera = {on_camera},off_camera = {off_camera}'
        )

    report = _format_report(gaze_percentage, emotion_totals, emotion_count)
    print(report)
    return report
# Gradio front end: a video input and a text input are passed to
# video_processing, whose result is shown as text.
demo = gr.Interface(
    fn=video_processing,
    inputs=["video", "text"],
    outputs="text",
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()