Spaces:
Running
Running
abrar-adnan
committed on
Commit
·
378962d
1
Parent(s):
7dfcf08
Upload app.py
Browse files
app.py
CHANGED
@@ -43,9 +43,6 @@ def getTranscription(path):
|
|
43 |
# Insert Local Audio File Path
|
44 |
clip.audio.write_audiofile(r"audio.wav")
|
45 |
|
46 |
-
waveform, sample_rate = torchaudio.load("audio.wav")
|
47 |
-
waveform, sample_rate
|
48 |
-
|
49 |
waveform, sample_rate = torchaudio.load("audio.wav")
|
50 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
51 |
waveform = resampler(waveform)[0]
|
@@ -61,6 +58,37 @@ def getTranscription(path):
|
|
61 |
|
62 |
return transcription[0]
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
def video_processing(video_file, encoded_video):
|
65 |
angry = 0
|
66 |
disgust = 0
|
@@ -104,48 +132,25 @@ def video_processing(video_file, encoded_video):
|
|
104 |
# If there are no more frames, break out of the loop
|
105 |
if not ret:
|
106 |
break
|
107 |
-
|
108 |
-
# Convert the frame to RGB color (face_recognition uses RGB)
|
109 |
-
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
# Find all the faces in the frame using a pre-trained convolutional neural network.
|
115 |
-
face_locations = face_recognition.face_locations(gray)
|
116 |
-
#face_locations = face_recognition.face_locations(gray, number_of_times_to_upsample=0, model="cnn")
|
117 |
-
|
118 |
-
if len(face_locations) > 0:
|
119 |
-
# Show the original frame with face rectangles drawn around the faces
|
120 |
-
for top, right, bottom, left in face_locations:
|
121 |
-
# cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
|
122 |
-
face_image = gray[top:bottom, left:right]
|
123 |
-
color_image = frame[top:bottom, left:right]
|
124 |
-
|
125 |
-
# Resize the face image to the desired size
|
126 |
-
resized_face_image = cv2.resize(face_image, (128,128))
|
127 |
-
|
128 |
-
try:
|
129 |
-
emotion = DeepFace.analyze(color_image,actions=['emotion'],detector_backend = backends[2],enforce_detection = False)# 2,3, 4 works
|
130 |
-
emotion_count += 1
|
131 |
-
except Exception as e:
|
132 |
-
pass
|
133 |
|
134 |
-
print(emotion[0]['emotion'])
|
135 |
-
angry += emotion[0]['emotion']['angry']
|
136 |
-
disgust += emotion[0]['emotion']['disgust']
|
137 |
-
fear += emotion[0]['emotion']['fear']
|
138 |
-
happy += emotion[0]['emotion']['happy']
|
139 |
-
sad += emotion[0]['emotion']['sad']
|
140 |
-
surprise += emotion[0]['emotion']['surprise']
|
141 |
-
neutral += emotion[0]['emotion']['neutral']
|
142 |
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
try:
|
151 |
# your processing code here
|
@@ -179,7 +184,14 @@ def video_processing(video_file, encoded_video):
|
|
179 |
'sad': sad,
|
180 |
'surprise': surprise,
|
181 |
'neutral': neutral
|
182 |
-
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
# angry = 'total anger percentage' + str(angry)
|
185 |
# disgust = 'total disgust percentage' + str(disgust)
|
@@ -196,7 +208,7 @@ def video_processing(video_file, encoded_video):
|
|
196 |
print(f'total surprise percentage = {surprise}')
|
197 |
print(f'total neutral percentage = {neutral}')
|
198 |
final_result = "Gaze = "+str(gaze_percentage)+"\nFace Emotion = "+str(emotion)+"\nText Emotion = "+str(text_emotion)+"\nText transcription = "+str(transcription)+"\nText sentiment = "+str(text_sentiment)
|
199 |
-
return
|
200 |
|
201 |
|
202 |
demo = gr.Interface(fn=video_processing,
|
|
|
43 |
# Insert Local Audio File Path
|
44 |
clip.audio.write_audiofile(r"audio.wav")
|
45 |
|
|
|
|
|
|
|
46 |
waveform, sample_rate = torchaudio.load("audio.wav")
|
47 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
48 |
waveform = resampler(waveform)[0]
|
|
|
58 |
|
59 |
return transcription[0]
|
60 |
|
61 |
+
def process_frame(frame):
    """Run gaze classification and emotion analysis on one video frame.

    Args:
        frame: Image in OpenCV BGR channel order (it is converted with
            ``cv2.COLOR_BGR2GRAY`` below).

    Returns:
        A ``(result, emotion)`` tuple.

        * ``result`` is the first element of ``model.predict`` for the
          grayscale face crop (the caller compares it against
          ``'on_camera'`` / ``'off_camera'``), or ``None`` when no face is
          found in the frame.
        * ``emotion`` is the value returned by ``DeepFace.analyze`` for the
          colour face crop, or ``0`` when no face is found or the analysis
          fails. The caller tests ``emotion != 0`` before reading it.
    """
    # face detection and the gaze model work on a grayscale copy; the colour
    # frame is kept for DeepFace.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Uses face_recognition's default detector (HOG). The CNN detector can be
    # selected with:
    #   face_recognition.face_locations(gray, number_of_times_to_upsample=0, model="cnn")
    face_locations = face_recognition.face_locations(gray)

    if not face_locations:
        # Always return a 2-tuple so `result, emotion = process_frame(frame)`
        # in the caller does not fail on frames without a face.
        return None, 0

    # Only the first detected face is analysed.
    top, right, bottom, left = face_locations[0]
    face_image = gray[top:bottom, left:right]
    color_image = frame[top:bottom, left:right]

    # Resize the face image to the input size used for the gaze model.
    resized_face_image = cv2.resize(face_image, (128, 128))

    try:
        emotion = DeepFace.analyze(
            color_image,
            actions=['emotion'],
            detector_backend=backends[2],  # backends 2, 3 and 4 work
            enforce_detection=False,
        )
    except Exception as e:
        # Best effort: a failed emotion analysis must not abort the video
        # loop, but report why it failed instead of hiding it.
        print(f'emotion analysis failed: {e}')
        emotion = 0

    # NOTE(review): the previous `emotion_count += 1` here referenced an
    # uninitialised local, raised UnboundLocalError and forced `emotion = 0`
    # on every frame. If a per-frame emotion counter is needed, increment it
    # in the caller when `emotion != 0`.

    # Predict the class of the resized face image using the model.
    result = model.predict(resized_face_image)
    print(result[0])
    return result[0], emotion
|
90 |
+
|
91 |
+
|
92 |
def video_processing(video_file, encoded_video):
|
93 |
angry = 0
|
94 |
disgust = 0
|
|
|
132 |
# If there are no more frames, break out of the loop
|
133 |
if not ret:
|
134 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
+
result, emotion = process_frame(frame)
|
138 |
+
if result:
|
139 |
+
if result == 'on_camera':
|
140 |
+
on_camera += 1
|
141 |
+
elif result == 'off_camera':
|
142 |
+
off_camera += 1
|
143 |
+
total += 1
|
144 |
+
|
145 |
+
if emotion != 0:
|
146 |
+
print(emotion[0]['emotion'])
|
147 |
+
angry += emotion[0]['emotion']['angry']
|
148 |
+
disgust += emotion[0]['emotion']['disgust']
|
149 |
+
fear += emotion[0]['emotion']['fear']
|
150 |
+
happy += emotion[0]['emotion']['happy']
|
151 |
+
sad += emotion[0]['emotion']['sad']
|
152 |
+
surprise += emotion[0]['emotion']['surprise']
|
153 |
+
neutral += emotion[0]['emotion']['neutral']
|
154 |
|
155 |
try:
|
156 |
# your processing code here
|
|
|
184 |
'sad': sad,
|
185 |
'surprise': surprise,
|
186 |
'neutral': neutral
|
187 |
+
},
|
188 |
+
final_result_dict = {
|
189 |
+
"gaze_percentage" : gaze_percentage,
|
190 |
+
"face_emotion" : emotion,
|
191 |
+
"text_emotion" : text_emotion,
|
192 |
+
"transcription" : transcription,
|
193 |
+
"text_sentiment" : text_sentiment
|
194 |
+
}
|
195 |
|
196 |
# angry = 'total anger percentage' + str(angry)
|
197 |
# disgust = 'total disgust percentage' + str(disgust)
|
|
|
208 |
print(f'total surprise percentage = {surprise}')
|
209 |
print(f'total neutral percentage = {neutral}')
|
210 |
final_result = "Gaze = "+str(gaze_percentage)+"\nFace Emotion = "+str(emotion)+"\nText Emotion = "+str(text_emotion)+"\nText transcription = "+str(transcription)+"\nText sentiment = "+str(text_sentiment)
|
211 |
+
return final_result_dict
|
212 |
|
213 |
|
214 |
demo = gr.Interface(fn=video_processing,
|