Update live.py
live.py
CHANGED
@@ -1,323 +1,323 @@
The only visible difference between the removed and added versions is line 219:

-            folder_path=os.path.join(output_dir,f
+            folder_path=os.path.join(output_dir,f"{session_data[uid]['vcount'][i]}")

Every other line is carried over unchanged into the updated file shown below.
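With the updated line, each question's plots are written to a subfolder of output/<uid>/ named after that question's vcount. A minimal sketch of the path it produces follows; the uid and vcount values are placeholders, not values from the repository:

import os

# Placeholders standing in for uid and session_data[uid]['vcount'][i]
uid = 'demo-session'
vcount_i = 2

output_dir = os.path.join('output', str(uid))
folder_path = os.path.join(output_dir, f"{vcount_i}")
print(folder_path)  # e.g. output/demo-session/2 on POSIX systems; valence.png,
                    # arousal.png and stress.png for that question are saved here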
The updated live.py in full:

import warnings
from functions.models import models_dict
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import logging
logging.getLogger('absl').setLevel(logging.ERROR)
from moviepy.editor import VideoFileClip
import pandas as pd
from tqdm import tqdm
import time
import json
import cv2
import dlib
from collections import Counter
import statistics
import shutil
import asyncio
import traceback

from functions.valence_arousal import va_predict
from functions.speech import speech_predict
from functions.eye_track import Facetrack, eye_track_predict
from functions.fer import extract_face,fer_predict,plot_graph,filter
# from app.utils.session import send_analytics, send_individual_analytics_files, send_combined_analytics_files, send_error
# from app.utils.socket import ConnectionManager
from typing import Callable

session_data={}
dnn_net=models_dict['face'][0]
predictor=models_dict['face'][1]
speech_model=models_dict['speech']
valence_dict_path=models_dict['vad'][0]
arousal_dict_path=models_dict['vad'][1]
dominance_dict_path=models_dict['vad'][2]
valence_arousal_model=models_dict['valence_fer'][1]
val_ar_feat_model=models_dict['valence_fer'][0]
fer_model=models_dict['fer']

def analyze_live_video(video_path: str, uid: str, user_id: str, count: int, final: bool, log: Callable[[str], None]):
    try:
        # initializing lists
        global session_data
        if uid not in session_data:
            session_data[uid] = {
                "vcount":[],
                "duration":[],

                "eye": [],

                "fer": [],
                "valence":[],
                "arousal":[],
                "stress":[],

                "blinks": [],
                "class_wise_frame_counts": [],

                "speech_emotions": [],
                "speech_data":[],
                "word_weights_list": []
            }
        print(f"UID: {uid}, User ID: {user_id}, Count: {count}, Final: {final}, Video: {video_path}")
        log(f"Analyzing video for question - {count}")

        output_dir = os.path.join('output',str(uid))
        print(output_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        # Wait for previous files to be written if final
        if final and count > 1:
            for i in range(1, count):
                previous_file_name = os.path.join(output_dir, f"{i}.json")
                print(previous_file_name)
                while not os.path.exists(previous_file_name):
                    time.sleep(1)

        video_clip = VideoFileClip(video_path)
        video_clip = video_clip.set_fps(30)
        print("Duration: ", video_clip.duration)
        session_data[uid]['vcount'].append(count)
        session_data[uid]['duration'].append(video_clip.duration)
        fps = video_clip.fps
        audio = video_clip.audio
        audio_path = os.path.join(output_dir,'extracted_audio.wav')
        audio.write_audiofile(audio_path)
        video_frames = [frame for frame in video_clip.iter_frames()]

        # Face extraction
        print("extracting faces")
        faces=[extract_face(frame,dnn_net,predictor) for frame in tqdm(video_frames)]
        print(f'{len([face for face in faces if face is not None])} faces found.')

        ## EYE TRACKING
        fc=Facetrack()
        log(f"Extracting eye features for question - {count}")
        eye_preds,blink_durations,total_blinks=eye_track_predict(fc,faces,fps)
        print(len(eye_preds))
        print("total_blinks- ",total_blinks)
        session_data[uid]['eye'].append(eye_preds)
        session_data[uid]['blinks'].append(blink_durations)

        # FACIAL EXPRESSION RECOGNITION
        log(f"Extracting facial features for question - {count}")
        fer_emotions,class_wise_frame_count,em_tensors=fer_predict(faces,fps,fer_model)
        print("face emotions",len(fer_emotions))
        session_data[uid]['fer'].append(fer_emotions)
        session_data[uid]['class_wise_frame_counts'].append(class_wise_frame_count)

        # VALENCE AROUSAL STRESS
        valence_list,arousal_list,stress_list=va_predict(valence_arousal_model,val_ar_feat_model,faces,list(em_tensors))
        session_data[uid]['valence'].append(valence_list)
        session_data[uid]['arousal'].append(arousal_list)
        session_data[uid]['stress'].append(stress_list)
        log(f"Extracting speech features for question - {count}")
        emotions,major_emotion,word=speech_predict(audio_path,speech_model,valence_dict_path,arousal_dict_path,dominance_dict_path)
        session_data[uid]['speech_emotions'].append(emotions)
        session_data[uid]['word_weights_list'].append(word['word_weights'])
        session_data[uid]['speech_data'].append([float(word['average_pause_length'] if word and word['average_pause_length'] else 0),float(word['articulation_rate'] if word and word['articulation_rate'] else 0),float(word['speaking_rate'] if word and word['speaking_rate'] else 0)])
        log(f"Generating the metadata for question - {count}")

        # Create Meta Data
        meta_data={}
        try:
            avg_blink_duration= float(sum(blink_durations)/(len(blink_durations)))
        except:
            avg_blink_duration=0
        meta_data['vcount']=count
        meta_data['eye_emotion_recognition'] = {
            "blink_durations": blink_durations,
            "avg_blink_duration":avg_blink_duration,
            "total_blinks": total_blinks,
            "duration":video_clip.duration
        }

        meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": class_wise_frame_count,
        }
        meta_data['speech_emotion_recognition'] = {
            'major_emotion':str(major_emotion),
            'pause_length':float(word['average_pause_length']),
            'articulation_rate':float(word['articulation_rate']),
            'speaking_rate':float(word['speaking_rate']),
            'word_weights':word['word_weights']
        }

        file_path=audio_path
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"{file_path} deleted")
        file_path='segment.wav'
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"{file_path} deleted")

        print("Individual: ", meta_data)
        if not final:
            print("Not final Executing")
            log(f"Saving analytics for question - {count}")
            # send_analytics(valence_plot, arousal_plot,{
            # "uid": uid,
            # "user_id": user_id,
            # "individual": meta_data,
            # "count": count
            # })
            print("Sent analytics")
            # send_individual_analytics_files(uid, output_dir, count)
            dummy_file_path = os.path.join(output_dir, f'{count}.json')
            print("Writing dummy file: ", dummy_file_path)
            with open(dummy_file_path, 'w') as dummy_file:
                json.dump({"status": "completed"}, dummy_file)
            return

        # Process combined
        log(f"Processing gathered data for final output")

        vcount=session_data[uid]['vcount']
        sorted_indices = sorted(range(len(vcount)), key=lambda i: vcount[i])
        for key in session_data[uid]:
            # Only sort lists that are the same length as vcount
            if len(session_data[uid][key]) == len(vcount):
                session_data[uid][key] = [session_data[uid][key][i] for i in sorted_indices]

        videos=len(session_data[uid]['vcount'])
        # INDIV PLOT SAVING
        combined_speech=[]
        combined_valence=[]
        combined_arousal=[]
        combined_stress=[]
        combined_fer=[]
        combined_eye=[]
        vid_index=[]
        combined_speech=[]
        combined_blinks=[]
        for i in range(videos):
            for j in range(len(session_data[uid]['speech_emotions'][i])):
                vid_index.append(i+1)
            combined_speech+=session_data[uid]['speech_emotions'][i]
        timestamps=[i*3 for i in range(len(combined_speech))]
        df = pd.DataFrame({
            'timestamps':timestamps,
            'video_index':vid_index,
            'speech_emotion':combined_speech
        })
        df.to_csv(os.path.join(output_dir,'combined_speech.csv'), index=False)

        vid_index=[]
        for i in range(videos):
            timestamps=[j/30 for j in range(len(session_data[uid]['valence'][i]))]
            for j in range(len(timestamps)):
                vid_index.append(i+1)
            folder_path=os.path.join(output_dir,f"{session_data[uid]['vcount'][i]}")
            os.makedirs(folder_path, exist_ok=True)
            plot_graph(timestamps,session_data[uid]['valence'][i],'valence',os.path.join(folder_path,'valence.png'))
            plot_graph(timestamps,session_data[uid]['arousal'][i],'arousal',os.path.join(folder_path,'arousal.png'))
            plot_graph(timestamps,session_data[uid]['stress'][i],'stress',os.path.join(folder_path,'stress.png'))
            combined_arousal+=session_data[uid]['arousal'][i]
            combined_valence+=session_data[uid]['valence'][i]
            combined_stress+=session_data[uid]['stress'][i]
            combined_fer+=session_data[uid]['fer'][i]
            combined_blinks+=session_data[uid]['blinks'][i]
            # combined_class_wise_frame_count+=session_data[uid]['class_wise_frame_counts'][i]
            try:
                max_value=max([x for x in combined_eye if isinstance(x, (int, float))])
            except:
                max_value=0
            session_data[uid]['eye'][i]=[x + max_value if isinstance(x, (int, float)) else x for x in session_data[uid]['eye'][i]]
            combined_eye+=session_data[uid]['eye'][i]

        timestamps=[i/fps for i in range(len(combined_arousal))]
        plot_graph(timestamps,combined_valence,'valence',os.path.join(output_dir,'valence.png'))
        plot_graph(timestamps,combined_arousal,'arousal',os.path.join(output_dir,'arousal.png'))
        plot_graph(timestamps,combined_stress,'stress',os.path.join(output_dir,'stress.png'))
        print(len(timestamps),len(vid_index),len(combined_fer),len(combined_valence),len(combined_arousal),len(combined_stress),len(combined_eye))
        df = pd.DataFrame({
            'timestamps':timestamps,
            'video_index': vid_index, # Add a column for video index
            'fer': combined_fer,
            'valence': combined_valence,
            'arousal': combined_arousal,
            'stress': combined_stress,
            'eye': combined_eye,
        })
        df.to_csv(os.path.join(output_dir,'combined_data.csv'), index=False)

        # generate metadata for Combined
        comb_meta_data={}
        try:
            avg_blink_duration= float(sum(combined_blinks)/(len(combined_blinks)))
        except:
            avg_blink_duration=0

        total_blinks=max([x for x in combined_eye if isinstance(x, (int, float))])

        comb_meta_data['eye_emotion_recognition'] = {
            "avg_blink_duration":avg_blink_duration,
            "total_blinks": total_blinks,
        }

        dict_list = session_data[uid]['class_wise_frame_counts']

        result = {}
        for d in dict_list:
            for key,value in d.items():
                result[key]=result.get(key,0)+value
        comb_meta_data['facial_emotion_recognition'] = {
            "class_wise_frame_count": result,
        }

        combined_weights = Counter()
        for word_weight in session_data[uid]['word_weights_list']:
            combined_weights.update(word_weight)
        combined_weights_dict = dict(combined_weights)
        print(combined_weights_dict)
        comb_meta_data['speech_emotion_recognition'] = {
            'major_emotion':str(major_emotion),
            'pause_length':statistics.mean([row[0] for row in session_data[uid]['speech_data']]),
            'articulation_rate':statistics.mean([row[1] for row in session_data[uid]['speech_data']]),
            'speaking_rate':statistics.mean([row[2] for row in session_data[uid]['speech_data']]),
            'word_weights':combined_weights_dict
        }
        with open(os.path.join(output_dir,'combined.json'), 'w') as json_file:
            json.dump(comb_meta_data, json_file)
        log(f"Saving analytics for final output")
        # send_analytics(valence_plot, arousal_plot,{
        # "uid": uid,
        # "user_id": user_id,
        # "individual": meta_data,
        # "combined": combined_meta_data,
        # "count": count
        # })
        # send_individual_analytics_files(uid, output_dir, count)
        # send_combined_analytics_files(uid, output_dir)

        # shutil.rmtree(output_dir)
        # print(f"Deleted output directory: {output_dir}")
    except Exception as e:
        print("Error analyzing video...: ", e)
        error_trace = traceback.format_exc()
        print("Error Trace: ", error_trace)
        log(f"Error analyzing video for question - {count}")
        # send_error(uid, {
        # "message": str(e),
        # "trace": error_trace
        # })
        shutil.rmtree('output')
        print(f"Deleted output directory: {output_dir}")


# st=time.time()
# # analyze_live_video(video_path, uid, user_id, count, final, log)
# analyze_live_video('videos/s2.webm', 1,1,1,False,print)
# analyze_live_video('videos/a4.webm', 1,1,2,True,print)

# analyze_live_video('videos/s2.webm', 1,1,2,True,print)
# print("time taken - ",time.time()-st)
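For reference, a runnable version of the commented-out driver at the bottom of the file might look like the following sketch. The .webm paths, uid, and user_id are placeholders, and it assumes the models referenced in functions.models are available locally:

import time

st = time.time()
# One call per recorded answer; final=False computes the per-question metrics and
# writes a <count>.json completion marker under output/<uid>/.
analyze_live_video('videos/q1.webm', uid='demo-session', user_id='demo-user',
                   count=1, final=False, log=print)
# The last call sets final=True, which waits for the earlier markers and then writes
# the combined CSVs, plots, and combined.json for the whole session.
analyze_live_video('videos/q2.webm', uid='demo-session', user_id='demo-user',
                   count=2, final=True, log=print)
print("time taken - ", time.time() - st)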