abrar-adnan committed
Commit 378962d · 1 Parent(s): 7dfcf08

Upload app.py

Files changed (1)
  1. app.py +57 -45
app.py CHANGED
@@ -43,9 +43,6 @@ def getTranscription(path):
     # Insert Local Audio File Path
     clip.audio.write_audiofile(r"audio.wav")
 
-    waveform, sample_rate = torchaudio.load("audio.wav")
-    waveform, sample_rate
-
     waveform, sample_rate = torchaudio.load("audio.wav")
     resampler = torchaudio.transforms.Resample(sample_rate, 16000)
     waveform = resampler(waveform)[0]
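For reference, this hunk de-duplicates the usual moviepy → torchaudio chain: extract the audio track, load it once, and resample to 16 kHz before transcription. A minimal standalone sketch of that chain (the Wav2Vec2 checkpoint is an assumption; the commit does not show which ASR model getTranscription uses):

    # Sketch only: extract audio, resample to 16 kHz, transcribe (ASR checkpoint assumed).
    import torchaudio
    from moviepy.editor import VideoFileClip
    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

    def transcribe(video_path):
        clip = VideoFileClip(video_path)
        clip.audio.write_audiofile("audio.wav")

        # Load the track once and resample to the 16 kHz rate the model expects.
        waveform, sample_rate = torchaudio.load("audio.wav")
        waveform = torchaudio.transforms.Resample(sample_rate, 16000)(waveform)[0]

        processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
        model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        inputs = processor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")
        ids = model(inputs.input_values).logits.argmax(dim=-1)
        return processor.batch_decode(ids)[0]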
@@ -61,6 +58,37 @@ def getTranscription(path):
 
     return transcription[0]
 
+def process_frame(frame):
+    # Convert the frame to RGB color (face_recognition uses RGB)
+    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+
+    # Find all the faces in the frame using a pre-trained convolutional neural network.
+    face_locations = face_recognition.face_locations(gray)
+    #face_locations = face_recognition.face_locations(gray, number_of_times_to_upsample=0, model="cnn")
+
+    if len(face_locations) > 0:
+        # Show the original frame with face rectangles drawn around the faces
+        for top, right, bottom, left in face_locations:
+            # cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
+            face_image = gray[top:bottom, left:right]
+            color_image = frame[top:bottom, left:right]
+
+            # Resize the face image to the desired size
+            resized_face_image = cv2.resize(face_image, (128,128))
+
+            try:
+                emotion = DeepFace.analyze(color_image,actions=['emotion'],detector_backend = backends[2],enforce_detection = False)# 2,3, 4 works
+                emotion_count += 1
+            except Exception as e:
+                emotion = 0
+                pass
+
+            # Predict the class of the resized face image using the model
+            result = model.predict(resized_face_image)
+            print(result[0])
+            return result[0], emotion
+
+
 def video_processing(video_file, encoded_video):
     angry = 0
     disgust = 0
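Two things are worth noting about the new helper as committed: result and emotion are only bound when a face is detected, so on a face-free frame the function falls through and returns None (which the caller's tuple unpack cannot handle), and emotion_count is not defined inside the helper, so the += only works if that name also exists at module level. A defensive variant might look like the following; this is a sketch only, not part of the commit (model, backends and the 128x128 crop size are taken from the diff):

    def process_frame_safe(frame):
        # Sketch only (not part of the commit): return (None, None) when no face is found.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        face_locations = face_recognition.face_locations(gray)
        if not face_locations:
            return None, None

        top, right, bottom, left = face_locations[0]
        face_image = cv2.resize(gray[top:bottom, left:right], (128, 128))
        color_image = frame[top:bottom, left:right]

        emotion = None
        try:
            emotion = DeepFace.analyze(color_image, actions=['emotion'],
                                       detector_backend=backends[2], enforce_detection=False)
        except Exception:
            pass  # keep the gaze prediction even if DeepFace fails on this crop

        result = model.predict(face_image)  # on_camera / off_camera classifier from the diff
        return result[0], emotion

With a shape like this, the caller can count emotion frames itself instead of mutating emotion_count from inside the helper.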
@@ -104,48 +132,25 @@ def video_processing(video_file, encoded_video):
         # If there are no more frames, break out of the loop
         if not ret:
             break
-
-        # Convert the frame to RGB color (face_recognition uses RGB)
-        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-
-
-
-
-        # Find all the faces in the frame using a pre-trained convolutional neural network.
-        face_locations = face_recognition.face_locations(gray)
-        #face_locations = face_recognition.face_locations(gray, number_of_times_to_upsample=0, model="cnn")
-
-        if len(face_locations) > 0:
-            # Show the original frame with face rectangles drawn around the faces
-            for top, right, bottom, left in face_locations:
-                # cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
-                face_image = gray[top:bottom, left:right]
-                color_image = frame[top:bottom, left:right]
-
-                # Resize the face image to the desired size
-                resized_face_image = cv2.resize(face_image, (128,128))
-
-                try:
-                    emotion = DeepFace.analyze(color_image,actions=['emotion'],detector_backend = backends[2],enforce_detection = False)# 2,3, 4 works
-                    emotion_count += 1
-                except Exception as e:
-                    pass
 
-                print(emotion[0]['emotion'])
-                angry += emotion[0]['emotion']['angry']
-                disgust += emotion[0]['emotion']['disgust']
-                fear += emotion[0]['emotion']['fear']
-                happy += emotion[0]['emotion']['happy']
-                sad += emotion[0]['emotion']['sad']
-                surprise += emotion[0]['emotion']['surprise']
-                neutral += emotion[0]['emotion']['neutral']
 
-                # Predict the class of the resized face image using the model
-                result = model.predict(resized_face_image)
-                print(result[0])
-                if(result[0] == 'on_camera'): on_camera = on_camera + 1
-                elif(result[0] == 'off_camera'): off_camera = off_camera + 1
-        total = total + 1
+        result, emotion = process_frame(frame)
+        if result:
+            if result == 'on_camera':
+                on_camera += 1
+            elif result == 'off_camera':
+                off_camera += 1
+        total += 1
+
+        if emotion != 0:
+            print(emotion[0]['emotion'])
+            angry += emotion[0]['emotion']['angry']
+            disgust += emotion[0]['emotion']['disgust']
+            fear += emotion[0]['emotion']['fear']
+            happy += emotion[0]['emotion']['happy']
+            sad += emotion[0]['emotion']['sad']
+            surprise += emotion[0]['emotion']['surprise']
+            neutral += emotion[0]['emotion']['neutral']
 
     try:
         # your processing code here
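For orientation, the loop this hunk slims down is the standard OpenCV read loop; separated from the per-frame analysis it reduces to the pattern below (a sketch only; the capture setup itself sits above the hunk and is assumed here):

    def iter_frames(video_path):
        # Sketch: yield BGR frames until the stream is exhausted, then release the capture.
        cap = cv2.VideoCapture(video_path)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:  # no more frames
                    break
                yield frame
        finally:
            cap.release()

Each yielded frame would then pass through process_frame and the per-emotion accumulators exactly as in the + lines above.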
@@ -179,7 +184,14 @@ def video_processing(video_file, encoded_video):
         'sad': sad,
         'surprise': surprise,
         'neutral': neutral
-    },
+    },
+    final_result_dict = {
+        "gaze_percentage" : gaze_percentage,
+        "face_emotion" : emotion,
+        "text_emotion" : text_emotion,
+        "transcription" : transcription,
+        "text_sentiment" : text_sentiment
+    }
 
     # angry = 'total anger percentage' + str(angry)
     # disgust = 'total disgust percentage' + str(disgust)
@@ -196,7 +208,7 @@ def video_processing(video_file, encoded_video):
     print(f'total surprise percentage = {surprise}')
     print(f'total neutral percentage = {neutral}')
     final_result = "Gaze = "+str(gaze_percentage)+"\nFace Emotion = "+str(emotion)+"\nText Emotion = "+str(text_emotion)+"\nText transcription = "+str(transcription)+"\nText sentiment = "+str(text_sentiment)
-    return final_result
+    return final_result_dict
 
 
 demo = gr.Interface(fn=video_processing,
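Since video_processing now returns final_result_dict rather than the formatted string, the gr.Interface call (truncated here) would typically pair it with a JSON-style output component. A minimal sketch with assumed components, since the diff does not show the inputs/outputs arguments:

    import gradio as gr

    # Sketch only: the component choices are assumptions; the commit truncates the Interface call.
    demo = gr.Interface(
        fn=video_processing,
        inputs=[gr.Video(), gr.Textbox(label="encoded_video")],
        outputs=gr.JSON(),  # a dict return value renders naturally as JSON
    )
    demo.launch()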
 