Figea committed on
Commit
974d749
·
1 Parent(s): 24fe94c

add docstring

Browse files
Dockerfile CHANGED
@@ -5,24 +5,28 @@ RUN apt-get update && \
5
  apt-get install ffmpeg libsm6 libxext6 -y && \
6
  apt-get clean
7
 
8
- # Install the dependancies
 
9
  COPY requirements.txt /
10
  RUN pip install --no-cache-dir -r requirements.txt
11
 
12
- # Will execute nltk.download('wordnet')
13
- #COPY post_install.py .
14
- #RUN python post_install.py
15
-
16
  RUN [ "python", "-c", "import nltk; nltk.download('wordnet', download_dir='/usr/local/nltk_data')" ]
17
 
18
- # Copy the code files
 
19
  COPY src /
20
 
21
- # Listen to port 7860
 
22
  EXPOSE 5000
23
 
24
- # Define the working dir in the contener
 
25
  WORKDIR /
26
 
27
- # Commande to start the app
 
28
  CMD ["gunicorn", "--bind", "0.0.0.0:5000", "main:app"]
 
5
  apt-get install ffmpeg libsm6 libxext6 -y && \
6
  apt-get clean
7
 
8
+ # ---- Install the dependencies
9
+ #
10
  COPY requirements.txt /
11
  RUN pip install --no-cache-dir -r requirements.txt
12
 
13
+ # ---- RUN python
14
+ # ---- Will execute nltk.download('wordnet')
15
+ #
 
16
  RUN [ "python", "-c", "import nltk; nltk.download('wordnet', download_dir='/usr/local/nltk_data')" ]
17
 
18
+ # ---- Copy the code files
19
+ #
20
  COPY src /
21
 
22
+ # ---- Listen to port 5000
23
+ #
24
  EXPOSE 5000
25
 
26
+ # ---- Define the working dir in the container
27
+ #
28
  WORKDIR /
29
 
30
+ # ---- Command to start the app
31
+ # ----
32
  CMD ["gunicorn", "--bind", "0.0.0.0:5000", "main:app"]
src/display_gloss.py CHANGED
@@ -2,36 +2,44 @@ import cv2
2
  import json
3
  import numpy as np
4
  import pandas as pd
5
- import os
6
  import time
7
 
 
8
  def draw_hands_connections(frame, hand_landmarks):
9
  '''
10
- Draw white lines between relevant points of hands landmarks
11
-
12
  Parameters
13
  ----------
14
- frame: numpy array, corresponding to the frame on which we want to draw
15
- hand_landmarks: dictionnary, collecting the hands landmarks
 
 
 
16
 
17
- Return
18
- ------
19
- frame: numpy array, with the newly drawing of the hands
 
20
  '''
21
- # define hand_connections between keypoints
 
 
22
  hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
23
  [5, 6], [6, 7], [7, 8],
24
  [9, 10], [10, 11], [11, 12],
25
  [13, 14], [14, 15], [15, 16],
26
  [17, 18], [18, 19], [19, 20]] #[5, 2], [0, 17]]
27
 
28
- # loop to draw left hand connection
 
29
  for connection in hand_connections:
30
  landmark_start = hand_landmarks['left_hand'].get(str(connection[0]))
31
  landmark_end = hand_landmarks['left_hand'].get(str(connection[1]))
32
  cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
33
 
34
- # loop to to draw right hand connection
 
35
  for connection in hand_connections:
36
  landmark_start = hand_landmarks['right_hand'].get(str(connection[0]))
37
  landmark_end = hand_landmarks['right_hand'].get(str(connection[1]))
@@ -41,20 +49,28 @@ def draw_hands_connections(frame, hand_landmarks):
41
 
42
  def draw_pose_connections(frame, pose_landmarks):
43
  '''
44
- Draw white lines between relevant points of pose landmarks
45
-
46
  Parameters
47
  ----------
48
- frame: numpy array, corresponding to the frame on which we want to draw
49
- hand_landmarks: dictionnary, collecting the pose landmarks
50
-
51
- Return
52
- ------
53
- frame: numpy array, with the newly drawing of the pose
 
 
 
 
54
  '''
55
- # define pose connections
 
 
56
  pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]
57
 
 
 
58
  for connection in pose_connections:
59
  landmark_start = pose_landmarks.get(str(connection[0]))
60
  landmark_end = pose_landmarks.get(str(connection[1]))
@@ -64,31 +80,38 @@ def draw_pose_connections(frame, pose_landmarks):
64
 
65
  def draw_face_connections(frame, face_landmarks):
66
  '''
67
- Draw white lines between relevant points of face landmarks
68
-
69
  Parameters
70
  ----------
71
- frame: numpy array, corresponding to the frame on which we want to draw
72
- hand_landmarks: dictionnary, collecting the face landmarks
73
-
74
- Return
75
- ------
76
- frame: numpy array, with the newly drawing of the face
 
 
 
 
77
  '''
78
- # define pose connections
 
79
  connections_dict = {'lipsUpperInner_connections' : [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],\
80
- 'lipsLowerInner_connections' : [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],\
81
- 'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],\
82
- 'rightEyeLower0' : [33, 7, 163, 144, 145, 153, 154, 155, 133],\
83
- 'rightEyebrowLower' : [35, 124, 46, 53, 52, 65],\
84
- 'leftEyeUpper0' : [466, 388, 387, 386, 385, 384, 398],\
85
- 'leftEyeLower0' : [263, 249, 390, 373, 374, 380, 381, 382, 362],\
86
- 'leftEyebrowLower' : [265, 353, 276, 283, 282, 295],\
87
- 'noseTip_midwayBetweenEye' : [1, 168],\
88
- 'noseTip_noseRightCorner' : [1, 98],\
89
- 'noseTip_LeftCorner' : [1, 327]\
90
- }
91
 
 
 
92
  for keypoints_list in connections_dict.values():
93
  for index in range(len(keypoints_list)):
94
  if index + 1 < len(keypoints_list):
@@ -98,20 +121,78 @@ def draw_face_connections(frame, face_landmarks):
98
  return frame
99
 
100
  def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  for keypoint in landmarks.keys():
102
  landmark_x, landmark_y = landmarks[keypoint]
103
  landmarks[keypoint] = [int(resize_rate_width * landmark_x), int(resize_rate_height*landmark_y)]
 
104
  return landmarks
105
 
106
  def generate_video(gloss_list, dataset, vocabulary_list):
107
- # size of video of signer 11
108
- # FIXED_WIDTH, FIXED_HEIGHT, = 288, 192,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  FIXED_WIDTH, FIXED_HEIGHT = 576, 384
110
- fps = 25
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  for gloss in gloss_list:
 
 
113
  if not check_gloss_in_vocabulary(gloss, vocabulary_list):
114
  continue
 
 
 
115
  video_id = select_video_id_from_gloss(gloss, dataset)
116
  video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
117
  with open(video_landmarks_path, 'r') as f:
@@ -119,76 +200,120 @@ def generate_video(gloss_list, dataset, vocabulary_list):
119
  width = video_landmarks[-1].get('width')
120
  height = video_landmarks[-1].get('height')
121
 
122
- # calculate resize rate
 
123
  resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT/height
124
 
125
- text = gloss
126
- font = cv2.FONT_HERSHEY_SIMPLEX
127
- font_scale = 1
128
- font_color = (0, 255, 0)
129
- thickness = 2
130
- line_type = cv2.LINE_AA
131
-
132
  for frame_landmarks in video_landmarks[:-1]:
 
 
133
  blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
134
  frame_hands_landmarks = frame_landmarks['hands_landmarks']
135
  frame_pose_landmarks = frame_landmarks['pose_landmarks']
136
  frame_face_landmarks = frame_landmarks['face_landmarks']
137
 
138
- #left_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['left_hand'].values()]
139
- #right_hand_landmarks_xy = [(x, y) for x, y in frame_hands_landmarks['right_hand'].values()]
140
-
141
- #for x, y in left_hand_landmarks_xy:
142
- # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
143
- #for x, y in right_hand_landmarks_xy:
144
- # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
145
-
146
- # pose_landmarks_xy = [(x, y) for x, y in frame_pose_landmarks.values()]
147
- # for x, y in pose_landmarks_xy:
148
- # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
149
-
150
- # face_landmarks_xy = [(x, y) for x, y in frame_face_landmarks.values()]
151
- # for x, y in face_landmarks_xy:
152
- # cv2.circle(blank_image, (x, y), 1, (255, 255, 255), -1)
153
  frame_hands_landmarks_rs = {
154
  'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
155
  'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
156
  }
157
  frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
158
  frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
 
 
 
159
  draw_hands_connections(blank_image, frame_hands_landmarks_rs)
160
  draw_pose_connections(blank_image, frame_pose_landmarks_rs)
161
  draw_face_connections(blank_image, frame_face_landmarks_rs)
162
 
163
- text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
 
 
164
  text_x = (FIXED_WIDTH - text_size[0]) // 2
165
  text_y = FIXED_HEIGHT - 10
166
- cv2.putText(blank_image, text, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
167
 
168
- # Convertir l'image en JPEG encodé
 
169
  _, buffer = cv2.imencode('.jpg', blank_image)
170
  frame = buffer.tobytes()
171
 
172
  yield (b'--frame\r\n'
173
  b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
174
 
175
- time.sleep(1 / fps)
 
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
- def load_data(dataset_path='local_dataset'):
179
  filepath = dataset_path
180
  data_df = pd.read_csv(filepath, dtype={'video_id': str})
181
  vocabulary_list = data_df['gloss'].tolist()
 
182
  return data_df, vocabulary_list
183
 
184
 
185
  def check_gloss_in_vocabulary(gloss, vocabulary_list):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  return gloss in vocabulary_list
187
 
 
188
  def select_video_id_from_gloss(gloss, dataset):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  filtered_data_id_11 = dataset.loc[dataset['signer_id'] == 11]
 
190
  if gloss in filtered_data_id_11['gloss'].tolist():
191
  video_id = filtered_data_id_11.loc[filtered_data_id_11['gloss'] == gloss, 'video_id'].values
192
  else:
193
  video_id = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
 
194
  return video_id[0]
 
2
  import json
3
  import numpy as np
4
  import pandas as pd
 
5
  import time
6
 
7
+
8
  def draw_hands_connections(frame, hand_landmarks):
9
  '''
10
+ Draw white lines on the given frame between relevant hand keypoints.
11
+
12
  Parameters
13
  ----------
14
+ frame: numpy array
15
+ The frame on which we want to draw.
16
+ hand_landmarks: dict
17
+ Dictionary mapping keypoint IDs (integers) to hand landmarks
18
+ (lists of two floats corresponding to the coordinates) for both hands.
19
 
20
+ Returns
21
+ -------
22
+ frame: numpy array
23
+ The frame with the newly drawn hand connections.
24
  '''
25
+
26
+ # ---- Define hand_connections between keypoints to draw
27
+ #
28
  hand_connections = [[0, 1], [1, 2], [2, 3], [3, 4],
29
  [5, 6], [6, 7], [7, 8],
30
  [9, 10], [10, 11], [11, 12],
31
  [13, 14], [14, 15], [15, 16],
32
  [17, 18], [18, 19], [19, 20]] #[5, 2], [0, 17]]
33
 
34
+ # ---- loop to draw left hand connections
35
+ #
36
  for connection in hand_connections:
37
  landmark_start = hand_landmarks['left_hand'].get(str(connection[0]))
38
  landmark_end = hand_landmarks['left_hand'].get(str(connection[1]))
39
  cv2.line(frame, landmark_start, landmark_end, (255, 255, 255), 2)
40
 
41
+ # ---- loop to draw right hand connections
42
+ #
43
  for connection in hand_connections:
44
  landmark_start = hand_landmarks['right_hand'].get(str(connection[0]))
45
  landmark_end = hand_landmarks['right_hand'].get(str(connection[1]))
 
49
 
50
  def draw_pose_connections(frame, pose_landmarks):
51
  '''
52
+ Draw white lines on the given frame between relevant posture keypoints.
53
+
54
  Parameters
55
  ----------
56
+ frame: numpy array
57
+ The frame on which we want to draw.
58
+ pose_landmarks: dict
59
+ Dictionary mapping keypoint IDs (integers) to posture landmarks
60
+ (lists of two floats corresponding to the coordinates).
61
+
62
+ Returns
63
+ -------
64
+ frame: numpy array
65
+ The frame with the newly drawn posture connections.
66
  '''
67
+
68
+ # ---- define posture connections between keypoints to draw
69
+ #
70
  pose_connections = [[11, 12], [11, 13], [12, 14], [13, 15], [14, 16]]
71
 
72
+ # ---- loop to draw posture connections
73
+ #
74
  for connection in pose_connections:
75
  landmark_start = pose_landmarks.get(str(connection[0]))
76
  landmark_end = pose_landmarks.get(str(connection[1]))
 
80
 
81
  def draw_face_connections(frame, face_landmarks):
82
  '''
83
+ Draw white lines on the given frame between relevant face keypoints.
84
+
85
  Parameters
86
  ----------
87
+ frame: numpy array
88
+ The frame on which we want to draw.
89
+ face_landmarks: dict
90
+ Dictionary mapping keypoint IDs (integers) to face landmarks
91
+ (lists of two floats corresponding to the coordinates).
92
+
93
+ Returns
94
+ -------
95
+ frame: numpy array
96
+ The frame with the newly drawn face connections.
97
  '''
98
+ # ---- define face connections
99
+ #
100
  connections_dict = {'lipsUpperInner_connections' : [78, 191, 80, 81, 82, 13, 312, 311, 310, 415, 308],\
101
+ 'lipsLowerInner_connections' : [78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308],\
102
+ 'rightEyeUpper0_connections': [246, 161, 160, 159, 158, 157, 173],\
103
+ 'rightEyeLower0' : [33, 7, 163, 144, 145, 153, 154, 155, 133],\
104
+ 'rightEyebrowLower' : [35, 124, 46, 53, 52, 65],\
105
+ 'leftEyeUpper0' : [466, 388, 387, 386, 385, 384, 398],\
106
+ 'leftEyeLower0' : [263, 249, 390, 373, 374, 380, 381, 382, 362],\
107
+ 'leftEyebrowLower' : [265, 353, 276, 283, 282, 295],\
108
+ 'noseTip_midwayBetweenEye' : [1, 168],\
109
+ 'noseTip_noseRightCorner' : [1, 98],\
110
+ 'noseTip_LeftCorner' : [1, 327]\
111
+ }
112
 
113
+ # ---- loop to draw face connections
114
+ #
115
  for keypoints_list in connections_dict.values():
116
  for index in range(len(keypoints_list)):
117
  if index + 1 < len(keypoints_list):
 
121
  return frame
122
 
123
  def resize_landmarks(landmarks, resize_rate_width, resize_rate_height):
124
+ '''
125
+ Resize landmark coordinates by applying specific scaling factors
126
+ to both the width and height of the frame.
127
+
128
+ Parameters
129
+ ----------
130
+ landmarks: dict
131
+ Dictionary mapping keypoint IDs (integers) to landmarks
132
+ (lists of two floats corresponding to the coordinates).
133
+ resize_rate_width: float
134
+ Scaling factor applied to the x-coordinate (width).
135
+ resize_rate_height: float
136
+ Scaling factor applied to the y-coordinate (height).
137
+
138
+ Returns
139
+ -------
140
+ landmarks: dict
141
+ Dictionary mapping keypoint IDs (integers) to the newly resized landmarks
142
+ (lists of two integers corresponding to the coordinates).
143
+ '''
144
+
145
  for keypoint in landmarks.keys():
146
  landmark_x, landmark_y = landmarks[keypoint]
147
  landmarks[keypoint] = [int(resize_rate_width * landmark_x), int(resize_rate_height*landmark_y)]
148
+
149
  return landmarks
150
 
151
  def generate_video(gloss_list, dataset, vocabulary_list):
152
+ '''
153
+ Generate a video stream from a list of glosses.
154
+
155
+ Parameters
156
+ ----------
157
+ gloss_list: list of str
158
+ List of glosses from which the signing video will be generated.
159
+ dataset: pandas.DataFrame
160
+ Dataset containing information about each gloss, including paths to landmark data.
161
+ vocabulary_list: list of str
162
+ List of tokens that have associated landmarks collected.
163
+
164
+ Yields
165
+ ------
166
+ frame: bytes
167
+ JPEG-encoded frame for streaming.
168
+ '''
169
+ # ---- Fix size of the frame to the most common size of video we have in the dataset
170
+ # (corresponding to signer ID 11 who has the maximum number of videos).
171
+ #
172
  FIXED_WIDTH, FIXED_HEIGHT = 576, 384
 
173
 
174
+ # ---- Fix the Frames Per Second (FPS) to match the videos collected in the dataset.
175
+ #
176
+ FPS = 25
177
+
178
+ # ---- Define characteristics for text display.
179
+ #
180
+ font = cv2.FONT_HERSHEY_SIMPLEX
181
+ font_scale = 1
182
+ font_color = (0, 255, 0)
183
+ thickness = 2
184
+ line_type = cv2.LINE_AA
185
+
186
+ # ---- Loop over each gloss
187
+ #
188
  for gloss in gloss_list:
189
+ # ---- Skip if gloss not in the vocabulary_list.
190
+ #
191
  if not check_gloss_in_vocabulary(gloss, vocabulary_list):
192
  continue
193
+
194
+ # ---- Get landmarks of all the frames in the dataset corresponding to the appropriate gloss.
195
+ #
196
  video_id = select_video_id_from_gloss(gloss, dataset)
197
  video_landmarks_path = dataset.loc[dataset['video_id'] == video_id, 'video_landmarks_path'].values[0]
198
  with open(video_landmarks_path, 'r') as f:
 
200
  width = video_landmarks[-1].get('width')
201
  height = video_landmarks[-1].get('height')
202
 
203
+ # ---- Calculate resize rate for future landmark rescaling.
204
+ #
205
  resize_rate_width, resize_rate_height = FIXED_WIDTH / width, FIXED_HEIGHT/height
206
 
207
+ # ---- Loop over each frame
208
+ #
 
 
 
 
 
209
  for frame_landmarks in video_landmarks[:-1]:
210
+ # ---- Initialize blank image and get all landmarks of the given frame.
211
+ #
212
  blank_image = np.zeros((FIXED_HEIGHT, FIXED_WIDTH, 3), dtype=np.uint8)
213
  frame_hands_landmarks = frame_landmarks['hands_landmarks']
214
  frame_pose_landmarks = frame_landmarks['pose_landmarks']
215
  frame_face_landmarks = frame_landmarks['face_landmarks']
216
 
217
+ # ---- Resize landmarks.
218
+ #
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  frame_hands_landmarks_rs = {
220
  'left_hand': resize_landmarks(frame_hands_landmarks['left_hand'], resize_rate_width, resize_rate_height),
221
  'right_hand': resize_landmarks(frame_hands_landmarks['right_hand'], resize_rate_width, resize_rate_height)
222
  }
223
  frame_pose_landmarks_rs = resize_landmarks(frame_pose_landmarks, resize_rate_width, resize_rate_height)
224
  frame_face_landmarks_rs = resize_landmarks(frame_face_landmarks, resize_rate_width, resize_rate_height)
225
+
226
+ # ---- Draw relevant connections between keypoints on the frame.
227
+ #
228
  draw_hands_connections(blank_image, frame_hands_landmarks_rs)
229
  draw_pose_connections(blank_image, frame_pose_landmarks_rs)
230
  draw_face_connections(blank_image, frame_face_landmarks_rs)
231
 
232
+ # ---- Display text corresponding to the gloss on the frame.
233
+ #
234
+ text_size, _ = cv2.getTextSize(gloss, font, font_scale, thickness)
235
  text_x = (FIXED_WIDTH - text_size[0]) // 2
236
  text_y = FIXED_HEIGHT - 10
237
+ cv2.putText(blank_image, gloss, (text_x, text_y), font, font_scale, font_color, thickness, line_type)
238
 
239
+ # ---- JPEG-encode the frame for streaming.
240
+ #
241
  _, buffer = cv2.imencode('.jpg', blank_image)
242
  frame = buffer.tobytes()
243
 
244
  yield (b'--frame\r\n'
245
  b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
246
 
247
+ time.sleep(1 / FPS)
248
+
249
 
250
+ def load_data(dataset_path='enhanced_dataset'):
251
+ '''
252
+ Load the dataset that contains all information about glosses.
253
+
254
+ Parameters
255
+ ----------
256
+ dataset_path: str
257
+ Local path to the dataset.
258
+
259
+ Returns
260
+ -------
261
+ data_df: pandas.DataFrame
262
+ DataFrame containing the dataset with information about each gloss.
263
+ vocabulary_list: list of str
264
+ List of glosses (tokens) that have associated landmarks collected.
265
+ '''
266
 
 
267
  filepath = dataset_path
268
  data_df = pd.read_csv(filepath, dtype={'video_id': str})
269
  vocabulary_list = data_df['gloss'].tolist()
270
+
271
  return data_df, vocabulary_list
272
 
273
 
274
  def check_gloss_in_vocabulary(gloss, vocabulary_list):
275
+ '''
276
+ Check if the given gloss is in the vocabulary list.
277
+
278
+ Parameters
279
+ ----------
280
+ gloss: str
281
+ The gloss to check.
282
+ vocabulary_list: list of str
283
+ List of glosses (tokens) that have associated landmarks collected.
284
+
285
+ Returns
286
+ -------
287
+ bool
288
+ True if the gloss is in the vocabulary list, False otherwise.
289
+ '''
290
+
291
  return gloss in vocabulary_list
292
 
293
+
294
  def select_video_id_from_gloss(gloss, dataset):
295
+ '''
296
+ Selects a video ID corresponding to the given gloss from the dataset.
297
+
298
+ Parameters
299
+ ----------
300
+ gloss : str
301
+ The gloss for which to retrieve the video ID.
302
+ dataset : pandas.DataFrame
303
+ A DataFrame containing information about each gloss, including 'signer_id', 'gloss', and 'video_id'.
304
+
305
+ Returns
306
+ -------
307
+ int
308
+ The video ID corresponding to the given gloss. If the gloss is found for 'signer_id' 11, the video ID for that signer is returned; otherwise, the video ID for the gloss from the entire dataset is returned.
309
+ '''
310
+ # ---- Preferentially choose signer ID 11 because this signer signed the most videos
311
+ #
312
  filtered_data_id_11 = dataset.loc[dataset['signer_id'] == 11]
313
+
314
  if gloss in filtered_data_id_11['gloss'].tolist():
315
  video_id = filtered_data_id_11.loc[filtered_data_id_11['gloss'] == gloss, 'video_id'].values
316
  else:
317
  video_id = dataset.loc[dataset['gloss'] == gloss, 'video_id'].values
318
+
319
  return video_id[0]
src/{local_dataset → enhanced_dataset} RENAMED
File without changes
src/main.py CHANGED
@@ -1,40 +1,60 @@
1
  import display_gloss as dg
2
- import numpy as np
3
  import synonyms_preprocess as sp
4
  from NLP_Spacy_base_translator import NlpSpacyBaseTranslator
5
- from flask import Flask, render_template, Response, request
6
-
7
 
 
 
8
  app = Flask(__name__)
9
 
10
-
 
11
  @app.route('/')
12
  def index():
 
13
  return render_template('index.html')
14
 
 
 
15
  @app.route('/translate/', methods=['POST'])
16
  def result():
 
 
 
17
  nlp, dict_docs_spacy = sp.load_spacy_values()
18
  _, list_2000_tokens = dg.load_data()
19
 
20
  if request.method == 'POST':
 
 
 
21
  sentence = request.form['inputSentence']
22
  eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=sentence)
23
  generated_gloss = eng_to_asl_translator.translate_to_gloss()
24
  gloss_list_lower = [gloss.lower() for gloss in generated_gloss.split() if gloss.isalnum() ]
25
  gloss_sentence_before_synonym = " ".join(gloss_list_lower)
 
 
 
26
  gloss_list = [sp.find_synonyms(gloss, nlp, dict_docs_spacy, list_2000_tokens) for gloss in gloss_list_lower]
27
  gloss_sentence_after_synonym = " ".join(gloss_list)
 
 
 
28
  return render_template('translate.html',\
29
  sentence=sentence,\
30
  gloss_sentence_before_synonym=gloss_sentence_before_synonym,\
31
  gloss_sentence_after_synonym=gloss_sentence_after_synonym)
32
 
 
 
33
  @app.route('/video_feed')
34
  def video_feed():
 
35
  dataset, list_2000_tokens = dg.load_data()
36
  sentence = request.args.get('gloss_sentence_to_display', '')
37
  gloss_list = sentence.split()
 
38
  return Response(dg.generate_video(gloss_list, dataset, list_2000_tokens), mimetype='multipart/x-mixed-replace; boundary=frame')
39
 
40
  if __name__ == "__main__":
 
1
  import display_gloss as dg
 
2
  import synonyms_preprocess as sp
3
  from NLP_Spacy_base_translator import NlpSpacyBaseTranslator
4
+ from flask import Flask, render_template, Response, request
 
5
 
6
+ # ---- Initialise Flask App
7
+ #
8
  app = Flask(__name__)
9
 
10
+ # ---- Render the homepage template
11
+ #
12
  @app.route('/')
13
  def index():
14
+
15
  return render_template('index.html')
16
 
17
+ # ---- Translate english input sentence into gloss sentence
18
+ #
19
  @app.route('/translate/', methods=['POST'])
20
  def result():
21
+
22
+ # ---- Load NLP models and data
23
+ #
24
  nlp, dict_docs_spacy = sp.load_spacy_values()
25
  _, list_2000_tokens = dg.load_data()
26
 
27
  if request.method == 'POST':
28
+
29
+ # ---- Get the raw sentence and translate it to gloss
30
+ #
31
  sentence = request.form['inputSentence']
32
  eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=sentence)
33
  generated_gloss = eng_to_asl_translator.translate_to_gloss()
34
  gloss_list_lower = [gloss.lower() for gloss in generated_gloss.split() if gloss.isalnum() ]
35
  gloss_sentence_before_synonym = " ".join(gloss_list_lower)
36
+
37
+ # ---- Substitute gloss tokens with synonyms if not in the common token list
38
+ #
39
  gloss_list = [sp.find_synonyms(gloss, nlp, dict_docs_spacy, list_2000_tokens) for gloss in gloss_list_lower]
40
  gloss_sentence_after_synonym = " ".join(gloss_list)
41
+
42
+ # ---- Render the result template with both versions of the gloss sentence
43
+ #
44
  return render_template('translate.html',\
45
  sentence=sentence,\
46
  gloss_sentence_before_synonym=gloss_sentence_before_synonym,\
47
  gloss_sentence_after_synonym=gloss_sentence_after_synonym)
48
 
49
+ # ---- Generate video streaming from gloss_sentence
50
+ #
51
  @app.route('/video_feed')
52
  def video_feed():
53
+
54
  dataset, list_2000_tokens = dg.load_data()
55
  sentence = request.args.get('gloss_sentence_to_display', '')
56
  gloss_list = sentence.split()
57
+
58
  return Response(dg.generate_video(gloss_list, dataset, list_2000_tokens), mimetype='multipart/x-mixed-replace; boundary=frame')
59
 
60
  if __name__ == "__main__":
src/synonyms_preprocess.py CHANGED
@@ -4,10 +4,34 @@ from nltk.corpus import wordnet
4
 
5
 
6
  def load_spacy_values(filepath_model_spacy='model_spacy_synonyms', filepath_docs_spacy = 'dict_spacy_object.pkl'):
 
 
7
 
8
- nlp = spacy.load(filepath_model_spacy)
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
10
 
 
 
 
 
 
 
11
  with open(filepath_docs_spacy, 'rb') as file:
12
  dict_docs_spacy_bytes = pickle.load(file)
13
 
@@ -15,33 +39,86 @@ def load_spacy_values(filepath_model_spacy='model_spacy_synonyms', filepath_docs
15
 
16
  return nlp, dict_docs_spacy
17
 
 
18
  def find_antonyms(word):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  antonyms = set()
 
 
 
20
  syn_set = wordnet.synsets(word)
 
 
 
21
  for syn in syn_set:
 
 
22
  for lemma in syn.lemmas():
 
 
23
  if lemma.antonyms():
24
  antonyms.add(lemma.antonyms()[0].name())
 
25
  return antonyms
26
 
27
- def find_synonyms(word, model, dict_embedding, dict_2000_tokens): #cluster_to_words, dbscan_model):
28
- """
29
- This function finds the most similar word in the same cluster, and excludes antonyms
30
- """
31
 
32
- if word in dict_2000_tokens:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  return word
34
  else:
 
 
35
  antonyms = find_antonyms(word)
36
- dict_2000_tokens_less_antonyms = [token for token in dict_2000_tokens if token not in antonyms]
37
 
 
 
38
  word_embedding = model(word)
39
-
40
  similarities=[]
41
 
42
- for token in dict_2000_tokens_less_antonyms:
43
  similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
44
-
 
 
45
  most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
46
 
47
  return most_similar_token
 
4
 
5
 
6
  def load_spacy_values(filepath_model_spacy='model_spacy_synonyms', filepath_docs_spacy = 'dict_spacy_object.pkl'):
7
+ '''
8
+ Loads a spaCy model and a dictionary of spaCy Doc objects from a pickle file.
9
 
10
+ Parameters
11
+ ----------
12
+ filepath_model_spacy : str
13
+ The local path to the spaCy model used for synonym detection.
14
+
15
+ filepath_docs_spacy : str
16
+ The local path to the pickle file containing a dictionary where the keys are tokens
17
+ and the values are the corresponding spaCy Doc objects serialized as bytes.
18
+
19
+ Returns
20
+ -------
21
+ nlp : spacy.language.Language
22
+ The loaded spaCy language model.
23
 
24
+ dict_docs_spacy : dict
25
+ A dictionary where the keys are tokens (str) and the values are spaCy Doc objects,
26
+ reconstructed from the serialized bytes.
27
+ '''
28
 
29
+ # ---- Load the spaCy NLP model
30
+ #
31
+ nlp = spacy.load(filepath_model_spacy)
32
+
33
+ # ---- Load pickle file and reconstruct the dictionary with tokens as keys and spaCy Doc objects as values
34
+ #
35
  with open(filepath_docs_spacy, 'rb') as file:
36
  dict_docs_spacy_bytes = pickle.load(file)
37
 
 
39
 
40
  return nlp, dict_docs_spacy
41
 
42
+
43
  def find_antonyms(word):
44
+ '''
45
+ Generate a set of all the antonyms of a given word
46
+
47
+ Parameters
48
+ ----------
49
+ word : str
50
+ The word for which we want to find the antonyms
51
+
52
+ Returns
53
+ -------
54
+ antonyms : set of str
55
+ A set of all the antonyms detected using nltk and WordNet
56
+ '''
57
+
58
  antonyms = set()
59
+
60
+ # ---- Load all the sets of synonyms of the word recorded from WordNet
61
+ #
62
  syn_set = wordnet.synsets(word)
63
+
64
+ # ---- Loop over each set of synonyms
65
+ #
66
  for syn in syn_set:
67
+ # ---- Loop over each synonym
68
+ #
69
  for lemma in syn.lemmas():
70
+ # ---- Add antonyms of the synonyms to the antonyms set
71
+ #
72
  if lemma.antonyms():
73
  antonyms.add(lemma.antonyms()[0].name())
74
+
75
  return antonyms
76
 
 
 
 
 
77
 
78
+ def find_synonyms(word, model, dict_embedding, list_2000_tokens):
79
+ '''
80
+ Finds the most similar token to a given word.
81
+
82
+ Parameters
83
+ ----------
84
+ word : str
85
+ The word for which we want to find the most similar token
86
+
87
+ model : spacy.language.Language
88
+ spaCy language model to use for the detection of the synonym
89
+
90
+ dict_embedding: dict
91
+ A dictionary where the keys are tokens (str) and the values are spaCy Doc objects
92
+
93
+ list_2000_tokens : list of str
94
+ A list of 2000 tokens against which the gloss will be checked.
95
+
96
+ Returns
97
+ -------
98
+ most_similar_token : str
99
+ The most similar token to the given word
100
+ '''
101
+
102
+ # ---- Skip synonym detection if the word is already in the list_2000_tokens
103
+ #
104
+ if word in list_2000_tokens:
105
  return word
106
  else:
107
+ # ---- Remove antonyms of the given word from the list_2000_tokens (a word and an antonym might be similar in embedding representation)
108
+ #
109
  antonyms = find_antonyms(word)
110
+ list_2000_tokens_less_antonyms = [token for token in list_2000_tokens if token not in antonyms]
111
 
112
+ # ---- Generate a list of tuple (token, similarities values between the embedding of the given word and the embedding of each token of the list_2000_tokens)
113
+ #
114
  word_embedding = model(word)
 
115
  similarities=[]
116
 
117
+ for token in list_2000_tokens_less_antonyms:
118
  similarities.append((token, dict_embedding.get(token).similarity(word_embedding)))
119
+
120
+ # ---- Extract the most similar token of the list
121
+ #
122
  most_similar_token = sorted(similarities, key=lambda item: -item[1])[0][0]
123
 
124
  return most_similar_token