juanpablomesa committed on
Commit
374a5b3
·
1 Parent(s): f33eeea

Changed back to a set, with json.dumps for serialization

Browse files
Files changed (1) hide show
  1. handler.py +11 -3
handler.py CHANGED
@@ -15,6 +15,7 @@ from decord import cpu
15
 
16
  import timeit
17
  import easyocr
 
18
 
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
 
@@ -161,6 +162,11 @@ class EndpointHandler:
161
  # self.logger.info("Returning batch_emb list")
162
  return batch_emb
163
 
 
 
 
 
 
164
  def process_video(self, video_url, video_metadata):
165
  try:
166
  self.logger.info("Downloading video as bytes.")
@@ -190,10 +196,12 @@ class EndpointHandler:
190
  self.logger.info("Extracting text from frames.")
191
  text_extraction_start_time = timeit.default_timer()
192
  frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
193
- all_texts_list = []
194
  for text_list in frame_texts:
195
- [all_texts_list.append(text) for text in text_list]
196
- video_metadata["extracted_text"] = all_texts_list
 
 
197
  text_extraction_end_time = timeit.default_timer()
198
  self.logger.info(
199
  f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
 
15
 
16
  import timeit
17
  import easyocr
18
+ import json
19
 
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
 
 
162
  # self.logger.info("Returning batch_emb list")
163
  return batch_emb
164
 
165
+ def set_default(self, obj):
166
+ if isinstance(obj, set):
167
+ return list(obj)
168
+ raise TypeError
169
+
170
  def process_video(self, video_url, video_metadata):
171
  try:
172
  self.logger.info("Downloading video as bytes.")
 
196
  self.logger.info("Extracting text from frames.")
197
  text_extraction_start_time = timeit.default_timer()
198
  frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
199
+ texts_set = set()
200
  for text_list in frame_texts:
201
+ [texts_set.add(text) for text in text_list]
202
+ video_metadata["extracted_text"] = json.dumps(
203
+ texts_set, default=self.set_default
204
+ )
205
  text_extraction_end_time = timeit.default_timer()
206
  self.logger.info(
207
  f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"