juanpablomesa
committed on
Commit
·
374a5b3
1
Parent(s):
f33eeea
Changed back to set, with json.dumps for serializing
Browse files- handler.py +11 -3
handler.py
CHANGED
@@ -15,6 +15,7 @@ from decord import cpu
|
|
15 |
|
16 |
import timeit
|
17 |
import easyocr
|
|
|
18 |
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
|
@@ -161,6 +162,11 @@ class EndpointHandler:
|
|
161 |
# self.logger.info("Returning batch_emb list")
|
162 |
return batch_emb
|
163 |
|
|
|
|
|
|
|
|
|
|
|
164 |
def process_video(self, video_url, video_metadata):
|
165 |
try:
|
166 |
self.logger.info("Downloading video as bytes.")
|
@@ -190,10 +196,12 @@ class EndpointHandler:
|
|
190 |
self.logger.info("Extracting text from frames.")
|
191 |
text_extraction_start_time = timeit.default_timer()
|
192 |
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
193 |
-
|
194 |
for text_list in frame_texts:
|
195 |
-
[
|
196 |
-
video_metadata["extracted_text"] =
|
|
|
|
|
197 |
text_extraction_end_time = timeit.default_timer()
|
198 |
self.logger.info(
|
199 |
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|
|
|
15 |
|
16 |
import timeit
|
17 |
import easyocr
|
18 |
+
import json
|
19 |
|
20 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
21 |
|
|
|
162 |
# self.logger.info("Returning batch_emb list")
|
163 |
return batch_emb
|
164 |
|
165 |
+
def set_default(self, obj):
|
166 |
+
if isinstance(obj, set):
|
167 |
+
return list(obj)
|
168 |
+
raise TypeError
|
169 |
+
|
170 |
def process_video(self, video_url, video_metadata):
|
171 |
try:
|
172 |
self.logger.info("Downloading video as bytes.")
|
|
|
196 |
self.logger.info("Extracting text from frames.")
|
197 |
text_extraction_start_time = timeit.default_timer()
|
198 |
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
199 |
+
texts_set = set()
|
200 |
for text_list in frame_texts:
|
201 |
+
[texts_set.add(text) for text in text_list]
|
202 |
+
video_metadata["extracted_text"] = json.dumps(
|
203 |
+
texts_set, default=self.set_default
|
204 |
+
)
|
205 |
text_extraction_end_time = timeit.default_timer()
|
206 |
self.logger.info(
|
207 |
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|