juanpablomesa committed on
Commit
0dbfcb8
·
1 Parent(s): cd14c77

Added EasyOCR text extraction for video frames

Browse files
Files changed (2) hide show
  1. handler.py +15 -0
  2. requirements.txt +2 -1
handler.py CHANGED
@@ -14,6 +14,7 @@ from decord import VideoReader
14
  from decord import cpu
15
 
16
  import timeit
 
17
 
18
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
19
 
@@ -32,6 +33,7 @@ class EndpointHandler:
32
 
33
  logging.set_verbosity_debug()
34
  self.logger = logging.get_logger(__name__)
 
35
  # Check if CUDA (GPU support) is available
36
  if torch.cuda.is_available():
37
  self.logger.info("GPU is available for inference.")
@@ -184,6 +186,19 @@ class EndpointHandler:
184
  self.logger.info(
185
  f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
186
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  video_metadata["url"] = video_url
188
  self.logger.info("Returning embeddings and metadata.")
189
  return frame_embeddings, video_metadata
 
14
  from decord import cpu
15
 
16
  import timeit
17
+ import easyocr
18
 
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
 
 
33
 
34
  logging.set_verbosity_debug()
35
  self.logger = logging.get_logger(__name__)
36
+ self.reader = easyocr.Reader(["de", "en"]) # Add more languages if needed
37
  # Check if CUDA (GPU support) is available
38
  if torch.cuda.is_available():
39
  self.logger.info("GPU is available for inference.")
 
186
  self.logger.info(
187
  f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
188
  )
189
+ # Extract text from each frame using EasyOCR
190
+ self.logger.info("Extracting text from frames.")
191
+ text_extraction_start_time = timeit.default_timer()
192
+ frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
193
+ texts_set = set()
194
+ for text_list in frame_texts:
195
+ [texts_set.add(text) for text in text_list]
196
+ video_metadata["extracted_text"] = texts_set
197
+ text_extraction_end_time = timeit.default_timer()
198
+ self.logger.info(
199
+ f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
200
+ )
201
+
202
  video_metadata["url"] = video_url
203
  self.logger.info("Returning embeddings and metadata.")
204
  return frame_embeddings, video_metadata
requirements.txt CHANGED
@@ -23,4 +23,5 @@ tqdm==4.66.1
23
  transformers==4.27.2
24
  typing_extensions==4.8.0
25
  urllib3==2.0.7
26
- decord==0.6.0
 
 
23
  transformers==4.27.2
24
  typing_extensions==4.8.0
25
  urllib3==2.0.7
26
+ decord==0.6.0
27
+ easyocr==1.7.1