juanpablomesa
committed on
Commit
·
0dbfcb8
1
Parent(s):
cd14c77
Added easyocr for videoframes
Browse files
- handler.py +15 -0
- requirements.txt +2 -1
handler.py
CHANGED
@@ -14,6 +14,7 @@ from decord import VideoReader
|
|
14 |
from decord import cpu
|
15 |
|
16 |
import timeit
|
|
|
17 |
|
18 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
19 |
|
@@ -32,6 +33,7 @@ class EndpointHandler:
|
|
32 |
|
33 |
logging.set_verbosity_debug()
|
34 |
self.logger = logging.get_logger(__name__)
|
|
|
35 |
# Check if CUDA (GPU support) is available
|
36 |
if torch.cuda.is_available():
|
37 |
self.logger.info("GPU is available for inference.")
|
@@ -184,6 +186,19 @@ class EndpointHandler:
|
|
184 |
self.logger.info(
|
185 |
f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
|
186 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
video_metadata["url"] = video_url
|
188 |
self.logger.info("Returning embeddings and metadata.")
|
189 |
return frame_embeddings, video_metadata
|
|
|
14 |
from decord import cpu
|
15 |
|
16 |
import timeit
|
17 |
+
import easyocr
|
18 |
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
|
|
|
33 |
|
34 |
logging.set_verbosity_debug()
|
35 |
self.logger = logging.get_logger(__name__)
|
36 |
+
self.reader = easyocr.Reader(["de", "en"]) # Add more languages if needed
|
37 |
# Check if CUDA (GPU support) is available
|
38 |
if torch.cuda.is_available():
|
39 |
self.logger.info("GPU is available for inference.")
|
|
|
186 |
self.logger.info(
|
187 |
f"Embedding calculation took {embedding_end_time - embedding_start_time} seconds"
|
188 |
)
|
189 |
+
# Extract text from each frame using EasyOCR
|
190 |
+
self.logger.info("Extracting text from frames.")
|
191 |
+
text_extraction_start_time = timeit.default_timer()
|
192 |
+
frame_texts = [self.reader.readtext(frame, detail=0) for frame in frames]
|
193 |
+
texts_set = set()
|
194 |
+
for text_list in frame_texts:
|
195 |
+
[texts_set.add(text) for text in text_list]
|
196 |
+
video_metadata["extracted_text"] = texts_set
|
197 |
+
text_extraction_end_time = timeit.default_timer()
|
198 |
+
self.logger.info(
|
199 |
+
f"Text extraction took {text_extraction_end_time - text_extraction_start_time} seconds"
|
200 |
+
)
|
201 |
+
|
202 |
video_metadata["url"] = video_url
|
203 |
self.logger.info("Returning embeddings and metadata.")
|
204 |
return frame_embeddings, video_metadata
|
requirements.txt
CHANGED
@@ -23,4 +23,5 @@ tqdm==4.66.1
|
|
23 |
transformers==4.27.2
|
24 |
typing_extensions==4.8.0
|
25 |
urllib3==2.0.7
|
26 |
-
decord==0.6.0
|
|
|
|
23 |
transformers==4.27.2
|
24 |
typing_extensions==4.8.0
|
25 |
urllib3==2.0.7
|
26 |
+
decord==0.6.0
|
27 |
+
easyocr==1.7.1
|