Spaces:

AhmadXGaballah
/

Fact_Checker1

Sleeping

App Files Files Community

AhmadXGaballah committed 18 days ago

Commit

b9325a4

verified ·

1 Parent(s): 5953834

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -22

app.py CHANGED Viewed

@@ -61,7 +61,7 @@ whisper = _try_import("whisper")
 _openai = _try_import("openai")
 _has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
-# ---- ASR guarded imports (add these)
 try:
     from faster_whisper import WhisperModel as FWWhisperModel
 except Exception:
@@ -72,6 +72,23 @@ try:
 except Exception:
     OpenAIWhisper = None
 def has_llm() -> bool:
     return (not FORCE_BASELINE) and _openai is not None and _has_openai_key
@@ -378,6 +395,35 @@ def run_whisper_asr(audio_path: str, model_size: str = "base", language: Optiona
     # Nothing available
     raise RuntimeError("No ASR backend available (install faster-whisper or openai-whisper).")
 def download_video(url: str, out_dir: str = "videos") -> str:
     # yt-dlp is installed via requirements; call binary
@@ -404,26 +450,50 @@ def preprocess_for_ocr(img_path: str):
                                cv2.THRESH_BINARY, 31, 9)
     return th
-def run_easyocr_on_frames(frames: List[str], languages: List[str] = ["en"], gpu: Optional[bool] = None, max_images: Optional[int] = None) -> List[str]:
-    if easyocr is None:
-        raise RuntimeError("EasyOCR not available. Ensure easyocr + opencv-python-headless are installed.")
-    if gpu is None:
-        gpu = True if (os.environ.get("SPACE_ID") or shutil.which("nvidia-smi")) else False
-    reader = easyocr.Reader(languages, gpu=gpu)
-    texts, count = [], 0
-    for fp in frames:
-        if max_images and count >= max_images: break
-        img = preprocess_for_ocr(fp)
-        if img is None: continue
-        for (_bbox, txt, conf) in reader.readtext(img):
-            txt = normalize_ws(txt)
-            if txt and conf >= 0.35: texts.append(txt)
-        count += 1
-    uniq, seen = [], set()
-    for t in texts:
-        k = t.lower()
-        if k not in seen: uniq.append(t); seen.add(k)
-    return uniq
 def aggregate_text(asr_text: str, ocr_lines: List[str]) -> str:
     parts = []
@@ -450,7 +520,11 @@ def process_video(video_file: Optional[str] = None, video_url: Optional[str] = N
     frames_dir = os.path.join(workdir, "frames")
     frames = sample_frames_ffmpeg(vp, out_dir=frames_dir, fps=fps)
     langs = [x.strip() for x in ocr_langs.split(",") if x.strip()]
-    ocr_lines = run_easyocr_on_frames(frames, languages=langs, gpu=None, max_images=int(max_ocr_images))
     open(os.path.join(workdir, "transcript_ocr.txt"), "w").write("\n".join(ocr_lines))
     agg = aggregate_text(asr_text, ocr_lines)
     open(os.path.join(workdir, "transcript_aggregated.txt"), "w").write(agg)

 _openai = _try_import("openai")
 _has_openai_key = bool(os.environ.get("OPENAI_API_KEY"))
+# ---- ASR guarded imports
 try:
     from faster_whisper import WhisperModel as FWWhisperModel
 except Exception:
 except Exception:
     OpenAIWhisper = None
+# ---- OCR guarded imports
+try:
+    import easyocr as _easyocr
+except Exception:
+    _easyocr = None
+try:
+    import pytesseract as _pyt
+except Exception:
+    _pyt = None
+try:
+    import cv2
+except Exception:
+    cv2 = None
 def has_llm() -> bool:
     return (not FORCE_BASELINE) and _openai is not None and _has_openai_key
     # Nothing available
     raise RuntimeError("No ASR backend available (install faster-whisper or openai-whisper).")
+def _ocr_with_tesseract(frames: list[str], langs_csv: str, max_images: int | None) -> list[str]:
+    """Run Tesseract OCR over frame images and return the distinct text lines.
+
+    Args:
+        frames: Paths of the frame images to read.
+        langs_csv: Comma-separated language codes; handed to ``_tess_langs``
+            and the result is passed as pytesseract's ``lang`` argument.
+            (``_tess_langs`` is defined outside this view -- presumably it
+            maps the codes to Tesseract language names; confirm.)
+        max_images: Upper bound on the number of frames visited; a falsy
+            value (``None`` or ``0``) means no bound.
+
+    Returns:
+        Whitespace-normalized lines of at least 3 characters, de-duplicated
+        case-insensitively (first spelling wins, first-seen order kept).
+        An empty list when pytesseract or cv2 could not be imported.
+    """
+    if _pyt is None or cv2 is None:
+        return []
+    tess_lang = _tess_langs(langs_csv)
+    # Keyed by lowercase text; dict insertion order preserves first-seen order.
+    first_seen: dict[str, str] = {}
+    for idx, frame_path in enumerate(frames):
+        # Every visited frame counts toward the cap, including unreadable ones.
+        if max_images and idx >= max_images:
+            break
+        image = preprocess_for_ocr(frame_path)
+        if image is None:
+            continue
+        try:
+            blob = _pyt.image_to_string(image, lang=tess_lang)  # one text blob per frame
+        except Exception:
+            # Best effort: a frame Tesseract cannot process contributes nothing.
+            blob = ""
+        for raw_line in (blob or "").splitlines():
+            cleaned = normalize_ws(raw_line)
+            if len(cleaned) >= 3:
+                first_seen.setdefault(cleaned.lower(), cleaned)
+    return list(first_seen.values())
 def download_video(url: str, out_dir: str = "videos") -> str:
     # yt-dlp is installed via requirements; call binary
                                cv2.THRESH_BINARY, 31, 9)
     return th
+def run_ocr_on_frames(frames: list[str], languages: list[str] | str = "en", gpu: bool | None = None, max_images: int | None = None) -> list[str]:
+    """Extract on-screen text from frames, trying EasyOCR first, then Tesseract.
+
+    Args:
+        frames: Paths of the frame images to read.
+        languages: Language codes, either as a list or as a comma-separated
+            string. Anything that yields no usable code (``None``, ``""``,
+            ``[]``, whitespace only) falls back to ``"en"``.
+        gpu: Whether EasyOCR should use a GPU. ``None`` auto-detects: enabled
+            when the ``SPACE_ID`` environment variable is set or an
+            ``nvidia-smi`` binary is on ``PATH``.
+        max_images: Upper bound on the number of frames visited; a falsy
+            value (``None`` or ``0``) means no bound.
+
+    Returns:
+        Distinct text lines (case-insensitive de-duplication, first-seen
+        order). An empty list when no backend is usable or nothing was read.
+        Backend failures are never raised to the caller.
+    """
+    import logging  # function-scope import: the file's import block is outside this view
+
+    # Normalize the languages input to both a CSV string and a list of codes.
+    langs_csv = ",".join(languages) if isinstance(languages, list) else (languages or "")
+    lang_codes = [code.strip() for code in langs_csv.split(",") if code.strip()]
+    if not lang_codes:
+        # An empty list used to reach easyocr.Reader([]); default like "" does.
+        lang_codes, langs_csv = ["en"], "en"
+
+    # 1) Try EasyOCR
+    if _easyocr is not None and cv2 is not None:
+        try:
+            use_gpu = gpu
+            if use_gpu is None:
+                use_gpu = bool(os.environ.get("SPACE_ID") or shutil.which("nvidia-smi"))
+            reader = _easyocr.Reader(lang_codes, gpu=use_gpu)
+            # Keyed by lowercase text; dict insertion order keeps first-seen order.
+            first_seen: dict[str, str] = {}
+            for idx, frame_path in enumerate(frames):
+                # Every visited frame counts toward the cap, even unreadable ones.
+                if max_images and idx >= max_images:
+                    break
+                image = preprocess_for_ocr(frame_path)
+                if image is None:
+                    continue
+                for (_bbox, txt, conf) in reader.readtext(image):
+                    txt = normalize_ws(txt)
+                    # Drop low-confidence detections.
+                    if txt and conf >= 0.35:
+                        first_seen.setdefault(txt.lower(), txt)
+            if first_seen:
+                return list(first_seen.values())
+        except Exception:
+            # Still best effort, but record why EasyOCR failed before falling back.
+            logging.getLogger(__name__).warning(
+                "EasyOCR failed; falling back to Tesseract", exc_info=True
+            )
+
+    # 2) Fallback: Tesseract. 3) Nothing available or nothing read -> [].
+    return _ocr_with_tesseract(frames, langs_csv, max_images) or []
 def aggregate_text(asr_text: str, ocr_lines: List[str]) -> str:
     parts = []
     frames_dir = os.path.join(workdir, "frames")
     frames = sample_frames_ffmpeg(vp, out_dir=frames_dir, fps=fps)
     langs = [x.strip() for x in ocr_langs.split(",") if x.strip()]
+    ocr_langs_csv = ",".join(langs)
+    ocr_lines = run_ocr_on_frames(frames, languages=ocr_langs_csv, gpu=None, max_images=int(max_ocr_images))
+    if not ocr_lines:
+        ocr_lines = ["[OCR skipped: no backend available]"]
     open(os.path.join(workdir, "transcript_ocr.txt"), "w").write("\n".join(ocr_lines))
     agg = aggregate_text(asr_text, ocr_lines)
     open(os.path.join(workdir, "transcript_aggregated.txt"), "w").write(agg)