Spaces:

Aumkeshchy2003
/

Gradio-OCR

Sleeping

Aumkeshchy2003 committed on Sep 30, 2024

Commit

fb7988f

verified ·

1 Parent(s): ffe6d99

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,11 +3,18 @@ from PIL import Image
 import pytesseract
 import re
-pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
-def perform_ocr(image):
-    extracted_text = pytesseract.image_to_string(image, lang='hin+eng')
-    return extracted_text
 def search_and_highlight(text, keyword):
     highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)

 import pytesseract
 import re
+def tesseract_ocr(filepath: str, languages: List[str]):
+    image = Image.open(filepath)
+    return pytesseract.image_to_string(image=image, lang=', '.join(languages))
+title = "Tesseract OCR"
+description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
+article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
+examples = [
+        ['examples/eurotext.png', ['eng']],
+        ['examples/tesseract_sample.png', ['jpn', 'eng']],
+        ['examples/chi.jpg', ['HanS', 'HanT']]
+    ]
 def search_and_highlight(text, keyword):
     highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)