Aumkeshchy2003 committed on
Commit
fb7988f
·
verified ·
1 Parent(s): ffe6d99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -3,11 +3,18 @@ from PIL import Image
3
  import pytesseract
4
  import re
5
 
6
- pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
 
 
7
 
8
- def perform_ocr(image):
9
- extracted_text = pytesseract.image_to_string(image, lang='hin+eng')
10
- return extracted_text
 
 
 
 
 
11
 
12
  def search_and_highlight(text, keyword):
13
  highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)
 
3
  import pytesseract
4
  import re
5
 
6
def tesseract_ocr(filepath: str, languages: list[str]):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        filepath: Path of the image file to read.
        languages: Tesseract language codes to load together, e.g.
            ``["jpn", "eng"]``. An empty list leaves the language choice
            to pytesseract's default.

    Returns:
        The value produced by ``pytesseract.image_to_string`` for the image.
    """
    # Tesseract's ``-l`` option takes codes joined with "+" ("jpn+eng");
    # a ", " separator is read as part of a single, nonexistent language.
    lang = "+".join(languages) or None
    # Release the underlying file handle as soon as OCR has finished.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(image=image, lang=lang)
9
 
10
+ title = "Tesseract OCR"
11
+ description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
12
+ article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
13
+ examples = [
14
+ ['examples/eurotext.png', ['eng']],
15
+ ['examples/tesseract_sample.png', ['jpn', 'eng']],
16
+ ['examples/chi.jpg', ['HanS', 'HanT']]
17
+ ]
18
 
19
  def search_and_highlight(text, keyword):
20
  highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", text, flags=re.IGNORECASE)