saidivyesh committed on
Commit
8785ffa
·
verified ·
1 Parent(s): caccf76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -15
app.py CHANGED
@@ -1,23 +1,18 @@
1
  import streamlit as st
2
  from PIL import Image
3
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
4
- import torch
5
 
6
- # Load the pre-trained models from Hugging Face
7
- processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
8
- model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
9
 
10
  # Function to process image and perform OCR
11
  def process_image(image):
12
- # Open image
13
- img = Image.open(image).convert("RGB")
14
- # Prepare image for the model
15
- pixel_values = processor(images=img, return_tensors="pt").pixel_values
16
- # Generate text
17
- generated_ids = model.generate(pixel_values)
18
- # Decode generated text
19
- generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
20
- return generated_text
21
 
22
  # Function to highlight keywords in extracted text
23
  def highlight_keywords(text, keyword):
@@ -51,4 +46,4 @@ if uploaded_file is not None:
51
  # Highlight the search keyword
52
  st.subheader("Search Results:")
53
  result = highlight_keywords(extracted_text, query)
54
- st.write(result)
 
1
  import streamlit as st
2
  from PIL import Image
3
+ import easyocr
 
4
 
5
+ # Initialize the EasyOCR reader
6
+ reader = easyocr.Reader(['en', 'hi'], gpu=False) # 'en' for English, 'hi' for Hindi
 
7
 
8
  # Function to process image and perform OCR
9
  def process_image(image):
10
+ img = Image.open(image)
11
+ # Perform OCR
12
+ result = reader.readtext(img, detail=0, paragraph=False) # detail=0 returns only the recognized text strings
13
+ # Split each recognized text block into words and join them one word per line
14
+ words = [word for block in result for word in block.split()]
15
+ return "\n".join(words)
 
 
 
16
 
17
  # Function to highlight keywords in extracted text
18
  def highlight_keywords(text, keyword):
 
46
  # Highlight the search keyword
47
  st.subheader("Search Results:")
48
  result = highlight_keywords(extracted_text, query)
49
+ st.write(result)