Spaces:

saidivyesh
/

OCR1

Sleeping

saidivyesh committed on Sep 26, 2024

Commit

8785ffa

verified ·

1 Parent(s): caccf76

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,23 +1,18 @@
 import streamlit as st
 from PIL import Image
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
-import torch
-# Load the pre-trained models from Hugging Face
-processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
-model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
 # Function to process image and perform OCR
 def process_image(image):
-    # Open image
-    img = Image.open(image).convert("RGB")
-    # Prepare image for the model
-    pixel_values = processor(images=img, return_tensors="pt").pixel_values
-    # Generate text
-    generated_ids = model.generate(pixel_values)
-    # Decode generated text
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return generated_text
 # Function to highlight keywords in extracted text
 def highlight_keywords(text, keyword):
@@ -51,4 +46,4 @@ if uploaded_file is not None:
         # Highlight the search keyword
         st.subheader("Search Results:")
         result = highlight_keywords(extracted_text, query)
-        st.write(result)

 import streamlit as st
 from PIL import Image
+import easyocr
+# Initialize the EasyOCR reader
+reader = easyocr.Reader(['en', 'hi'], gpu=False)  # 'en' for English, 'hi' for Hindi
 # Function to process image and perform OCR
 def process_image(image):
+    img = Image.open(image)
+    # Perform OCR
+    result = reader.readtext(img, detail=0, paragraph=False)  # detail=0 returns only the recognized text strings
+    # Split each recognized text block into words and put one word per line
+    words = [word for block in result for word in block.split()]
+    return "\n".join(words)
 # Function to highlight keywords in extracted text
 def highlight_keywords(text, keyword):
         # Highlight the search keyword
         st.subheader("Search Results:")
         result = highlight_keywords(extracted_text, query)
+        st.write(result)