Spaces:

Kabilash10
/

sample-OCR

Sleeping

Kabilash10 committed on Sep 19, 2024

Commit

f643c9b

verified ·

1 Parent(s): c86c8e2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,13 +3,13 @@ from PIL import Image
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import torch
-# Load the OCR model and processor
-model_name = "microsoft/trocr-base-stage1"
 processor = TrOCRProcessor.from_pretrained(model_name)
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
 # Streamlit app title
-st.title("OCR with TrOCR")
 # Upload image section
 uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
@@ -19,10 +19,13 @@ if uploaded_image is not None:
     image = Image.open(uploaded_image).convert("RGB")  # Ensure image is in RGB format
     st.image(image, caption="Uploaded Image", use_column_width=True)
     # Convert image to a suitable format and ensure it's a batch (list of images)
     try:
         # Convert image to the right format for the processor
-        inputs = processor(images=[image], return_tensors="pt")  # Put image in a list
         # Perform OCR
         with torch.no_grad():

 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 import torch
+# Load the OCR model and processor (switching to a larger model)
+model_name = "microsoft/trocr-large-stage1"  # You can try this larger model for better accuracy
 processor = TrOCRProcessor.from_pretrained(model_name)
 model = VisionEncoderDecoderModel.from_pretrained(model_name)
 # Streamlit app title
+st.title("OCR with TrOCR (Improved Accuracy)")
 # Upload image section
 uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
     image = Image.open(uploaded_image).convert("RGB")  # Ensure image is in RGB format
     st.image(image, caption="Uploaded Image", use_column_width=True)
+    # Resize the image to improve OCR accuracy
+    resized_image = image.resize((224, 224))  # Resize to a standard resolution
     # Convert image to a suitable format and ensure it's a batch (list of images)
     try:
         # Convert image to the right format for the processor
+        inputs = processor(images=[resized_image], return_tensors="pt")  # Put image in a list
         # Perform OCR
         with torch.no_grad():