Spaces:
Sleeping
Sleeping
Kabilash10
committed on
Commit
•
f643c9b
1
Parent(s):
c86c8e2
Update app.py
Browse files
app.py
CHANGED
@@ -3,13 +3,13 @@ from PIL import Image
|
|
3 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
4 |
import torch
|
5 |
|
6 |
-
# Load the OCR model and processor
|
7 |
-
model_name = "microsoft/trocr-
|
8 |
processor = TrOCRProcessor.from_pretrained(model_name)
|
9 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
10 |
|
11 |
# Streamlit app title
|
12 |
-
st.title("OCR with TrOCR")
|
13 |
|
14 |
# Upload image section
|
15 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
|
@@ -19,10 +19,13 @@ if uploaded_image is not None:
|
|
19 |
image = Image.open(uploaded_image).convert("RGB") # Ensure image is in RGB format
|
20 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
21 |
|
|
|
|
|
|
|
22 |
# Convert image to a suitable format and ensure it's a batch (list of images)
|
23 |
try:
|
24 |
# Convert image to the right format for the processor
|
25 |
-
inputs = processor(images=[
|
26 |
|
27 |
# Perform OCR
|
28 |
with torch.no_grad():
|
|
|
3 |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
4 |
import torch
|
5 |
|
6 |
+
# Load the OCR model and processor (switching to a larger model)
|
7 |
+
model_name = "microsoft/trocr-large-stage1" # You can try this larger model for better accuracy
|
8 |
processor = TrOCRProcessor.from_pretrained(model_name)
|
9 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
10 |
|
11 |
# Streamlit app title
|
12 |
+
st.title("OCR with TrOCR (Improved Accuracy)")
|
13 |
|
14 |
# Upload image section
|
15 |
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
|
|
|
19 |
image = Image.open(uploaded_image).convert("RGB") # Ensure image is in RGB format
|
20 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
21 |
|
22 |
+
# Resize the image to improve OCR accuracy
|
23 |
+
resized_image = image.resize((224, 224)) # Resize to a standard resolution
|
24 |
+
|
25 |
# Convert image to a suitable format and ensure it's a batch (list of images)
|
26 |
try:
|
27 |
# Convert image to the right format for the processor
|
28 |
+
inputs = processor(images=[resized_image], return_tensors="pt") # Put image in a list
|
29 |
|
30 |
# Perform OCR
|
31 |
with torch.no_grad():
|