Kabilash10 committed on
Commit
f643c9b
1 Parent(s): c86c8e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -3,13 +3,13 @@ from PIL import Image
3
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
4
  import torch
5
 
6
- # Load the OCR model and processor
7
- model_name = "microsoft/trocr-base-stage1"
8
  processor = TrOCRProcessor.from_pretrained(model_name)
9
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
10
 
11
  # Streamlit app title
12
- st.title("OCR with TrOCR")
13
 
14
  # Upload image section
15
  uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
@@ -19,10 +19,13 @@ if uploaded_image is not None:
19
  image = Image.open(uploaded_image).convert("RGB") # Ensure image is in RGB format
20
  st.image(image, caption="Uploaded Image", use_column_width=True)
21
 
 
 
 
22
  # Convert image to a suitable format and ensure it's a batch (list of images)
23
  try:
24
  # Convert image to the right format for the processor
25
- inputs = processor(images=[image], return_tensors="pt") # Put image in a list
26
 
27
  # Perform OCR
28
  with torch.no_grad():
 
3
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
4
  import torch
5
 
6
+ # Load the OCR model and processor (switching to a larger model)
7
+ model_name = "microsoft/trocr-large-stage1" # You can try this larger model for better accuracy
8
  processor = TrOCRProcessor.from_pretrained(model_name)
9
  model = VisionEncoderDecoderModel.from_pretrained(model_name)
10
 
11
  # Streamlit app title
12
+ st.title("OCR with TrOCR (Improved Accuracy)")
13
 
14
  # Upload image section
15
  uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])
 
19
  image = Image.open(uploaded_image).convert("RGB") # Ensure image is in RGB format
20
  st.image(image, caption="Uploaded Image", use_column_width=True)
21
 
22
+ # Resize the image to improve OCR accuracy
23
+ resized_image = image.resize((224, 224)) # Resize to a standard resolution
24
+
25
  # Convert image to a suitable format and ensure it's a batch (list of images)
26
  try:
27
  # Convert image to the right format for the processor
28
+ inputs = processor(images=[resized_image], return_tensors="pt") # Put image in a list
29
 
30
  # Perform OCR
31
  with torch.no_grad():