Spaces:
Sleeping
Sleeping
File size: 1,591 Bytes
e7c4852 b090a54 e7c4852 f643c9b e7c4852 f643c9b e7c4852 c86c8e2 e7c4852 f643c9b c86c8e2 f643c9b c86c8e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import streamlit as st
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import torch
# Checkpoint used for OCR.
# NOTE(review): "stage1" looks like the pre-training checkpoint; the fine-tuned
# "microsoft/trocr-large-printed" / "-handwritten" variants may be more
# accurate -- confirm against the model cards before switching.
model_name = "microsoft/trocr-large-stage1"


@st.cache_resource
def _load_ocr_model(name):
    """Load the TrOCR processor and model for checkpoint *name*, once per process.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. Caching the loaded objects as a shared resource avoids
    re-instantiating the (large) model on each rerun.

    Args:
        name: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = TrOCRProcessor.from_pretrained(name)
    loaded_model = VisionEncoderDecoderModel.from_pretrained(name)
    return loaded_processor, loaded_model


# Module-level names kept so the rest of the script can use them unchanged.
processor, model = _load_ocr_model(model_name)
# --- Streamlit UI: upload an image, run TrOCR on it, show the decoded text ---

# Streamlit app title
st.title("OCR with TrOCR (Improved Accuracy)")

# Upload image section
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    # Everything that can fail on bad user input (including decoding the
    # uploaded file) lives inside the try so the user sees a friendly error
    # instead of a raw traceback.
    try:
        # Open the upload and normalise to 3-channel RGB.
        image = Image.open(uploaded_image).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Pass the original image straight to the processor: it performs the
        # resizing/normalisation the model expects. A manual resize to a
        # small fixed square beforehand distorts the aspect ratio and throws
        # away detail, which degrades recognition.
        inputs = processor(images=[image], return_tensors="pt")

        # Inference only -- no gradients needed.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        # Decode the first (only) generated sequence into plain text.
        text = processor.decode(outputs[0], skip_special_tokens=True)

        # Display the OCR result
        st.write("Extracted Text:")
        st.text(text)
    except Exception as e:  # top-level UI boundary: report, don't crash
        st.error(f"An error occurred: {str(e)}")
|