# Hugging Face Space (status: Sleeping)
import streamlit as st | |
from PIL import Image | |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
import torch | |
# Load the OCR model and processor (switching to a larger model).
# NOTE(review): the "stage1" checkpoints appear to be the pre-trained-only
# TrOCR weights; a fine-tuned variant such as "microsoft/trocr-large-printed"
# or "microsoft/trocr-large-handwritten" may give better text -- confirm
# before switching.
model_name = "microsoft/trocr-large-stage1"


@st.cache_resource
def load_ocr_model(name):
    """Load the TrOCR processor and model for checkpoint *name*.

    Streamlit re-executes this script from the top on every user
    interaction. Caching the loaded objects as a shared resource means the
    checkpoint is read once per server process instead of on every rerun.

    Args:
        name: Hugging Face model identifier of the checkpoint to load.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = TrOCRProcessor.from_pretrained(name)
    loaded_model = VisionEncoderDecoderModel.from_pretrained(name)
    return loaded_processor, loaded_model


# Keep the original module-level names so the rest of the script is unchanged.
processor, model = load_ocr_model(model_name)
def _recognize_text(image):
    """Run the OCR model on one RGB PIL image and return the decoded text.

    The image is handed to the processor at its original resolution. The
    processor's own image preprocessing is expected to resize the input to
    whatever the checkpoint needs, so shrinking it beforehand (the script
    used to force 224x224) only distorts the aspect ratio and throws away
    detail the model could have used.

    Args:
        image: A ``PIL.Image.Image`` already converted to RGB.

    Returns:
        The generated text with special tokens stripped.
    """
    # The processor is given a list so it builds a batch containing one image.
    inputs = processor(images=[image], return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated_ids = model.generate(**inputs)
    return processor.decode(generated_ids[0], skip_special_tokens=True)


# Streamlit app title
st.title("OCR with TrOCR (Improved Accuracy)")

# Upload image section
uploaded_image = st.file_uploader("Upload an image for OCR", type=["jpg", "jpeg", "png"])

if uploaded_image is not None:
    try:
        # Decode the upload and force RGB, which is what the processor is fed.
        image = Image.open(uploaded_image).convert("RGB")
    except OSError as e:
        # Corrupt or mislabeled uploads fail to decode; report it in the UI
        # instead of crashing the app with a raw traceback.
        st.error(f"An error occurred: {e}")
    else:
        st.image(image, caption="Uploaded Image", use_column_width=True)
        try:
            text = _recognize_text(image)
        except Exception as e:
            # Top-level UI boundary: surface any model/processor failure to
            # the user rather than aborting the script run.
            st.error(f"An error occurred: {e}")
        else:
            # Display the OCR result
            st.write("Extracted Text:")
            st.text(text)