OCR_Tutorial / app.py
llmat's picture
Update app.py
1e202d5 verified
raw
history blame
5.05 kB
import gradio as gr
import cv2
import easyocr
from PIL import Image
import numpy as np
# Functions for OCR steps
def get_grayscale(image):
    """Convert an image array to single-channel grayscale.

    Args:
        image: NumPy image array. A 2-D array is treated as already being
            grayscale and is returned unchanged. A multi-channel array is
            assumed to be in RGB channel order, which is how the Gradio
            image input used by this app supplies pixels.

    Returns:
        The grayscale image as a 2-D array.
    """
    if image.ndim == 2:
        # Already single-channel; cv2.cvtColor rejects such input for a
        # colour-to-gray conversion, so pass it through untouched.
        return image
    # RGB2GRAY (not BGR2GRAY): the input is RGB, so this applies the red and
    # blue luminance weights to the correct channels.
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def thresholding(src, threshold_value):
    """Binarize ``src`` with a fixed cut-off.

    Args:
        src: Image array handed to ``cv2.threshold``.
        threshold_value: Cut-off level; with ``THRESH_BINARY`` pixels above
            it become 255 and all others become 0.

    Returns:
        The thresholded image (the second element of the pair returned by
        ``cv2.threshold``).
    """
    max_value = 255
    _used_threshold, binary = cv2.threshold(
        src, threshold_value, max_value, cv2.THRESH_BINARY
    )
    return binary
# EasyOCR readers keyed by language tuple. Constructing a Reader is costly
# (it sets up the recognition models), so each one is built once and reused
# instead of being recreated on every OCR request.
_OCR_READERS = {}


def _get_reader(languages=("en",)):
    """Return the shared ``easyocr.Reader`` for ``languages``, creating it on first use."""
    key = tuple(languages)
    reader = _OCR_READERS.get(key)
    if reader is None:
        reader = _OCR_READERS[key] = easyocr.Reader(list(key))
    return reader


def ocr_with_easy(img_path):
    """Run EasyOCR on an image file and return the recognized text.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The recognized text as one string, with each text block that EasyOCR
        returns placed on its own line. Empty string if nothing was found.
    """
    reader = _get_reader()
    # The original call passed paragraph="False"; a non-empty string is
    # truthy, so paragraph grouping was in fact enabled. It is spelled as
    # True here to keep that behaviour while making it explicit.
    # detail=0 makes readtext return plain strings only.
    blocks = reader.readtext(img_path, paragraph=True, detail=0)
    # Separate blocks with a newline so words at block boundaries are not
    # fused together.
    return "\n".join(blocks)
def process_image(img, steps, threshold_value):
    """Apply the selected preprocessing steps and save the result as a PNG.

    Args:
        img: Image array to preprocess.
        steps: Step names, applied in the order given. Names other than
            "Grayscale Conversion" and "Thresholding" are ignored. ``None``
            is treated as "no steps".
        threshold_value: Cut-off forwarded to ``thresholding``.

    Returns:
        Path of the written file, always ``'processed_image.png'`` (relative
        to the current working directory).

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    output_path = 'processed_image.png'
    for step in steps or ():
        if step == "Grayscale Conversion":
            img = get_grayscale(img)
        elif step == "Thresholding":
            img = thresholding(img, threshold_value)
    # NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR; if img is
    # still RGB here the saved colours are swapped. Confirm whether that
    # matters for the OCR step.
    # cv2.imwrite reports failure through its return value rather than an
    # exception; without this check OCR could run on a stale or missing file.
    if not cv2.imwrite(output_path, img):
        raise OSError(f"Could not write preprocessed image to {output_path!r}")
    return output_path
def generate_ocr(img, steps, threshold_value):
    """Preprocess an uploaded image and return the text recognized in it.

    Args:
        img: Image array from the UI, or ``None`` when nothing was uploaded.
        steps: Preprocessing step names forwarded to ``process_image``.
        threshold_value: Threshold forwarded to ``process_image``.

    Returns:
        The text produced by ``ocr_with_easy`` for the preprocessed image.

    Raises:
        gr.Error: If ``img`` is ``None`` or contains only zero-valued pixels
            (an all-zero image is rejected the same way as a missing one).
    """
    has_content = img is not None and img.any()
    if not has_content:
        raise gr.Error("Please upload an image and select the processing steps!")

    saved_path = process_image(img, steps, threshold_value)
    return ocr_with_easy(saved_path)
# Preprocessing steps offered in the UI. These exact strings are what
# process_image compares against, so they must stay in sync with it.
tutorial_steps = ["Grayscale Conversion", "Thresholding"]
# Quiz content, one row per question: (prompt, answer choices, correct choice).
# The correct choice must be spelled exactly like one of the answer choices.
_QUIZ_ROWS = (
    (
        "What is the first step in OCR?",
        ("Binarization", "Grayscale Conversion", "Edge Detection"),
        "Grayscale Conversion",
    ),
    (
        "What is the purpose of thresholding in OCR?",
        ("To detect edges", "To convert image to grayscale", "To binarize the image"),
        "To binarize the image",
    ),
    (
        "Which library is used for OCR in this app?",
        ("Tesseract", "EasyOCR", "OpenCV"),
        "EasyOCR",
    ),
    (
        "What format is the image saved in after preprocessing?",
        ("JPG", "PNG", "TIFF"),
        "PNG",
    ),
    (
        "What does OCR stand for?",
        ("Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"),
        "Optical Character Recognition",
    ),
)

# Interactive questions, expanded into the dict form the quiz builder reads
# ("question", "options" and "answer" keys).
questions = [
    {"question": prompt, "options": list(choices), "answer": correct}
    for prompt, choices, correct in _QUIZ_ROWS
]
def quiz_interface():
    """Build one live Gradio interface per entry in ``questions``.

    Each interface shows the question as a radio group and writes the
    verdict to a "Result" textbox. ``live=True`` is passed, so no submit
    button is created.

    Returns:
        A list of ``gr.Interface`` objects in the same order as
        ``questions``.
    """
    def make_checker(question):
        # Factory so each interface's callback is bound to its own question
        # instead of the loop variable.
        def submit(selected):
            if selected is None:
                # Nothing chosen: don't report "Incorrect" and reveal the
                # answer.
                return "Please select an answer."
            if question["answer"] == selected:
                return "Correct!"
            return "Incorrect. The correct answer is: " + question["answer"]

        return submit

    return [
        gr.Interface(
            make_checker(question),
            gr.Radio(choices=question["options"], label=question["question"]),
            gr.Textbox(label="Result"),
            live=True,
        )
        for question in questions
    ]
# Explanation text
# Markdown introduction for the tutorial. It is passed as the `description`
# of the OCR interface and also wrapped in a gr.Markdown component below.
# The text is runtime content: edit it only to change what the user sees.
explanation_text = """
**Welcome to the OCR Tutorial!**
Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.
**Steps in the OCR Process:**
1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.
**Interactive Tutorial:**
Please upload an image and select the correct order of steps to perform OCR. You can also adjust the threshold value using the slider.
"""
# Sample image pre-loaded into the OCR tab. The path is relative to the
# current working directory, and the file must exist when the module runs.
example_image_path = "origin.png"  # Provide the path to your example image

# Load the example image into an array. The context manager closes the file
# handle as soon as the pixel data has been copied.
with Image.open(example_image_path) as example_file:
    example_image = np.array(example_file)

# Input and output components of the OCR tab.
image = gr.Image(value=example_image, label="Upload or edit image for OCR")
steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for OCR")
threshold = gr.Slider(0, 255, value=127, step=1, label="Threshold Value")
output = gr.Textbox(label="OCR Output")
# NOTE(review): this Markdown component is not passed to any interface
# below; the same text reaches the page through `description`.
explanation = gr.Markdown(explanation_text)

# OCR tab: runs generate_ocr on the image, the selected steps and the
# threshold, and shows the recognized text.
ocr_app = gr.Interface(
    fn=generate_ocr,
    inputs=[image, steps, threshold],
    outputs=output,
    title="Optical Character Recognition",
    description=explanation_text,
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}"
)

# Full app: the OCR tool first, followed by one tab per quiz question.
quiz_app = gr.TabbedInterface(
    [ocr_app] + quiz_interface(),
    ["OCR Tool"] + [f"Question {i+1}" for i in range(len(questions))],
    title="OCR Tutorial and Quiz"
)

# Start the web server when this file is executed.
quiz_app.launch()