# Hugging Face Spaces page residue (status at capture: Sleeping)
import gradio as gr | |
import cv2 | |
import easyocr | |
from PIL import Image | |
import numpy as np | |
# Functions for OCR steps | |
def get_grayscale(image):
    """Convert an image array to single-channel grayscale.

    The image handed to this app comes from a Gradio ``Image`` component or
    from PIL, both of which use RGB(A) channel order, so the RGB conversion
    codes are used rather than OpenCV's native BGR ones (the BGR code would
    swap the red and blue luminance weights).

    Args:
        image: numpy image array. Either 2-D (already grayscale) or 3-D with
            1, 3 (RGB) or 4 (RGBA) channels in the last axis.

    Returns:
        A 2-D grayscale array.
    """
    # Already single-channel: nothing to convert (cvtColor would reject it).
    if image.ndim == 2:
        return image

    channels = image.shape[2]
    if channels == 1:
        # Drop the trailing singleton channel axis.
        return image[:, :, 0]
    if channels == 4:
        # Images with an alpha channel need the four-channel conversion code.
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def thresholding(src, threshold_value):
    """Binarize ``src`` with a fixed cutoff using ``cv2.THRESH_BINARY``.

    Args:
        src: Image array to threshold.
        threshold_value: Cutoff passed to ``cv2.threshold``; the maximum
            output value is fixed at 255.

    Returns:
        The thresholded image (second element of ``cv2.threshold``'s result).
    """
    # cv2.threshold returns (threshold actually used, output image).
    _used_threshold, binary = cv2.threshold(
        src, threshold_value, 255, cv2.THRESH_BINARY
    )
    return binary
# Shared EasyOCR reader, created on first use by _get_easyocr_reader().
_easyocr_reader = None


def _get_easyocr_reader():
    """Return the shared English EasyOCR reader, building it on first use."""
    global _easyocr_reader
    if _easyocr_reader is None:
        _easyocr_reader = easyocr.Reader(['en'])
    return _easyocr_reader


def ocr_with_easy(img_path):
    """Run EasyOCR on an image file and return the recognized text.

    Args:
        img_path: Path of the image file to read.

    Returns:
        All recognized text fragments concatenated into one string (no
        separator is inserted between fragments).
    """
    # Reuse one Reader instead of constructing a new one for every request.
    reader = _get_easyocr_reader()
    # NOTE: the flag must be a real bool. A string such as "False" is
    # non-empty and therefore truthy, which silently enables paragraph mode.
    # True is passed explicitly to keep that long-standing effective behavior.
    fragments = reader.readtext(img_path, paragraph=True, detail=0)
    # NOTE(review): fragments are joined without a separator, so separate
    # paragraphs run together - confirm whether a space/newline is wanted.
    return ''.join(fragments)
def process_image(img, steps, threshold_value):
    """Apply the selected preprocessing steps in order and save the result.

    Args:
        img: Image array to preprocess.
        steps: Iterable of step names; "Grayscale Conversion" and
            "Thresholding" are recognized, anything else is ignored.
        threshold_value: Cutoff forwarded to ``thresholding``.

    Returns:
        The path of the written file, always 'processed_image.png'.
    """
    output_path = 'processed_image.png'

    for step_name in steps:
        if step_name == "Thresholding":
            img = thresholding(img, threshold_value)
            continue
        if step_name == "Grayscale Conversion":
            img = get_grayscale(img)

    # The file is overwritten on every call.
    cv2.imwrite(output_path, img)
    return output_path
def generate_ocr(img, steps, threshold_value):
    """Preprocess an uploaded image and return the text recognized in it.

    Args:
        img: Image array from the upload component, or None.
        steps: Preprocessing step names forwarded to ``process_image``.
        threshold_value: Threshold forwarded to ``process_image``.

    Returns:
        The OCR text for the preprocessed image.

    Raises:
        gr.Error: If no image was supplied or every pixel value is zero.
    """
    # Guard clause: reject a missing image or one with no non-zero element.
    if img is None or not img.any():
        raise gr.Error("Please upload an image and select the processing steps!")

    processed_image_path = process_image(img, steps, threshold_value)
    return ocr_with_easy(processed_image_path)
# Interactive tutorial steps | |
# Names of the preprocessing steps offered to the user; process_image
# matches on these exact strings.
tutorial_steps = ["Grayscale Conversion", "Thresholding"]
# Interactive questions | |
# Quiz content as (question text, answer options, correct answer) rows.
_QUIZ_ROWS = (
    (
        "What is the first step in OCR?",
        ["Binarization", "Grayscale Conversion", "Edge Detection"],
        "Grayscale Conversion",
    ),
    (
        "What is the purpose of thresholding in OCR?",
        ["To detect edges", "To convert image to grayscale", "To binarize the image"],
        "To binarize the image",
    ),
    (
        "Which library is used for OCR in this app?",
        ["Tesseract", "EasyOCR", "OpenCV"],
        "EasyOCR",
    ),
    (
        "What format is the image saved in after preprocessing?",
        ["JPG", "PNG", "TIFF"],
        "PNG",
    ),
    (
        "What does OCR stand for?",
        ["Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"],
        "Optical Character Recognition",
    ),
)

# One dict per quiz question with the keys "question", "options", "answer".
questions = [
    {"question": prompt, "options": choices, "answer": correct}
    for prompt, choices, correct in _QUIZ_ROWS
]
def quiz_interface():
    """Build one live Gradio interface per entry in ``questions``.

    Each interface pairs a radio group holding the question's options with a
    "Result" textbox and grades the current selection.

    Returns:
        A list of ``gr.Interface`` objects, in the same order as
        ``questions``.
    """

    def check_answer(question_idx, selected):
        """Return the feedback message for ``selected`` on one question."""
        correct = questions[question_idx]["answer"]
        if correct == selected:
            return "Correct!"
        return "Incorrect. The correct answer is: " + correct

    def create_submit_fn(question_idx):
        """Bind ``question_idx`` now so each callback grades its own question."""
        def submit(selected):
            return check_answer(question_idx, selected)
        return submit

    interfaces = []
    for idx, question in enumerate(questions):
        radio = gr.Radio(choices=question["options"], label=question["question"])
        result_box = gr.Textbox(label="Result")
        # No separate submit Button is created: it was never wired into the
        # Interface, which is built with live=True.
        interfaces.append(gr.Interface(
            create_submit_fn(idx),
            radio,
            result_box,
            live=True,
        ))
    return interfaces
# Explanation text | |
# Markdown tutorial text; passed as the description of the OCR interface.
explanation_text = """
**Welcome to the OCR Tutorial!**
Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.
**Steps in the OCR Process:**
1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.
**Interactive Tutorial:**
Please upload an image and select the correct order of steps to perform OCR. You can also adjust the threshold value using the slider.
"""

example_image_path = "origin.png"  # Provide the path to your example image

# Load the example image. The pixels are copied into a numpy array inside the
# context manager so the underlying file is closed as soon as decoding is done.
with Image.open(example_image_path) as _example_file:
    example_image = np.array(_example_file)

# Input and output components for the OCR interface.
image = gr.Image(value=example_image, label="Upload or edit image for OCR")
steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for OCR")
threshold = gr.Slider(0, 255, value=127, step=1, label="Threshold Value")
output = gr.Textbox(label="OCR Output")
# NOTE(review): this Markdown component is not referenced anywhere else in
# this file - confirm it is still needed.
explanation = gr.Markdown(explanation_text)
# Main OCR tool: image + selected steps + threshold in, recognized text out.
ocr_app = gr.Interface(
    fn=generate_ocr,
    inputs=[image, steps, threshold],
    outputs=output,
    title="Optical Character Recognition",
    description=explanation_text,
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
)

# One tab for the OCR tool followed by one tab per quiz question.
_quiz_tabs = quiz_interface()
_tab_names = ["OCR Tool"]
_tab_names.extend(f"Question {number}" for number, _ in enumerate(questions, start=1))

quiz_app = gr.TabbedInterface(
    [ocr_app, *_quiz_tabs],
    _tab_names,
    title="OCR Tutorial and Quiz",
)
quiz_app.launch()