"""Gradio app: an interactive OCR tutorial (preprocessing + EasyOCR) with a quiz."""

import functools
from pathlib import Path

import cv2
import easyocr
import gradio as gr
import numpy as np
from PIL import Image

# File the preprocessed image is written to before it is handed to EasyOCR.
PROCESSED_IMAGE_PATH = 'processed_image.png'


# Functions for OCR steps
def get_grayscale(image):
    """Convert a BGR image to single-channel grayscale.

    Args:
        image: NumPy image array in OpenCV (BGR) channel order, or an
            already single-channel 2-D array.

    Returns:
        The grayscale image. A 2-D input is returned unchanged, because
        ``cv2.cvtColor`` would raise on an image that has no colour channels.
    """
    if image.ndim == 2:
        return image
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def thresholding(src, threshold_value):
    """Binarize *src* with a fixed threshold.

    Args:
        src: Input image array.
        threshold_value: Pixels strictly above this value become 255,
            all others become 0.

    Returns:
        The thresholded image (second element of ``cv2.threshold``'s result).
    """
    return cv2.threshold(src, threshold_value, 255, cv2.THRESH_BINARY)[1]


@functools.lru_cache(maxsize=1)
def _get_reader():
    """Return the shared English EasyOCR reader, creating it on first use.

    Building a reader loads the recognition models, so it is done once and
    reused instead of being repeated for every request.
    """
    return easyocr.Reader(['en'])


def ocr_with_easy(img_path):
    """Run EasyOCR on the image at *img_path* and return the recognized text.

    Args:
        img_path: Path of the image file to read.

    Returns:
        All recognized text fragments joined by single spaces ('' if nothing
        was recognized).
    """
    reader = _get_reader()
    # ``paragraph`` must be the boolean False: the string "False" is truthy
    # and would silently switch paragraph mode on.
    fragments = reader.readtext(img_path, paragraph=False, detail=0)
    # Join with a space so separately detected words do not run together.
    return ' '.join(fragments)


def _rgb_to_bgr(img):
    """Return *img* in OpenCV's BGR channel order.

    Gradio hands images over as RGB arrays, while the OpenCV calls used here
    (``COLOR_BGR2GRAY``, ``imwrite``) assume BGR. Arrays that are not
    3- or 4-channel colour images are returned unchanged.
    """
    if img.ndim == 3 and img.shape[2] == 3:
        return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if img.ndim == 3 and img.shape[2] == 4:
        # An alpha channel carries no information for OCR; drop it.
        return cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
    return img


def process_image(img, steps, threshold_value):
    """Apply the selected preprocessing steps and save the result as a PNG.

    Args:
        img: RGB image array as delivered by the Gradio image component.
        steps: Iterable of step names (see ``tutorial_steps``), applied in
            the given order. Unknown names are ignored; ``None`` means no
            steps.
        threshold_value: Threshold used by the "Thresholding" step.

    Returns:
        Path of the written image file.

    Raises:
        OSError: If the processed image could not be written.
    """
    img = _rgb_to_bgr(img)
    for step in steps or ():
        if step == "Grayscale Conversion":
            img = get_grayscale(img)
        elif step == "Thresholding":
            img = thresholding(img, threshold_value)

    # imwrite reports failure only through its return value; without this
    # check a stale file from an earlier request would be OCR'd instead.
    if not cv2.imwrite(PROCESSED_IMAGE_PATH, img):
        raise OSError(f"Could not write {PROCESSED_IMAGE_PATH}")
    return PROCESSED_IMAGE_PATH


def generate_ocr(img, steps, threshold_value):
    """Gradio callback: preprocess *img* and return the text found in it.

    Args:
        img: Image array from the image component, or ``None`` if nothing
            was uploaded.
        steps: Selected preprocessing step names.
        threshold_value: Value of the threshold slider.

    Returns:
        The recognized text.

    Raises:
        gr.Error: If no image was provided.
    """
    # Test for a missing/empty image explicitly; ``img.any()`` would also
    # reject a perfectly valid all-black image.
    if img is None or img.size == 0:
        raise gr.Error("Please upload an image and select the processing steps!")

    processed_image_path = process_image(img, steps, threshold_value)
    return ocr_with_easy(processed_image_path)


# Interactive tutorial steps
tutorial_steps = [
    "Grayscale Conversion",
    "Thresholding",
]

# Interactive questions
questions = [
    {
        "question": "What is the first step in OCR?",
        "options": ["Binarization", "Grayscale Conversion", "Edge Detection"],
        "answer": "Grayscale Conversion",
    },
    {
        "question": "What is the purpose of thresholding in OCR?",
        "options": ["To detect edges", "To convert image to grayscale", "To binarize the image"],
        "answer": "To binarize the image",
    },
    {
        "question": "Which library is used for OCR in this app?",
        "options": ["Tesseract", "EasyOCR", "OpenCV"],
        "answer": "EasyOCR",
    },
    {
        "question": "What format is the image saved in after preprocessing?",
        "options": ["JPG", "PNG", "TIFF"],
        "answer": "PNG",
    },
    {
        "question": "What does OCR stand for?",
        "options": ["Optical Character Recognition", "Optical Character Reading", "Optical Code Recognition"],
        "answer": "Optical Character Recognition",
    },
]


def quiz_interface():
    """Build one live ``gr.Interface`` per entry in ``questions``.

    Returns:
        A list of interfaces, in question order. Each shows the question as
        a radio group and reports whether the selected option is correct.
    """

    def check_answer(question_idx, selected):
        """Return the feedback text for *selected* on question *question_idx*."""
        if selected is None:
            # The callback can fire with no option chosen (e.g. after the
            # input is cleared); prompt instead of reporting a wrong answer.
            return "Please select an answer."
        correct = questions[question_idx]["answer"]
        if correct == selected:
            return "Correct!"
        return "Incorrect. The correct answer is: " + correct

    def create_submit_fn(idx):
        """Bind *idx* now so each interface checks its own question."""
        def submit(selected):
            return check_answer(idx, selected)
        return submit

    interfaces = []
    for idx, question in enumerate(questions):
        radio = gr.Radio(choices=question["options"], label=question["question"])
        output = gr.Textbox(label="Result")
        interfaces.append(gr.Interface(
            create_submit_fn(idx),
            radio,
            output,
            live=True,
        ))
    return interfaces


# Explanation text
explanation_text = """
**Welcome to the OCR Tutorial!**

Optical Character Recognition (OCR) is a technology used to convert different types of documents, such as scanned paper documents, PDF files, or images captured by a digital camera, into editable and searchable data.

**Steps in the OCR Process:**

1. **Grayscale Conversion:** The first step in OCR is converting the image to grayscale. This simplifies the image and reduces the amount of data the OCR algorithm needs to process.
2. **Thresholding:** This step converts the grayscale image into a binary image, where the text is in black, and the background is in white. This makes it easier for the OCR algorithm to distinguish text from the background.
3. **OCR using EasyOCR:** We use the EasyOCR library to recognize and extract text from the preprocessed image.

**Interactive Tutorial:**

Please upload an image and select the correct order of steps to perform OCR. You can also adjust the threshold value using the slider.
"""

example_image_path = "origin.png"  # Provide the path to your example image


def _load_example_image(path):
    """Return the image at *path* as a NumPy array, or ``None`` if it is missing.

    The file is opened in a ``with`` block so its handle is closed as soon
    as the pixel data has been copied into the array.
    """
    if not Path(path).is_file():
        return None
    with Image.open(path) as example:
        return np.array(example)


# Load the example image (the app still starts, with an empty image box,
# when the example file is not present).
example_image = _load_example_image(example_image_path)

image = gr.Image(value=example_image, label="Upload or edit image for OCR")
steps = gr.CheckboxGroup(choices=tutorial_steps, label="Select and order the steps for OCR")
threshold = gr.Slider(0, 255, value=127, step=1, label="Threshold Value")
output = gr.Textbox(label="OCR Output")
# NOTE(review): this component is not passed to any interface below; the
# explanation is shown through ``description=explanation_text`` instead.
explanation = gr.Markdown(explanation_text)

ocr_app = gr.Interface(
    fn=generate_ocr,
    inputs=[image, steps, threshold],
    outputs=output,
    title="Optical Character Recognition",
    description=explanation_text,
    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
)

quiz_app = gr.TabbedInterface(
    [ocr_app] + quiz_interface(),
    ["OCR Tool"] + [f"Question {number}" for number in range(1, len(questions) + 1)],
    title="OCR Tutorial and Quiz",
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not on import.
    quiz_app.launch()