Spaces:
Sleeping
Sleeping
File size: 1,564 Bytes
1461356 214c45f 1461356 214c45f 1461356 214c45f ad286b1 1461356 214c45f 1461356 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
import cv2
import pytesseract
import numpy as np
import pkg_resources
def log_installed_packages():
    """Return the installed distributions as a ``{name: version}`` mapping.

    Intended for debugging the runtime environment. Distribution names are
    normalized the way ``pkg_resources`` keys were (runs of characters
    other than letters, digits and ``.`` become ``-``, then lowercased), so
    the keys keep the same shape as before.

    Returns:
        dict: Normalized distribution name -> version string.
    """
    # Imported locally: the standard-library ``importlib.metadata`` replaces
    # the deprecated ``pkg_resources`` API without touching file-level imports.
    import re
    from importlib import metadata

    installed_packages = {}
    for dist in metadata.distributions():
        meta = dist.metadata
        name = meta.get("Name") if meta is not None else None
        if not name:
            # Broken or half-removed installs can lack a name; skip them.
            continue
        key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
        # The first match on sys.path is the one Python would import, so it wins.
        installed_packages.setdefault(key, meta.get("Version"))
    return installed_packages
def preprocess_image_for_tesseract(image):
    """Convert an image array to single-channel grayscale for Tesseract.

    Args:
        image: NumPy image array. A 2-D array is treated as already
            grayscale; a 3-D array is expected in RGB or RGBA channel
            order, which is what ``gr.Image(type="numpy")`` delivers per
            the Gradio documentation (not OpenCV's native BGR order).

    Returns:
        The grayscale image as a 2-D array.

    Raises:
        ValueError: If ``image`` is ``None`` (e.g. nothing was uploaded).
    """
    if image is None:
        raise ValueError("No image provided.")

    # Already single-channel: nothing to convert.
    if image.ndim == 2:
        return image

    # Use the RGB(A) conversion codes so the red and blue luminance weights
    # are applied to the right channels.
    if image.ndim == 3 and image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def ocr_and_search(image, keyword):
    """Run Hindi+English OCR on an image and find lines containing a keyword.

    Args:
        image: NumPy image array from the UI, or ``None`` when nothing was
            uploaded.
        keyword: Text to look for; matching is case-insensitive. A blank or
            missing keyword yields no matches.

    Returns:
        tuple: ``(text, matches)`` where ``text`` is the extracted text (or a
        human-readable error message) and ``matches`` is the list of
        extracted lines that contain the keyword.
    """
    if image is None:
        return "No image provided.", []

    # get_tesseract_version() signals a missing binary by raising rather
    # than by returning a falsy value, so the failure has to be caught.
    try:
        pytesseract.pytesseract.get_tesseract_version()
    except pytesseract.TesseractNotFoundError:
        return "Tesseract is not available in PATH.", []

    try:
        processed_image = preprocess_image_for_tesseract(image)
        # Extract text in both Hindi and English
        extracted_text = pytesseract.image_to_string(processed_image, lang='hin+eng')
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"Error during OCR: {str(e)}", []

    needle = (keyword or "").strip().casefold()
    if not needle:
        # An empty needle is a substring of every line; treat it as "no search".
        return extracted_text, []

    search_results = [
        line for line in extracted_text.splitlines() if needle in line.casefold()
    ]
    return extracted_text, search_results
# Build the web UI: an image upload plus a keyword box feed ocr_and_search,
# whose two return values are rendered as two text outputs.
_ui_inputs = [
    gr.Image(type="numpy"),
    gr.Textbox(label="Keyword"),
]
_ui_outputs = ["text", "text"]
iface = gr.Interface(
    ocr_and_search,
    _ui_inputs,
    _ui_outputs,
    title="OCR and Keyword Search",
    description="Upload an image with text and search for a keyword.",
)

# Dump the installed package versions to stdout as a debugging aid.
_package_versions = log_installed_packages()
print(_package_versions)

# Start serving the interface, requesting a public share link.
iface.launch(share=True)
|