import gradio as gr
import cv2
import pytesseract
import numpy as np
import pkg_resources


def log_installed_packages():
    """Return the installed distributions, for debugging.

    Returns:
        A dict mapping each distribution's ``key`` to its ``version`` for
        every entry in ``pkg_resources.working_set``.
    """
    return {pkg.key: pkg.version for pkg in pkg_resources.working_set}


def preprocess_image_for_tesseract(image):
    """Convert an image array to single-channel grayscale for OCR.

    Args:
        image: A NumPy image array. The Gradio ``Image`` component used in
            this file (``type="numpy"``) supplies channels in RGB order.

    Returns:
        A 2-D grayscale array.
    """
    # Already grayscale: nothing to convert.
    if image.ndim == 2:
        return image

    channels = image.shape[2]
    if channels == 1:
        return image[:, :, 0]
    if channels == 4:
        # Images with an alpha channel (e.g. transparent PNGs).
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)

    # Gradio hands over RGB, not OpenCV's native BGR, so the RGB conversion
    # code is required to weight the red and blue channels correctly.
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)


def ocr_and_search(image, keyword):
    """Run Hindi+English OCR on *image* and find the lines containing *keyword*.

    Args:
        image: NumPy image array from the Gradio ``Image`` input, or ``None``
            when the user submitted without uploading anything.
        keyword: Text to look for; matching is case-insensitive.

    Returns:
        A ``(text, matches)`` tuple. On success ``text`` is the full OCR
        output and ``matches`` is the list of lines containing the keyword.
        On failure ``text`` is an error message and ``matches`` is ``[]``.
    """
    if image is None:
        return "No image provided.", []

    # get_tesseract_version() raises when the binary cannot be found rather
    # than returning a falsy value, so the probe must be a try/except.
    try:
        pytesseract.pytesseract.get_tesseract_version()
    except pytesseract.pytesseract.TesseractNotFoundError:
        return "Tesseract is not available in PATH.", []

    processed_image = preprocess_image_for_tesseract(image)

    try:
        # Extract text in both Hindi and English.
        extracted_text = pytesseract.image_to_string(
            processed_image, lang='hin+eng'
        )
    except Exception as e:
        # UI boundary: report any OCR failure to the user instead of crashing.
        return f"Error during OCR: {e}", []

    # An empty keyword is a substring of every line; treat it as "no search".
    needle = (keyword or "").lower()
    if not needle:
        return extracted_text, []

    # splitlines() also splits on the form feed Tesseract appends per page
    # and on "\r\n", so matched lines carry no stray control characters.
    search_results = [
        line for line in extracted_text.splitlines() if needle in line.lower()
    ]
    return extracted_text, search_results


# Create Gradio interface
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[gr.Image(type="numpy"), gr.Textbox(label="Keyword")],
    outputs=["text", "text"],
    title="OCR and Keyword Search",
    description="Upload an image with text and search for a keyword.",
)

# Optionally log installed packages
print(log_installed_packages())

# Launch the interface
iface.launch(share=True)