# Spaces: Sleeping
| import gradio as gr | |
| import cv2 | |
| import pytesseract | |
| import numpy as np | |
| import pkg_resources | |
def log_installed_packages():
    """Return a mapping of installed distribution names to their versions.

    Intended purely as a debugging aid. Distribution metadata is read with
    the standard-library ``importlib.metadata`` module rather than the
    deprecated ``pkg_resources`` API. Keys are normalized the same way
    ``pkg_resources`` reported them (lower-case, runs of characters other
    than letters, digits and ``.`` collapsed to ``-``) so the output stays
    comparable with earlier logs.

    Returns:
        dict: normalized distribution name -> version string.
    """
    # Imported locally so this function carries its own dependencies.
    import re
    from importlib import metadata

    installed_packages = {}
    for dist in metadata.distributions():
        meta = dist.metadata
        name = meta.get("Name") if meta is not None else None
        if not name:
            # Broken or partially removed installs may have no usable name.
            continue
        key = re.sub(r"[^A-Za-z0-9.]+", "-", name).lower()
        # Keep the first distribution found for a name: that is the one
        # that would actually be imported.
        installed_packages.setdefault(key, meta.get("Version"))
    return installed_packages
def preprocess_image_for_tesseract(image):
    """Convert an uploaded image to single-channel grayscale for Tesseract.

    Args:
        image: NumPy image array. Colour input is expected in RGB channel
            order, which is what a Gradio ``gr.Image(type="numpy")`` input
            supplies. Already-grayscale (2-D) arrays and 4-channel RGBA
            arrays are accepted as well.

    Returns:
        A 2-D grayscale array.
    """
    # Nothing to convert when the image is already single-channel.
    if image.ndim == 2:
        return image
    # The array is RGB(A), not OpenCV's native BGR, so use the RGB
    # conversion codes; the BGR code would swap the red/blue weights.
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def ocr_and_search(image, keyword):
    """Run Hindi+English OCR on an image and find lines containing a keyword.

    Args:
        image: Image array as delivered by the image input, or ``None``
            when nothing was uploaded.
        keyword: Text to look for; matching is case-insensitive. A blank
            or missing keyword yields no search results.

    Returns:
        tuple: ``(text, matches)``. ``text`` is the full OCR output, or a
        human-readable error message when OCR could not be performed.
        ``matches`` is the list of OCR lines that contain ``keyword``
        (empty on error).
    """
    if image is None:
        return "No image provided.", []

    # get_tesseract_version() raises when the binary is missing instead of
    # returning a falsy value, so the failure has to be caught to be able
    # to report it.
    try:
        tesseract_found = bool(pytesseract.pytesseract.get_tesseract_version())
    except pytesseract.pytesseract.TesseractNotFoundError:
        tesseract_found = False
    if not tesseract_found:
        return "Tesseract is not available in PATH.", []

    processed_image = preprocess_image_for_tesseract(image)

    try:
        # Extract text in both Hindi and English.
        extracted_text = pytesseract.image_to_string(processed_image, lang='hin+eng')
    except Exception as e:
        # Boundary of the UI callback: surface the failure as text rather
        # than crashing the request.
        return f"Error during OCR: {str(e)}", []

    # An empty keyword is a substring of every line; treat it as "no search".
    if not keyword or not keyword.strip():
        return extracted_text, []

    # Normalize the keyword once instead of once per line.
    needle = keyword.casefold()
    search_results = [
        line for line in extracted_text.split('\n') if needle in line.casefold()
    ]
    return extracted_text, search_results
# Assemble the web UI: an image upload plus a keyword box feed
# ocr_and_search, whose two results are shown as text outputs.
iface = gr.Interface(
    ocr_and_search,
    [gr.Image(type="numpy"), gr.Textbox(label="Keyword")],
    ["text", "text"],
    description="Upload an image with text and search for a keyword.",
    title="OCR and Keyword Search",
)
# Print the installed package versions once at startup (debugging aid).
_installed_packages = log_installed_packages()
print(_installed_packages)
# Start serving the interface; share=True is passed through to Gradio.
iface.launch(share=True)