Spaces:

ImranzamanML
/

image_to_text_ocr_hf

Sleeping

App Files Files Community

ImranzamanML committed on Aug 20, 2024

Commit

562ff76

verified ·

1 Parent(s): 831ca76

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -73

app.py CHANGED Viewed

@@ -1,34 +1,18 @@
 import os
 import numpy as np
 import json
-import shutil
 import requests
-import re as r
 from urllib.request import urlopen
 from datetime import datetime
 import gradio as gr
-import tensorflow as tf
-import keras_ocr
-import cv2
-import csv
-import pandas as pd
-import huggingface_hub
-from huggingface_hub import Repository, upload_file
-import scipy.ndimage.interpolation as inter
-import easyocr
-from datasets import load_dataset, Image
-from PIL import Image as PILImage
 from paddleocr import PaddleOCR
-import pytesseract
-import torch
 import spaces
 # Global Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
-DATASET_NAME = "image_to_text_ocr"
 DATASET_REPO_URL = "https://huggingface.co/ImranzamanML/image_to_text_ocr"
-DATA_FILENAME = "ocr_data.csv"
-DATA_FILE_PATH = os.path.join("ocr_data", DATA_FILENAME)
 DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"
 REPOSITORY_DIR = "data"
 LOCAL_DIR = 'data_local'
@@ -38,48 +22,15 @@ os.makedirs(LOCAL_DIR, exist_ok=True)
 OCR using PaddleOCR
 """
 @spaces.GPU
-def paddle_ocr_processor(image):
     final_text = ''
     ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
     result = ocr.ocr(image)
-    for i in range(len(result[0])):
-        text = result[0][i][1][0]
         final_text += ' ' + text
     return final_text
-"""
-OCR using Keras OCR
-"""
-@spaces.GPU
-def keras_ocr_processor(image):
-    output_text = ''
-    pipeline = keras_ocr.pipeline.Pipeline()
-    images = [keras_ocr.tools.read(image)]
-    predictions = pipeline.recognize(images)
-    first_prediction = predictions[0]
-    for text, box in first_prediction:
-        output_text += ' ' + text
-    return output_text
-"""
-OCR using EasyOCR
-"""
-def convert_to_grayscale(image):
-    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-def apply_thresholding(src):
-    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]
-@spaces.GPU
-def easy_ocr_processor(image):
-    gray_image = convert_to_grayscale(image)
-    apply_thresholding(gray_image)
-    cv2.imwrite('processed_image.png', gray_image)
-    reader = easyocr.Reader(['th', 'en'])
-    detected_text = reader.readtext('processed_image.png', paragraph="False", detail=0)
-    detected_text = ''.join(detected_text)
-    return detected_text
 """
 Utility Functions
 """
@@ -90,7 +41,7 @@ def save_json(data, filepath):
 def get_ip_address():
     try:
         response = str(urlopen('http://checkip.dyndns.com/').read())
-        return r.compile(r'Address: (\d+\.\d+\.\d+\.\d+)').search(response).group(1)
     except Exception as e:
         print("Error while getting IP address -->", e)
         return ''
@@ -170,12 +121,7 @@ OCR Generation
 def generate_ocr_text(method, image):
     text_output = ''
     if image.any():
-        if method == 'EasyOCR':
-            text_output = easy_ocr_processor(image)
-        elif method == 'KerasOCR':
-            text_output = keras_ocr_processor(image)
-        elif method == 'PaddleOCR':
-            text_output = paddle_ocr_processor(image)
         try:
             log_ocr_data(method, text_output, image)
@@ -188,22 +134,46 @@ def generate_ocr_text(method, image):
 """
 Create user interface for OCR demo
 """
-image_input = gr.Image(label="Upload Image")
-method_input = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR", label="Select OCR Method")
-output_textbox = gr.Textbox(label="Recognized Text")
 demo = gr.Interface(
     fn=generate_ocr_text,
-    inputs=[method_input, image_input],
     outputs=output_textbox,
-    title="Enhanced OCR Demo",
-    description="Choose an OCR method and upload an image to extract text.",
-    theme="huggingface",
     css="""
-    .gradio-container {background-color: #f5f5f5; font-family: Arial, sans-serif;}
-    #method_input {background-color: #FFC107; font-size: 18px; padding: 10px;}
-    #output_textbox {font-size: 16px; color: #333;}
-    """
-)
 demo.launch()

 import os
 import numpy as np
 import json
 import requests
 from urllib.request import urlopen
 from datetime import datetime
 import gradio as gr
 from paddleocr import PaddleOCR
+from PIL import Image as PILImage
+from huggingface_hub import Repository, upload_file
 import spaces
 # Global Variables
 # Hugging Face access token read from the HF_TOKEN environment variable;
 # os.environ.get returns None when the variable is not set.
 HF_TOKEN = os.environ.get("HF_TOKEN")
 # Web URL and repo id of the same Hub repository.
 # NOTE(review): presumably the target for logged OCR data; the logging code
 # is not visible in this excerpt, so confirm against log_ocr_data.
 DATASET_REPO_URL = "https://huggingface.co/ImranzamanML/image_to_text_ocr"
 DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"
 # Directory names used by the app. LOCAL_DIR is created elsewhere in the file
 # with os.makedirs(LOCAL_DIR, exist_ok=True).
 # NOTE(review): how REPOSITORY_DIR is used is not shown here.
 REPOSITORY_DIR = "data"
 LOCAL_DIR = 'data_local'
 OCR using PaddleOCR
 """
 @spaces.GPU
 def process_image_with_paddleocr(image):
     """Run PaddleOCR on ``image`` and return the recognized text as one string.

     Args:
         image: Image handed unchanged to ``PaddleOCR.ocr``. The UI in this
             file supplies it from ``gr.Image(type="numpy")``.

     Returns:
         All recognized text fragments, each preceded by a single space (so a
         non-empty result starts with a space), or ``''`` when no text was
         recognized.
     """
     # NOTE(review): the engine is rebuilt on every call. It is deliberately
     # left inside the function because it is created with use_gpu=True and a
     # GPU is presumably only attached while a @spaces.GPU function is
     # running -- confirm before hoisting it to module level.
     ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
     result = ocr.ocr(image)

     # When nothing is detected PaddleOCR can hand back [None] (or an empty
     # list); iterating that would raise TypeError/IndexError, so treat it as
     # "no text" instead.
     if not result or not result[0]:
         return ''

     # For each detected line, element [1][0] holds the recognized text.
     return ''.join(' ' + line[1][0] for line in result[0])
 """
 Utility Functions
 """
 def get_ip_address():
     """Return the IPv4 address reported by the checkip.dyndns.com service.

     Best-effort helper: any failure (network error, timeout, unexpected
     response) is printed and turned into an empty string rather than raised.

     Returns:
         The dotted-quad address as a string, or ``''`` if it could not be
         determined.
     """
     # Imported locally because the module-level imports visible in this file
     # do not bring in `re`; without it the lookup below raised NameError,
     # which the broad handler swallowed, so '' was always returned.
     import re

     try:
         # Close the connection deterministically and bound the wait so a
         # stalled service cannot hang the request that triggered the lookup.
         with urlopen('http://checkip.dyndns.com/', timeout=10) as reply:
             response = str(reply.read())
         match = re.search(r'Address: (\d+\.\d+\.\d+\.\d+)', response)
         if match is None:
             print("Error while getting IP address -->", "no address found in response")
             return ''
         return match.group(1)
     except Exception as e:
         print("Error while getting IP address -->", e)
         return ''
 def generate_ocr_text(method, image):
     text_output = ''
     if image.any():
+        text_output = process_image_with_paddleocr(image)
         try:
             log_ocr_data(method, text_output, image)
 """
 Create user interface for OCR demo
 """
# Image upload widget; the handler receives the picture as a NumPy array.
# The former tool="editor" argument is omitted: Gradio 4+ no longer accepts a
# `tool` keyword on gr.Image (it raises TypeError at startup), and on Gradio 3
# "editor" is already the default tool for uploaded images.
image_input = gr.Image(label="Upload Image", type="numpy")
output_textbox = gr.Textbox(label="Recognized Text", lines=5, placeholder="OCR results will appear here...")


def _run_paddleocr(image):
    """UI adapter: call generate_ocr_text with the method fixed to PaddleOCR."""
    # NOTE(review): this replaces a gr.Hidden("PaddleOCR") input. gr.Hidden is
    # not among Gradio's documented components, so binding the constant here
    # avoids depending on it -- confirm against the pinned Gradio version.
    return generate_ocr_text("PaddleOCR", image)


demo = gr.Interface(
    fn=_run_paddleocr,
    inputs=[image_input],
    outputs=output_textbox,
    title="PaddleOCR - Optical Character Recognition",
    description="Upload an image and extract text using PaddleOCR. This tool supports multiple languages and handles complex layouts.",
    theme="default",
    css="""
    .gradio-container {
        background-color: #f0f4f8;
        font-family: 'Roboto', sans-serif;
        padding: 20px;
        border-radius: 10px;
        box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
    }
    .gr-button {
        background-color: #007bff;
        color: white;
        border-radius: 5px;
        padding: 10px 20px;
        font-size: 16px;
        cursor: pointer;
    }
    .gr-button:hover {
        background-color: #0056b3;
    }
    .gr-textbox {
        background-color: #ffffff;
        border: 1px solid #ced4da;
        border-radius: 5px;
        padding: 10px;
        font-size: 16px;
    }
    .gr-textbox:focus {
        border-color: #007bff;
        box-shadow: 0 0 0 0.2rem rgba(0,123,255,.25);
    }
    """)
# Start the web server for the demo (runs at import time, as the Space expects).
demo.launch()