# Commented-out variant of this app with PDF support (pdf2image + Poppler); the active, image-only version starts after this block.
# import os
# import cv2
# import re
# import numpy as np
# from PIL import Image, ImageDraw, ImageFont
# from paddleocr import PaddleOCR
# from pdf2image import convert_from_path
# import gradio as gr
#
# # Specify the path to the Poppler bin directory
# poppler_path = r"C:\poppler\poppler-24.08.0\Library\bin"
#
# # Function to check proximity of bounding boxes
# def are_boxes_close(box1, box2, y_threshold=50):
#     y1_center = (box1[0][1] + box1[2][1]) / 2
#     y2_center = (box2[0][1] + box2[2][1]) / 2
#     return abs(y1_center - y2_center) <= y_threshold
#
# # Function to extract terms with specific rules
# def extract_specific_terms(ocr_results):
#     extracted_terms = []
#     for line in ocr_results[0]:
#         detected_text = line[1][0]  # Extracted text
#         box = line[0]  # Bounding box of the detected text
#         if re.match(r"Bill of Lading:\s*\d+", detected_text):
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#         elif re.match(r"Page:\s*\w+", detected_text):
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#         elif detected_text in ["Shipper", "Receiver", "Carrier"]:
#             extracted_terms.append({'detected_text': detected_text + " Signature", 'bounding_box': box})
#         elif detected_text == "Signature":
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#     return extracted_terms
#
# # Function to annotate image with detected terms
# def annotate_image_with_terms(image, terms):
#     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     draw = ImageDraw.Draw(pil_image)
#     font_size = 40
#     try:
#         font = ImageFont.truetype("arial.ttf", font_size)
#     except IOError:
#         font = ImageFont.load_default()
#     for term in terms:
#         box = term['bounding_box']
#         detected_text = term['detected_text']
#         points = [(int(x[0]), int(x[1])) for x in box]
#         draw.polygon(points, outline="blue", width=2)
#         position = (points[0][0], points[0][1] - font_size - 5)
#         draw.text(position, detected_text, fill="red", font=font)
#     return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
#
# # Main processing function
# def process_file(file):
#     ocr = PaddleOCR(lang='en')
#     extracted_terms = []
#     if file.name.endswith(".pdf"):
#         images = convert_from_path(file.name, poppler_path=poppler_path)
#         processed_images = []
#         for image in images:
#             image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
#             ocr_results = ocr.ocr(image_np, cls=True)
#             extracted_terms = extract_specific_terms(ocr_results)
#             annotated_image = annotate_image_with_terms(image_np, extracted_terms)
#             processed_images.append(annotated_image)
#         return [Image.fromarray(img) for img in processed_images]
#     else:
#         image = cv2.imread(file.name)
#         ocr_results = ocr.ocr(image, cls=True)
#         extracted_terms = extract_specific_terms(ocr_results)
#         annotated_image = annotate_image_with_terms(image, extracted_terms)
#         return Image.fromarray(annotated_image)
#
# # Gradio Interface
# def gradio_interface(file):
#     result = process_file(file)
#     if isinstance(result, list):
#         return result[0]  # Display only the first page
#     return result
#
# iface = gr.Interface(
#     fn=gradio_interface,
#     inputs=gr.File(label="Upload an Image or PDF", file_types=[".pdf", ".png", ".jpg", ".jpeg"]),
#     outputs="image",
#     live=True,
#     title="OCR Term Extraction",
#     description="Upload an image or PDF containing text to detect and annotate terms such as 'Bill of Lading', 'Page', and signatures.",
#     allow_flagging="never"
# )
# iface.launch()
import os
import cv2
import re
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR
import gradio as gr
# Function to check vertical proximity of two bounding boxes (roughly the same line)
def are_boxes_close(box1, box2, y_threshold=50):
    y1_center = (box1[0][1] + box1[2][1]) / 2
    y2_center = (box2[0][1] + box2[2][1]) / 2
    return abs(y1_center - y2_center) <= y_threshold
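# The parser below assumes PaddleOCR's classic 2.x `ocr()` output shape: one list per
# page, each entry pairing a 4-point bounding box with a (text, confidence) tuple, roughly:
#   [[ [[[x1, y1], [x2, y2], [x3, y3], [x4, y4]], ("Bill of Lading: 12345", 0.98)], ... ]]
# If your PaddleOCR version returns a different structure, adjust the indexing accordingly.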
# Function to extract terms with specific rules
def extract_specific_terms(ocr_results):
    extracted_terms = []
    # PaddleOCR may return [None] when no text is detected; guard against empty results
    if not ocr_results or ocr_results[0] is None:
        return extracted_terms
    for line in ocr_results[0]:
        detected_text = line[1][0]  # Extracted text
        box = line[0]  # Bounding box of the detected text
        if re.match(r"Bill of Lading:\s*\d+", detected_text):
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
        elif re.match(r"Page:\s*\w+", detected_text):
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
        elif detected_text in ["Shipper", "Receiver", "Carrier"]:
            extracted_terms.append({'detected_text': detected_text + " Signature", 'bounding_box': box})
        elif detected_text == "Signature":
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
    return extracted_terms
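# `are_boxes_close` above is not wired into the pipeline yet. Below is a minimal sketch of
# how it could pair a role label ("Shipper Signature", etc.) with a separate "Signature"
# detection on roughly the same line; `pair_signature_terms` is a hypothetical helper, not
# part of the original app logic.
def pair_signature_terms(terms, y_threshold=50):
    labels = [t for t in terms if t['detected_text'].endswith(" Signature")]
    signatures = [t for t in terms if t['detected_text'] == "Signature"]
    pairs = []
    for label in labels:
        for signature in signatures:
            if are_boxes_close(label['bounding_box'], signature['bounding_box'], y_threshold):
                pairs.append((label, signature))
    return pairs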
# Function to annotate image with detected terms
def annotate_image_with_terms(image, terms):
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)
    font_size = 20
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        font = ImageFont.load_default()
    for term in terms:
        box = term['bounding_box']
        detected_text = term['detected_text']
        points = [(int(x[0]), int(x[1])) for x in box]
        draw.polygon(points, outline="blue", width=2)
        position = (points[0][0], points[0][1] - font_size - 5)
        draw.text(position, detected_text, fill="red", font=font)
    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)  # back to OpenCV's BGR order
# Main processing function
def process_file(file):
    ocr = PaddleOCR(lang='en')  # note: the OCR model is re-created on every request
    # Handle image files (PNG, JPG, JPEG)
    image = cv2.imread(file.name)
    ocr_results = ocr.ocr(image, cls=True)
    extracted_terms = extract_specific_terms(ocr_results)
    annotated_image = annotate_image_with_terms(image, extracted_terms)
    # annotate_image_with_terms returns a BGR array; convert to RGB before wrapping in PIL
    return Image.fromarray(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
# Gradio Interface
def gradio_interface(file):
    result = process_file(file)
    return result
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload an Image", file_types=[".png", ".jpg", ".jpeg"]),
    outputs="image",
    live=True,
    title="OCR Term Extraction",
    description="Upload an image containing text to detect and annotate terms such as 'Bill of Lading', 'Page', and signatures.",
    allow_flagging="never"
)
iface.launch()
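# Minimal local smoke test, bypassing Gradio (a sketch assuming a sample scan named
# "sample_bol.png" next to this script; SimpleNamespace stands in for the file object
# Gradio passes to gradio_interface):
# from types import SimpleNamespace
# annotated = process_file(SimpleNamespace(name="sample_bol.png"))
# annotated.save("sample_bol_annotated.png")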