# ArslanRobo's picture
# Upload 2 files
# fdb32ca verified
# import os
# import cv2
# import re
# import numpy as np
# from PIL import Image, ImageDraw, ImageFont
# from paddleocr import PaddleOCR
# from pdf2image import convert_from_path
# import gradio as gr
# # Specify the path to the Poppler bin directory
# poppler_path = r"C:\\poppler\\poppler-24.08.0\\Library\\bin"
# # Function to check proximity of bounding boxes
# def are_boxes_close(box1, box2, y_threshold=50):
# y1_center = (box1[0][1] + box1[2][1]) / 2
# y2_center = (box2[0][1] + box2[2][1]) / 2
# return abs(y1_center - y2_center) <= y_threshold
# # Function to extract terms with specific rules
# def extract_specific_terms(ocr_results):
# extracted_terms = []
# for line in ocr_results[0]:
# detected_text = line[1][0] # Extracted text
# box = line[0] # Bounding box of the detected text
# if re.match(r"Bill of Lading:\s*\d+", detected_text):
# extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
# elif re.match(r"Page:\s*\w+", detected_text):
# extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
# elif detected_text in ["Shipper", "Receiver", "Carrier"]:
# extracted_terms.append({'detected_text': detected_text + " Signature", 'bounding_box': box})
# elif detected_text == "Signature":
# extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
# return extracted_terms
# # Function to annotate image with detected terms
# def annotate_image_with_terms(image, terms):
# pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# draw = ImageDraw.Draw(pil_image)
# font_size = 40
# try:
# font = ImageFont.truetype("arial.ttf", font_size)
# except IOError:
# font = ImageFont.load_default()
# for term in terms:
# box = term['bounding_box']
# detected_text = term['detected_text']
# points = [(int(x[0]), int(x[1])) for x in box]
# draw.polygon(points, outline="blue", width=2)
# position = (points[0][0], points[0][1] - font_size - 5)
# draw.text(position, detected_text, fill="red", font=font)
# return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
# # Main processing function
# def process_file(file):
# ocr = PaddleOCR(lang='en')
# extracted_terms = []
# if file.name.endswith(".pdf"):
# images = convert_from_path(file.name, poppler_path=poppler_path)
# processed_images = []
# for image in images:
# image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
# ocr_results = ocr.ocr(image_np, cls=True)
# extracted_terms = extract_specific_terms(ocr_results)
# annotated_image = annotate_image_with_terms(image_np, extracted_terms)
# processed_images.append(annotated_image)
# return [Image.fromarray(img) for img in processed_images]
# else:
# image = cv2.imread(file.name)
# ocr_results = ocr.ocr(image, cls=True)
# extracted_terms = extract_specific_terms(ocr_results)
# annotated_image = annotate_image_with_terms(image, extracted_terms)
# return Image.fromarray(annotated_image)
# # Gradio Interface
# def gradio_interface(file):
# result = process_file(file)
# if isinstance(result, list):
# return result[0] # Display only the first page
# return result
# iface = gr.Interface(
# fn=gradio_interface,
# inputs=gr.File(label="Upload an Image or PDF", file_types=[".pdf", ".png", ".jpg", ".jpeg"]),
# outputs="image",
# live=True,
# title="OCR Term Extraction",
# description="Upload an image or PDF containing text to detect and annotate terms such as 'Bill of Lading', 'Page', and signatures.",
# allow_flagging="never"
# )
# iface.launch()
import os
import cv2
import re
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR
import gradio as gr
# Function to check proximity of bounding boxes
def are_boxes_close(box1, box2, y_threshold=50):
    """Return True when two quadrilateral boxes are vertically near each other.

    Each box is a 4-point quad ``[[x, y], ...]``; the vertical center is taken
    as the midpoint of the first (top-left) and third (bottom-right) corners.
    Boxes count as "close" when their centers differ by at most *y_threshold*
    pixels.
    """
    center_a = (box1[0][1] + box1[2][1]) / 2
    center_b = (box2[0][1] + box2[2][1]) / 2
    return abs(center_a - center_b) <= y_threshold
# Function to extract terms with specific rules
def extract_specific_terms(ocr_results):
    """Filter raw PaddleOCR results down to the terms this app cares about.

    Args:
        ocr_results: PaddleOCR output — a list whose first element is a list of
            ``[bounding_box, (text, confidence)]`` lines, or ``[None]``/empty
            when no text was detected on the page.

    Returns:
        list[dict]: one ``{'detected_text': str, 'bounding_box': list}`` entry
        per matched term. "Shipper"/"Receiver"/"Carrier" labels are renamed to
        "<label> Signature" for the annotation step.
    """
    extracted_terms = []
    # PaddleOCR yields [None] for pages with no detected text; guard so we
    # return an empty list instead of raising TypeError on iteration.
    if not ocr_results or not ocr_results[0]:
        return extracted_terms
    for line in ocr_results[0]:
        detected_text = line[1][0]  # recognized text string
        box = line[0]               # 4-point bounding box of the text
        if re.match(r"Bill of Lading:\s*\d+", detected_text):
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
        elif re.match(r"Page:\s*\w+", detected_text):
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
        elif detected_text in ["Shipper", "Receiver", "Carrier"]:
            # Role labels mark where a signature belongs on the form.
            extracted_terms.append({'detected_text': detected_text + " Signature", 'bounding_box': box})
        elif detected_text == "Signature":
            extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
    return extracted_terms
# Function to annotate image with detected terms
def annotate_image_with_terms(image, terms):
    """Draw each term's bounding box and label onto *image*.

    Args:
        image: BGR numpy array (OpenCV convention).
        terms: list of ``{'detected_text': str, 'bounding_box': quad}`` dicts
            as produced by ``extract_specific_terms``.

    Returns:
        A new BGR numpy array with blue box outlines and red text labels.
    """
    canvas = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    painter = ImageDraw.Draw(canvas)
    font_size = 20
    try:
        label_font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        # Arial may be missing (e.g. on Linux hosts); fall back to PIL's default.
        label_font = ImageFont.load_default()
    for entry in terms:
        corners = [(int(pt[0]), int(pt[1])) for pt in entry['bounding_box']]
        painter.polygon(corners, outline="blue", width=2)
        # Place the label just above the box's first (top-left) corner.
        anchor = (corners[0][0], corners[0][1] - font_size - 5)
        painter.text(anchor, entry['detected_text'], fill="red", font=label_font)
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
# Main processing function
def process_file(file):
    """Run OCR on an uploaded image and return it annotated as a PIL image.

    Args:
        file: uploaded file object with a ``.name`` path attribute
            (as provided by ``gr.File``).

    Returns:
        PIL.Image.Image: the input image with detected terms outlined and
        labeled, in correct RGB channel order.

    Raises:
        ValueError: if the path cannot be decoded as an image.
    """
    ocr = PaddleOCR(lang='en')
    # Handle image files (PNG, JPG, JPEG)
    image = cv2.imread(file.name)
    if image is None:
        # cv2.imread silently returns None on failure; fail loudly instead of
        # crashing later inside the OCR call with a cryptic error.
        raise ValueError(f"Could not read image file: {file.name}")
    ocr_results = ocr.ocr(image, cls=True)
    extracted_terms = extract_specific_terms(ocr_results)
    annotated_image = annotate_image_with_terms(image, extracted_terms)
    # annotate_image_with_terms returns a BGR array; convert to RGB so
    # Image.fromarray does not swap the red and blue channels.
    return Image.fromarray(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))
# Gradio Interface
def gradio_interface(file):
    """Gradio callback: annotate the uploaded image and return the result."""
    return process_file(file)
# Gradio UI: one file input, one image output.
# live=True re-runs the callback whenever the input changes;
# allow_flagging="never" hides the flagging button.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload an Image", file_types=[".png", ".jpg", ".jpeg"]),
    outputs="image",
    live=True,
    title="OCR Term Extraction",
    description="Upload an image containing text to detect and annotate terms such as 'Bill of Lading', 'Page', and signatures.",
    allow_flagging="never"
)
# Start the local web server (blocking call).
iface.launch()