"""Gradio app that OCRs an uploaded image and annotates bill-of-lading terms.

The uploaded image is run through PaddleOCR; text lines matching a small set
of rules ("Bill of Lading: <digits>", "Page: <word>", "Shipper" / "Receiver" /
"Carrier", "Signature") are outlined and labelled on the image, which is then
shown in the Gradio UI.
"""

# ---------------------------------------------------------------------------
# Earlier revision of this script (commented out). It additionally accepted
# PDF uploads, rasterised through pdf2image/Poppler.
# ---------------------------------------------------------------------------
# import os
# import cv2
# import re
# import numpy as np
# from PIL import Image, ImageDraw, ImageFont
# from paddleocr import PaddleOCR
# from pdf2image import convert_from_path
# import gradio as gr
#
# # Specify the path to the Poppler bin directory
# poppler_path = r"C:\\poppler\\poppler-24.08.0\\Library\\bin"
#
# # Function to check proximity of bounding boxes
# def are_boxes_close(box1, box2, y_threshold=50):
#     y1_center = (box1[0][1] + box1[2][1]) / 2
#     y2_center = (box2[0][1] + box2[2][1]) / 2
#     return abs(y1_center - y2_center) <= y_threshold
#
# # Function to extract terms with specific rules
# def extract_specific_terms(ocr_results):
#     extracted_terms = []
#     for line in ocr_results[0]:
#         detected_text = line[1][0]  # Extracted text
#         box = line[0]  # Bounding box of the detected text
#         if re.match(r"Bill of Lading:\s*\d+", detected_text):
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#         elif re.match(r"Page:\s*\w+", detected_text):
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#         elif detected_text in ["Shipper", "Receiver", "Carrier"]:
#             extracted_terms.append({'detected_text': detected_text + " Signature", 'bounding_box': box})
#         elif detected_text == "Signature":
#             extracted_terms.append({'detected_text': detected_text, 'bounding_box': box})
#     return extracted_terms
#
# # Function to annotate image with detected terms
# def annotate_image_with_terms(image, terms):
#     pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
#     draw = ImageDraw.Draw(pil_image)
#     font_size = 40
#     try:
#         font = ImageFont.truetype("arial.ttf", font_size)
#     except IOError:
#         font = ImageFont.load_default()
#     for term in terms:
#         box = term['bounding_box']
#         detected_text = term['detected_text']
#         points = [(int(x[0]), int(x[1])) for x in box]
#         draw.polygon(points, outline="blue", width=2)
#         position = (points[0][0], points[0][1] - font_size - 5)
#         draw.text(position, detected_text, fill="red", font=font)
#     return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
#
# # Main processing function
# def process_file(file):
#     ocr = PaddleOCR(lang='en')
#     extracted_terms = []
#     if file.name.endswith(".pdf"):
#         images = convert_from_path(file.name, poppler_path=poppler_path)
#         processed_images = []
#         for image in images:
#             image_np = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
#             ocr_results = ocr.ocr(image_np, cls=True)
#             extracted_terms = extract_specific_terms(ocr_results)
#             annotated_image = annotate_image_with_terms(image_np, extracted_terms)
#             processed_images.append(annotated_image)
#         return [Image.fromarray(img) for img in processed_images]
#     else:
#         image = cv2.imread(file.name)
#         ocr_results = ocr.ocr(image, cls=True)
#         extracted_terms = extract_specific_terms(ocr_results)
#         annotated_image = annotate_image_with_terms(image, extracted_terms)
#         return Image.fromarray(annotated_image)
#
# # Gradio Interface
# def gradio_interface(file):
#     result = process_file(file)
#     if isinstance(result, list):
#         return result[0]  # Display only the first page
#     return result
#
# iface = gr.Interface(
#     fn=gradio_interface,
#     inputs=gr.File(label="Upload an Image or PDF", file_types=[".pdf", ".png", ".jpg", ".jpeg"]),
#     outputs="image",
#     live=True,
#     title="OCR Term Extraction",
#     description="Upload an image or PDF containing text to detect and annotate terms such as 'Bill of Lading', 'Page', and signatures.",
#     allow_flagging="never"
# )
#
# iface.launch()

import os
import re

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR

# Patterns are anchored at the start of the OCR line (``re.match`` semantics).
_BILL_OF_LADING_RE = re.compile(r"Bill of Lading:\s*\d+")
_PAGE_RE = re.compile(r"Page:\s*\w+")

# Party names that are relabelled "<name> Signature" when found on their own.
_SIGNATORY_LABELS = frozenset({"Shipper", "Receiver", "Carrier"})


# Function to check proximity of bounding boxes
def are_boxes_close(box1, box2, y_threshold=50):
    """Return True when two boxes sit at roughly the same vertical position.

    The vertical centre of each box is taken as the mean of the y values of
    its points at index 0 and index 2 (presumably opposite corners of an OCR
    quadrilateral -- confirm against the OCR output format).

    Args:
        box1: Sequence of (x, y) points; indices 0 and 2 are read.
        box2: Sequence of (x, y) points; indices 0 and 2 are read.
        y_threshold: Largest allowed distance between the two vertical
            centres, in the same units as the coordinates.

    Returns:
        bool: Whether the centres differ by at most ``y_threshold``.
    """
    y1_center = (box1[0][1] + box1[2][1]) / 2
    y2_center = (box2[0][1] + box2[2][1]) / 2
    return abs(y1_center - y2_center) <= y_threshold


# Function to extract terms with specific rules
def extract_specific_terms(ocr_results):
    """Pick out the OCR lines that match the bill-of-lading rules.

    Args:
        ocr_results: Result of ``PaddleOCR.ocr``; only the first element is
            read. Each entry in it is indexed as ``line[0]`` (bounding box)
            and ``line[1][0]`` (recognised text).

    Returns:
        list[dict]: One ``{'detected_text': ..., 'bounding_box': ...}`` entry
        per matching line, in OCR order. Empty when there is nothing to read.
    """
    # A page with no recognised text can come back as None (or the result
    # list may be empty); iterating it directly would raise TypeError.
    if not ocr_results or not ocr_results[0]:
        return []

    extracted_terms = []
    for line in ocr_results[0]:
        box = line[0]  # Bounding box of the detected text
        detected_text = line[1][0]  # Extracted text

        if (
            _BILL_OF_LADING_RE.match(detected_text)
            or _PAGE_RE.match(detected_text)
            or detected_text == "Signature"
        ):
            label = detected_text
        elif detected_text in _SIGNATORY_LABELS:
            label = detected_text + " Signature"
        else:
            continue

        extracted_terms.append({'detected_text': label, 'bounding_box': box})
    return extracted_terms


# Function to annotate image with detected terms
def annotate_image_with_terms(image, terms):
    """Outline each term's box in blue and write its label in red above it.

    Args:
        image: BGR image array (it is converted with ``COLOR_BGR2RGB`` before
            drawing).
        terms: Iterable of dicts with ``'bounding_box'`` (sequence of (x, y)
            points) and ``'detected_text'`` keys.

    Returns:
        numpy.ndarray: A new annotated image, converted back to BGR order.
    """
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)
    font_size = 20
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        # Arial is not available on every system; fall back to Pillow's
        # built-in font rather than failing the whole request.
        font = ImageFont.load_default()

    for term in terms:
        points = [(int(corner[0]), int(corner[1])) for corner in term['bounding_box']]
        draw.polygon(points, outline="blue", width=2)

        # Place the label just above the first point, but never above the top
        # edge of the image, where it would be drawn off-canvas.
        label_x = points[0][0]
        label_y = max(0, points[0][1] - font_size - 5)
        draw.text((label_x, label_y), term['detected_text'], fill="red", font=font)

    return cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)


def _resolve_upload_path(file):
    """Return the filesystem path for an upload given as a path or file object."""
    if isinstance(file, (str, os.PathLike)):
        return os.fspath(file)
    return file.name


# Main processing function
def process_file(file):
    """Run OCR on an uploaded image and return the annotated picture.

    Args:
        file: Uploaded file: either an object exposing the path as ``.name``
            or a plain path string / ``os.PathLike``.

    Returns:
        PIL.Image.Image: The annotated image in RGB order.

    Raises:
        ValueError: If the file cannot be decoded as an image.
    """
    path = _resolve_upload_path(file)

    # Handle image files (PNG, JPG, JPEG)
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals an unreadable/unsupported file by returning None.
        raise ValueError(f"Could not read an image from {path!r}")

    # NOTE(review): the OCR engine is rebuilt on every call, which reloads the
    # models each time. It could be cached once it is confirmed that a shared
    # instance is safe under Gradio's request handling.
    ocr = PaddleOCR(lang='en')
    ocr_results = ocr.ocr(image, cls=True)
    extracted_terms = extract_specific_terms(ocr_results)
    annotated_image = annotate_image_with_terms(image, extracted_terms)

    # annotate_image_with_terms returns BGR; PIL interprets arrays as RGB, so
    # convert first or the red and blue channels are swapped in the output.
    return Image.fromarray(cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB))


# Gradio Interface
def gradio_interface(file):
    """Gradio callback: annotate the uploaded file, or show nothing if cleared.

    Args:
        file: The value of the ``gr.File`` input; ``None`` when no file is
            selected (the interface is live, so clearing the input also
            triggers this callback).

    Returns:
        PIL.Image.Image | None: The annotated image, or ``None`` without input.
    """
    if file is None:
        return None
    return process_file(file)


iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload an Image", file_types=[".png", ".jpg", ".jpeg"]),
    outputs="image",
    live=True,
    title="OCR Term Extraction",
    description=(
        "Upload an image containing text to detect and annotate terms such "
        "as 'Bill of Lading', 'Page', and signatures."
    ),
    allow_flagging="never",
)

iface.launch()