"""Gradio front end for checking remittance-PDF extraction against ground truth.

The user uploads a PDF and types the expected invoice numbers and payment
amount. The extracted candidates are listed and each one is labelled
"Verified" or "Unverified".
"""

import os
import tempfile
from decimal import Decimal, InvalidOperation
from typing import Optional

import gradio as gr
from gradio_pdf import PDF

from remittance_pdf_processor import process_pdf_with_flow
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal


def is_valid_decimal(s: str) -> bool:
    """Return True if *s* parses as a finite decimal number.

    ``Decimal`` also accepts the special values "NaN", "sNaN" and
    "Infinity"; those are rejected here because they are not usable as a
    payment amount.
    """
    try:
        return Decimal(s).is_finite()
    except InvalidOperation:
        return False


def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Keep only the invoice numbers that appear in the ground truth.

    Args:
        invoice_numbers: Extracted invoice numbers to check.
        ground_truth_invoices: Comma-separated expected invoice numbers.

    Returns:
        The entries of *invoice_numbers* whose lower-cased form matches a
        ground-truth entry (ground-truth entries are stripped and
        lower-cased before comparison), in their original order.
    """
    ground_truth_set = {
        token.strip().lower() for token in ground_truth_invoices.split(',')
    }
    # A trailing or doubled comma yields an empty token; without this an
    # empty extracted invoice number would be reported as a match.
    ground_truth_set.discard("")
    return [num for num in invoice_numbers if num.lower() in ground_truth_set]


def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Return True if *amount* equals the ground-truth amount.

    Both amounts are normalised with ``format_amount_str_to_decimal`` before
    being compared. *invoice_numbers* and *ground_truth_invoices* are
    accepted but not used in the comparison.
    """
    return format_amount_str_to_decimal(amount) == format_amount_str_to_decimal(ground_truth_amount)


def process_pdf_file(pdf_file_path: Optional[str], ground_truth_invoices: str, ground_truth_amount: str):
    """Run extraction on a PDF and format a report against the ground truth.

    Args:
        pdf_file_path: Path of the uploaded PDF; falsy when nothing has been
            uploaded.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount as a decimal string.

    Returns:
        A multi-line report listing every extracted candidate with its
        verification status, or a single-line error message when the input
        is invalid or processing raises.
    """
    # Input validation
    if not pdf_file_path:
        return "Error: Please upload a PDF file."

    if not ground_truth_invoices.strip():
        return "Error: Ground Truth Invoices field cannot be empty."

    if not ground_truth_amount.strip():
        return "Error: Ground Truth Amount field cannot be empty."

    if not is_valid_decimal(ground_truth_amount.strip()):
        return "Error: Ground Truth Amount must be a valid decimal number."

    def invoice_verifier(candidate_numbers):
        # Binds the ground-truth invoices for the processing flow.
        return custom_invoice_numbers_verifier(candidate_numbers, ground_truth_invoices)

    def invoice_and_amount_verifier(candidate_numbers, candidate_amount):
        # Binds the ground-truth invoices and amount for the processing flow.
        return custom_invoice_and_amount_verifier(
            candidate_numbers, candidate_amount, ground_truth_invoices, ground_truth_amount
        )

    try:
        extracted_result: ProcessedPDFResult = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier
        )

        # Each candidate is indexed as [0] -> invoice-number lists,
        # [1] -> payment amounts.
        verified_candidate, unverified_candidate = extracted_result

        # Built once so each candidate is a single membership test; the
        # comparison ignores order and duplicates, like set equality.
        verified_invoice_sets = {frozenset(v) for v in verified_candidate[0]}

        report_lines = [
            f"Ground Truth Invoices: {ground_truth_invoices}",
            f"Ground Truth Amount: {ground_truth_amount}",
            "-" * 40,
            "Extracted Invoice Numbers:",
        ]

        for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
            status = "Verified" if frozenset(invoice_numbers) in verified_invoice_sets else "Unverified"
            report_lines.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}")

        # Blank separator line between the two sections.
        report_lines.append("")
        report_lines.append("Extracted Payment Amounts:")

        for i, amount in enumerate(unverified_candidate[1], 1):
            status = "Verified" if amount in verified_candidate[1] else "Unverified"
            report_lines.append(f" Candidate {i} ({status}): {amount}")

        return "\n".join(report_lines) + "\n"

    except Exception as e:
        # UI boundary: log the traceback and show the message in the output
        # box instead of letting the exception escape the click handler.
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message


# Create the Gradio interface
# NOTE(review): the nesting of the widgets below was reconstructed from a
# whitespace-collapsed source; confirm that the button and the results box
# belong inside the right-hand column.
with gr.Blocks() as iface:
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")

    with gr.Row():
        with gr.Column():
            pdf_input = PDF(label="Upload PDF", visible=True, height=900)

        with gr.Column():
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")
            submit_button = gr.Button("Process PDF")
            output = gr.Textbox(label="Processing Results", lines=20)

    submit_button.click(
        process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output]
    )

# Launch the interface
iface.launch()