File size: 3,591 Bytes
7850a69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import tempfile
import gradio as gr
from gradio_pdf import PDF
from remittance_pdf_processor import process_pdf_with_flow
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal


def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Return the extracted invoice numbers that appear in the ground truth.

    Matching is case-insensitive and ignores surrounding whitespace on both
    the ground-truth tokens and the candidates. Empty ground-truth tokens
    (blank input, trailing or doubled commas) are discarded so that an empty
    extracted value is never reported as a match.

    Args:
        invoice_numbers: Candidate invoice numbers to check.
        ground_truth_invoices: Comma-separated expected invoice numbers.

    Returns:
        The candidates found in the ground truth, unmodified and in their
        original order.
    """
    # Build the lookup set once; skip tokens that are empty after stripping,
    # otherwise "" would become a valid ground-truth entry.
    ground_truth_set = {
        token.strip().lower()
        for token in (ground_truth_invoices or "").split(',')
        if token.strip()
    }
    # Normalize candidates the same way for comparison, but return them as given.
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]

def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Report whether an extracted payment amount equals the ground-truth amount.

    Both amounts are passed through ``format_amount_str_to_decimal`` and the
    converted values are compared for equality. ``invoice_numbers`` and
    ``ground_truth_invoices`` are accepted but not consulted here.
    """
    extracted_value = format_amount_str_to_decimal(amount)
    expected_value = format_amount_str_to_decimal(ground_truth_amount)
    return extracted_value == expected_value

def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
    """Run remittance extraction on a PDF and format the outcome as text.

    The PDF is handed to ``process_pdf_with_flow`` together with two verifier
    callbacks bound to the supplied ground truth. The result is unpacked into
    a verified and an unverified candidate; index 0 of each is read as the
    invoice-number candidates and index 1 as the payment-amount candidates.
    Every unverified candidate is listed and tagged "Verified" when it also
    appears among the verified ones.

    Args:
        pdf_file_path: Path of the PDF to process. ``None`` or an empty string
            (no upload) short-circuits with an explanatory message.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount.

    Returns:
        A multi-line report string, or an error message string if no file was
        provided or processing raised an exception.
    """
    # Without an uploaded file there is nothing to extract; answer directly
    # instead of letting the extraction flow fail on a missing path.
    if not pdf_file_path:
        return "No PDF file was provided. Please upload a PDF before processing."

    def invoice_verifier(invoice_numbers):
        # Bind the ground-truth invoices so the flow can call this with candidates only.
        return custom_invoice_numbers_verifier(invoice_numbers, ground_truth_invoices)

    def invoice_and_amount_verifier(invoice_numbers, amount):
        # Bind both ground-truth values for the combined invoice/amount check.
        return custom_invoice_and_amount_verifier(
            invoice_numbers, amount, ground_truth_invoices, ground_truth_amount
        )

    try:
        extracted_result: ProcessedPDFResult = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier
        )

        verified_candidate, unverified_candidate = extracted_result

        # Order-insensitive lookup of verified invoice-number groups, built once
        # rather than rebuilding a set per verified entry for every candidate.
        verified_invoice_sets = {frozenset(v) for v in verified_candidate[0]}
        verified_amounts = verified_candidate[1]

        lines = [
            "Extracted invoice numbers and payment amounts:",
            f"Ground Truth Invoices: {ground_truth_invoices}",
            f"Ground Truth Amount: {ground_truth_amount}",
            "-" * 40,
            "Invoice Numbers:",
        ]
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
            status = "Verified" if frozenset(invoice_numbers) in verified_invoice_sets else "Unverified"
            lines.append(f"  Candidate {i} ({status}): {', '.join(invoice_numbers)}")

        # Blank separator line between the two sections.
        lines.append("")
        lines.append("Payment Amounts:")
        for i, amount in enumerate(unverified_candidate[1], 1):
            status = "Verified" if amount in verified_amounts else "Unverified"
            lines.append(f"  Candidate {i} ({status}): {amount}")

        # Every report line, including the last, ends with a newline.
        return "\n".join(lines) + "\n"
    except Exception as e:
        # UI boundary: log the full traceback and show the message to the user
        # instead of propagating the exception.
        error_message = f"An error occurred while processing the PDF: {e}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message

# Build the Gradio UI: a two-column layout with the PDF upload on the left and
# the ground-truth inputs, submit button and results box on the right.
with gr.Blocks() as iface:
    # Page title and short usage instructions.
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")

    with gr.Row():
        # Left column: PDF upload component from gradio_pdf.
        with gr.Column(scale=1):
            pdf_input = PDF(label="Upload PDF", visible=True)

        # Right column: ground-truth inputs, trigger button and text output.
        with gr.Column(scale=1):
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")

            submit_button = gr.Button("Process PDF")

            output = gr.Textbox(label="Processing Results", lines=20)

    # Clicking the button calls process_pdf_file with the three input
    # components' values, in the listed order, and writes the returned string
    # to the results textbox.
    submit_button.click(
        process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output]
    )

# Launch the interface.
# NOTE(review): this runs at module import time, not only when executed as a
# script; consider an `if __name__ == "__main__":` guard if this module is ever
# imported elsewhere.
iface.launch()