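# Captured from a Hugging Face file view (page links: "raw", "history", "blame").
# Last commit: 21cc687 (verified) by Alejandro-STC - "Adjust column sizes"
# File size at capture: 3.78 kB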
import os
import tempfile
import gradio as gr
from gradio_pdf import PDF
from remittance_pdf_processor import process_pdf_with_flow
from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
from decimal import Decimal, InvalidOperation
def is_valid_decimal(s: str) -> bool:
    """Return True if *s* parses as a finite decimal number.

    Args:
        s: Text to check, e.g. ``"12.50"``.

    Returns:
        True when ``Decimal(s)`` succeeds and the value is finite; False for
        unparsable text, for non-string input such as ``None``, and for the
        special values NaN / Infinity, which ``Decimal`` accepts but which are
        not usable as a payment amount.
    """
    try:
        value = Decimal(s)
    except (InvalidOperation, TypeError, ValueError):
        # InvalidOperation: malformed text; TypeError/ValueError: unsupported
        # input type or shape (e.g. None).
        return False
    # Decimal("NaN") and Decimal("Infinity") construct without error, so they
    # must be rejected explicitly.
    return value.is_finite()
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """Keep only the extracted invoice numbers that appear in the ground truth.

    Matching ignores case and surrounding whitespace on both sides.

    Args:
        invoice_numbers: Extracted invoice numbers to check (iterated as
            strings).
        ground_truth_invoices: Comma-separated list of expected invoice
            numbers.

    Returns:
        A list with the entries of ``invoice_numbers`` (unmodified, in their
        original order) whose normalised form is in the ground truth.
    """
    ground_truth_set = {
        token.strip().lower() for token in ground_truth_invoices.split(',')
    }
    # A trailing or doubled comma yields blank tokens; without this an empty
    # extracted invoice number would be reported as a match.
    ground_truth_set.discard("")
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """Tell whether the extracted amount equals the ground-truth amount.

    Both values are passed through ``format_amount_str_to_decimal`` before
    being compared. ``invoice_numbers`` and ``ground_truth_invoices`` are
    accepted as part of the signature but are not consulted here.
    """
    extracted_value = format_amount_str_to_decimal(amount)
    expected_value = format_amount_str_to_decimal(ground_truth_amount)
    return extracted_value == expected_value
def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
    """Run the remittance extraction on a PDF and format a text report.

    Args:
        pdf_file_path: Path of the PDF to process.
        ground_truth_invoices: Comma-separated expected invoice numbers.
        ground_truth_amount: Expected payment amount as text.

    Returns:
        A multi-line report listing every extracted invoice-number candidate
        and payment-amount candidate, each tagged "Verified" or "Unverified";
        or a single-line message starting with "Error:" / "An error occurred"
        when validation or processing fails. This function does not raise:
        processing exceptions are logged and returned as text.
    """
    # Input validation. Missing values (None) are treated like empty input so
    # the user gets a clear message instead of a generic processing error.
    if not pdf_file_path:
        return "Error: Please upload a PDF file."
    if not (ground_truth_invoices or "").strip():
        return "Error: Ground Truth Invoices field cannot be empty."
    if not (ground_truth_amount or "").strip():
        return "Error: Ground Truth Amount field cannot be empty."
    if not is_valid_decimal(ground_truth_amount.strip()):
        return "Error: Ground Truth Amount must be a valid decimal number."

    # Bind the ground truth into the callbacks expected by the processing flow.
    def invoice_verifier(candidate_invoices):
        return custom_invoice_numbers_verifier(candidate_invoices, ground_truth_invoices)

    def invoice_and_amount_verifier(candidate_invoices, candidate_amount):
        return custom_invoice_and_amount_verifier(
            candidate_invoices, candidate_amount, ground_truth_invoices, ground_truth_amount
        )

    try:
        extracted_result: ProcessedPDFResult = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=invoice_verifier,
            invoice_and_amount_verifier=invoice_and_amount_verifier,
        )
        # Each candidate is indexed as [0] = invoice-number lists and
        # [1] = payment amounts.
        verified_candidate, unverified_candidate = extracted_result

        # Built once so the per-candidate check is a set lookup; order and
        # duplicates inside an invoice list do not affect the match.
        verified_invoice_sets = {frozenset(v) for v in verified_candidate[0]}

        report = [
            f"Ground Truth Invoices: {ground_truth_invoices}\n",
            f"Ground Truth Amount: {ground_truth_amount}\n",
            "-" * 40 + "\n",
            # Print Invoice Numbers
            "Extracted Invoice Numbers:\n",
        ]
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
            status = "Verified" if frozenset(invoice_numbers) in verified_invoice_sets else "Unverified"
            report.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}\n")
        report.append("\n")

        # Print Payment Amounts
        report.append("Extracted Payment Amounts:\n")
        for i, amount in enumerate(unverified_candidate[1], 1):
            status = "Verified" if amount in verified_candidate[1] else "Unverified"
            report.append(f" Candidate {i} ({status}): {amount}\n")
        return "".join(report)
    except Exception as e:
        # Top-level boundary for the UI callback: log with traceback and show
        # the message to the user rather than letting the handler crash.
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message
# Create the Gradio interface
# NOTE(review): the nesting below (which widgets sit inside which Column, and
# whether the click wiring is inside the Blocks context) is reconstructed from
# a source whose indentation was lost - confirm against the deployed layout.
with gr.Blocks() as iface:
    # Page title and short usage hint.
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")
    with gr.Row():
        # First column: the PDF upload widget.
        with gr.Column():
            pdf_input = PDF(label="Upload PDF", visible=True, height=900)
        # Second column: ground-truth inputs, the trigger button and the results box.
        with gr.Column():
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")
            submit_button = gr.Button("Process PDF")
            output = gr.Textbox(label="Processing Results", lines=20)
    # On click, call process_pdf_file with the three inputs in this order and
    # write its returned text into the results box.
    submit_button.click(
        process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output]
    )
# Launch the interface
# Runs at import time: there is no __main__ guard, so importing this module starts the app.
iface.launch()