# File: remittance_pdf_processing_cli.py
"""Command-line front end for remittance PDF processing.

Extracts the text of a PDF, runs ``process_pdf`` on it, and reports the
verified and unverified invoice-number / payment-amount candidates on the
console and, optionally, in an output file.
"""

import argparse

from remittance_pdf_processor import process_pdf, extract_text_from_pdf
from ccmt_verifier_aus import verify_invoice_numbers
from remittance_pdf_processing_types import Candidate, ProcessedPDFResult

# Horizontal rule used to frame sections of the console report.
_SEPARATOR = "-" * 40


def _flag_label(enabled):
    """Return the human-readable label for a boolean option."""
    return "Enabled" if enabled else "Disabled"


def _settings_lines(multi_hop, verify):
    """Return the report heading plus one line per processing option.

    Args:
        multi_hop: Whether ``--multi-hop`` was given.
        verify: Whether ``--verify`` was given.

    Returns:
        A list of strings, one per report line, without trailing newlines.
    """
    return [
        "Extracted invoice numbers and payment amounts:",
        f"Multi-hop processing: {_flag_label(multi_hop)}",
        f"Verification: {_flag_label(verify)}",
    ]


def _verified_lines(verified_candidate):
    """Return the report lines describing the verified candidate.

    Args:
        verified_candidate: Indexable pair; element 0 is a list of
            invoice-number lists and element 1 is a list of amounts
            (presumably a ``Candidate`` -- confirm against
            ``remittance_pdf_processing_types``).

    Returns:
        A list of strings, one per report line, without trailing newlines.
    """
    invoice_lists, amounts = verified_candidate[0], verified_candidate[1]
    lines = ["Verified Results:"]

    if len(invoice_lists) == 1 and len(amounts) == 1:
        lines.append(f" Verified Invoice Numbers: {', '.join(invoice_lists[0])}")
        lines.append(f" Verified Amount: {amounts[0]}")
    elif len(invoice_lists) == 1 and len(amounts) == 0:
        lines.append(f" Verified Invoice Numbers: {', '.join(invoice_lists[0])}")
        lines.append(" Amount doesn't match. This might be a partial payment.")
    elif len(invoice_lists) >= 2:
        lines.append(" Verified Invoice Numbers:")
        for i, invoice_numbers in enumerate(invoice_lists, 1):
            lines.append(f" List {i}: {', '.join(invoice_numbers)}")
    else:
        # Reached when there are no invoice lists, or exactly one invoice
        # list paired with two or more amounts.
        lines.append(" No verified results.")
    return lines


def _unverified_lines(unverified_candidate):
    """Return the report lines listing every unverified candidate.

    Args:
        unverified_candidate: Indexable pair; element 0 is a list of
            invoice-number lists and element 1 is a list of amounts.

    Returns:
        A list of strings, one per report line, without trailing newlines.
        The two section headings carry a leading newline so that a blank
        line separates the sections when the lines are joined.
    """
    lines = ["\nUnverified Invoice Numbers:"]
    for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
        lines.append(f" Candidate {i}: {', '.join(invoice_numbers)}")
    lines.append("\nUnverified Payment Amounts:")
    for i, amount in enumerate(unverified_candidate[1], 1):
        lines.append(f" Candidate {i}: {amount}")
    return lines


def main():
    """Parse the command line, process the PDF, and report the results.

    The report is always printed to stdout.  With ``-v`` the raw extracted
    text is printed first; with ``-o`` the extracted text and the report
    are also written to the given file (UTF-8).
    """
    parser = argparse.ArgumentParser(description="Extract text and invoice numbers from a PDF file.")
    parser.add_argument("pdf_file", help="Path to the PDF file")
    parser.add_argument("-o", "--output", help="Output file path for extracted text (optional)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print extracted text from PDF")
    parser.add_argument("-f", "--force-image", action="store_true", help="Force processing PDF as image")
    parser.add_argument("--verify", action="store_true", help="Enable invoice number verification")
    parser.add_argument("--multi-hop", action="store_true", help="Force multi-hop processing")
    args = parser.parse_args()

    extracted_text = extract_text_from_pdf(args.pdf_file, wrap_pages=True)

    # Determine the invoice verifier function
    invoice_verifier = verify_invoice_numbers if args.verify else None

    extracted_result: ProcessedPDFResult = process_pdf(
        args.pdf_file,
        force_image_processing=args.force_image,
        force_multi_hop=args.multi_hop,
        invoice_verifier=invoice_verifier,
    )
    verified_candidate, unverified_candidate = extracted_result

    # Build the report once so the console and file outputs cannot drift.
    settings = _settings_lines(args.multi_hop, args.verify)
    results = _verified_lines(verified_candidate) + _unverified_lines(unverified_candidate)

    if args.verbose:
        print("Extracted text from PDF:")
        print(_SEPARATOR)
        print(extracted_text)
        print(_SEPARATOR)
        print()

    # Console layout: the results are framed by separator rules.
    print("\n".join(settings))
    print(_SEPARATOR)
    print("\n".join(results))
    print(_SEPARATOR)

    if args.output:
        # File layout: same report lines, but without the separator rules.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write("Extracted text:\n")
            f.write(extracted_text)
            f.write("\n\n")
            f.writelines(f"{line}\n" for line in settings + results)
        print(f"Extracted text and invoice numbers written to {args.output}")


if __name__ == "__main__":
    main()