|
|
|
|
|
import argparse |
|
from remittance_pdf_processor import process_pdf, extract_text_from_pdf |
|
from ccmt_verifier_aus import verify_invoice_numbers |
|
from remittance_pdf_processing_types import Candidate, ProcessedPDFResult |
|
|
|
def _verified_section(verified_candidate):
    """Return the report lines describing the verified candidate.

    ``verified_candidate[0]`` is treated as a sequence of invoice-number
    lists (each joined with ", ") and ``verified_candidate[1]`` as a sequence
    of amounts.  The branch taken depends only on the lengths of those two
    sequences.
    """
    if len(verified_candidate[0]) == 1 and len(verified_candidate[1]) == 1:
        return [
            f" Verified Invoice Numbers: {', '.join(verified_candidate[0][0])}",
            f" Verified Amount: {verified_candidate[1][0]}",
        ]
    if len(verified_candidate[0]) == 1 and len(verified_candidate[1]) == 0:
        return [
            f" Verified Invoice Numbers: {', '.join(verified_candidate[0][0])}",
            " Amount doesn't match. This might be a partial payment.",
        ]
    if len(verified_candidate[0]) >= 2:
        lines = [" Verified Invoice Numbers:"]
        lines.extend(
            f" List {i}: {', '.join(invoice_numbers)}"
            for i, invoice_numbers in enumerate(verified_candidate[0], 1)
        )
        return lines
    return [" No verified results."]


def _results_section(verified_candidate, unverified_candidate):
    """Return the report lines shared by the console and the output file.

    Covers everything from the "Verified Results:" heading through the list
    of unverified payment amounts.  Empty strings stand for the blank lines
    that precede the two "Unverified ..." headings.
    """
    lines = ["Verified Results:"]
    lines.extend(_verified_section(verified_candidate))

    lines.extend(["", "Unverified Invoice Numbers:"])
    lines.extend(
        f" Candidate {i}: {', '.join(invoice_numbers)}"
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1)
    )

    lines.extend(["", "Unverified Payment Amounts:"])
    lines.extend(
        f" Candidate {i}: {amount}"
        for i, amount in enumerate(unverified_candidate[1], 1)
    )
    return lines


def main():
    """Command-line entry point: process a PDF and report what was found.

    Parses the command line, runs ``process_pdf`` on the given file, prints
    the verified and unverified invoice numbers / payment amounts, and, when
    ``--output`` is given, writes the extracted text plus the same report to
    that file.
    """
    parser = argparse.ArgumentParser(description="Extract text and invoice numbers from a PDF file.")
    parser.add_argument("pdf_file", help="Path to the PDF file")
    parser.add_argument("-o", "--output", help="Output file path for extracted text (optional)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print extracted text from PDF")
    parser.add_argument("-f", "--force-image", action="store_true", help="Force processing PDF as image")
    parser.add_argument("--verify", action="store_true", help="Enable invoice number verification")
    parser.add_argument("--multi-hop", action="store_true", help="Force multi-hop processing")
    args = parser.parse_args()

    # The raw text is only shown with --verbose or saved with --output, so
    # skip the extraction pass entirely when neither was requested.
    extracted_text = None
    if args.verbose or args.output:
        extracted_text = extract_text_from_pdf(args.pdf_file, wrap_pages=True)

    invoice_verifier = verify_invoice_numbers if args.verify else None

    extracted_result: ProcessedPDFResult = process_pdf(
        args.pdf_file,
        force_image_processing=args.force_image,
        force_multi_hop=args.multi_hop,
        invoice_verifier=invoice_verifier,
    )
    verified_candidate, unverified_candidate = extracted_result

    separator = "-" * 40

    if args.verbose:
        print("Extracted text from PDF:")
        print(separator)
        print(extracted_text)
        print(separator)
        print()

    # Build the report once; the console and the file differ only in the
    # separator lines (console) and the leading extracted text (file).
    header = [
        "Extracted invoice numbers and payment amounts:",
        f"Multi-hop processing: {'Enabled' if args.multi_hop else 'Disabled'}",
        f"Verification: {'Enabled' if args.verify else 'Disabled'}",
    ]
    results = _results_section(verified_candidate, unverified_candidate)

    print("\n".join(header + [separator] + results + [separator]))

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write("Extracted text:\n")
            f.write(extracted_text)
            f.write("\n\n")
            f.write("\n".join(header + results) + "\n")
        print(f"Extracted text and invoice numbers written to {args.output}")
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()