remittance-poc-with-verifier / remittance_pdf_processing_cli.py
eelang's picture
Upload 8 files
7850a69 verified
raw
history blame
4.91 kB
# File: remittance_pdf_processing_cli.py
import argparse
from remittance_pdf_processor import process_pdf, extract_text_from_pdf
from ccmt_verifier_aus import verify_invoice_numbers
from remittance_pdf_processing_types import Candidate, ProcessedPDFResult
def _verified_result_lines(verified_candidate) -> list:
    """Return the report lines describing the verified candidate.

    Args:
        verified_candidate: Indexable pair where element 0 is a sequence of
            invoice-number lists and element 1 is a sequence of amounts.

    Returns:
        A list of lines (without trailing newlines) for the
        "Verified Results" section of the report.
    """
    invoice_lists = verified_candidate[0]
    if len(invoice_lists) == 1:
        # Element 1 is only consulted when exactly one invoice list exists,
        # matching the short-circuit behavior of the original conditions.
        amounts = verified_candidate[1]
        invoices_line = f" Verified Invoice Numbers: {', '.join(invoice_lists[0])}"
        if len(amounts) == 1:
            return [invoices_line, f" Verified Amount: {amounts[0]}"]
        if len(amounts) == 0:
            return [
                invoices_line,
                " Amount doesn't match. This might be a partial payment.",
            ]
    elif len(invoice_lists) >= 2:
        lines = [" Verified Invoice Numbers:"]
        lines.extend(
            f" List {i}: {', '.join(invoice_numbers)}"
            for i, invoice_numbers in enumerate(invoice_lists, 1)
        )
        return lines
    # Covers: no invoice lists at all, or one list with two or more amounts.
    return [" No verified results."]


def _unverified_result_lines(unverified_candidate) -> list:
    """Return the report lines listing every unverified candidate.

    Args:
        unverified_candidate: Indexable pair where element 0 is a sequence of
            invoice-number lists and element 1 is a sequence of amounts.

    Returns:
        A list of lines (without trailing newlines); the two section headers
        carry a leading newline so a blank line precedes each section.
    """
    lines = ["\nUnverified Invoice Numbers:"]
    lines.extend(
        f" Candidate {i}: {', '.join(invoice_numbers)}"
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1)
    )
    lines.append("\nUnverified Payment Amounts:")
    lines.extend(
        f" Candidate {i}: {amount}"
        for i, amount in enumerate(unverified_candidate[1], 1)
    )
    return lines


def main(argv=None):
    """Run the remittance PDF command-line tool.

    Parses the command line, runs ``process_pdf`` on the given file, prints
    the verified and unverified invoice numbers / payment amounts, and
    optionally writes the extracted text plus the same report to a file.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default), ``sys.argv[1:]`` is used, so
            calling ``main()`` behaves exactly as before.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Extract text and invoice numbers from a PDF file.")
    parser.add_argument("pdf_file", help="Path to the PDF file")
    parser.add_argument("-o", "--output", help="Output file path for extracted text (optional)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print extracted text from PDF")
    parser.add_argument("-f", "--force-image", action="store_true", help="Force processing PDF as image")
    parser.add_argument("--verify", action="store_true", help="Enable invoice number verification")
    parser.add_argument("--multi-hop", action="store_true", help="Force multi-hop processing")
    args = parser.parse_args(argv)

    # The raw text is only shown with --verbose or written with --output, so
    # skip the extraction pass entirely when neither option is given.
    extracted_text = None
    if args.verbose or args.output:
        extracted_text = extract_text_from_pdf(args.pdf_file, wrap_pages=True)

    # Determine the invoice verifier function
    invoice_verifier = verify_invoice_numbers if args.verify else None

    extracted_result: ProcessedPDFResult = process_pdf(
        args.pdf_file,
        force_image_processing=args.force_image,
        force_multi_hop=args.multi_hop,
        invoice_verifier=invoice_verifier,
    )
    verified_candidate, unverified_candidate = extracted_result

    # Build the report once so console and file output cannot drift apart.
    settings_lines = [
        f"Multi-hop processing: {'Enabled' if args.multi_hop else 'Disabled'}",
        f"Verification: {'Enabled' if args.verify else 'Disabled'}",
    ]
    result_lines = ["Verified Results:"]
    result_lines.extend(_verified_result_lines(verified_candidate))
    result_lines.extend(_unverified_result_lines(unverified_candidate))

    separator = "-" * 40

    if args.verbose:
        print("Extracted text from PDF:")
        print(separator)
        print(extracted_text)
        print(separator)
        print()

    # Console report: same lines as the file report, framed by separators.
    print("Extracted invoice numbers and payment amounts:")
    for line in settings_lines:
        print(line)
    print(separator)
    for line in result_lines:
        print(line)
    print(separator)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write("Extracted text:\n")
            f.write(extracted_text)
            f.write("\n\nExtracted invoice numbers and payment amounts:\n")
            f.writelines(f"{line}\n" for line in settings_lines + result_lines)
        print(f"Extracted text and invoice numbers written to {args.output}")
# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()