eelang committed on
Commit
7850a69
1 Parent(s): 30ea92f

Upload 8 files

Browse files
anthropic_api_invoice_extractor.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import anthropic
2
+ import os
3
+ from remittance_pdf_processing_types import InvoiceNumbers, PaymentAmount
4
+ from remittance_pdf_processing_utils import remittance_logger, remove_duplicate_lists
5
+ from anthropic.types import ContentBlock, ImageBlockParam
6
+
7
+
8
def extract_invoice_numbers_with_anthropic_ai(base64_images: list[str], multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Dispatch invoice-number extraction to the single-hop or multi-hop Anthropic pipeline.

    Args:
        base64_images (list[str]): Base64-encoded image strings, passed through unchanged.
        multi_hop (bool): When truthy, use the multi-hop extractor; otherwise the single-hop one.

    Returns:
        list[InvoiceNumbers]: Whatever the selected extractor returns.
    """
    extractor = (
        extract_invoice_numbers_with_anthropic_ai_multi_hop
        if multi_hop
        else extract_invoice_numbers_with_anthropic_ai_single_hop
    )
    return extractor(base64_images)
23
+
24
def extract_invoice_numbers_with_anthropic_ai_single_hop(base64_images: list[str]) -> list[InvoiceNumbers]:
    """
    Ask the Anthropic model for all invoice numbers in the images with a single request.

    Args:
        base64_images (list[str]): Base64-encoded images; each is sent as an "image/png" block.

    Returns:
        list[InvoiceNumbers]: A one-element list holding the parsed invoice numbers
        (the inner list is empty when the response carries no text).
    """
    client = anthropic.Anthropic(
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
    )

    # Request payload blocks are ImageBlockParam dicts; ContentBlock is the response-side type.
    content: list[ImageBlockParam] = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image
            }
        } for image in base64_images
    ]

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        temperature=0,
        system="Given the remittance letter images, extract all invoice numbers. Respond with a comma-separated list only.",
        messages=[
            {
                "role": "user",
                "content": content
            }
        ]
    )

    remittance_logger.debug(f'Anthropic (raw) response: {message.content}')

    # Read text blocks only, so an empty or non-text response yields no invoice
    # numbers instead of failing on message.content[0].text.
    response_text = "".join(block.text for block in message.content if block.type == "text")
    invoice_numbers = parse_anthropic_response(response_text)
    return [invoice_numbers]
57
+
58
def extract_invoice_numbers_with_anthropic_ai_multi_hop(base64_images: list[str]) -> list[InvoiceNumbers]:
    """
    Extract invoice numbers in two hops: find candidate columns, then read each column.

    Args:
        base64_images (list[str]): Base64-encoded image strings.

    Returns:
        list[InvoiceNumbers]: The non-empty per-column results after
        remove_duplicate_lists has been applied.
    """
    # Hop 1: ask which column headers could hold invoice numbers.
    headers = extract_column_headers_from_images(base64_images)
    remittance_logger.debug(f"Extracted column headers: {headers}")

    # Hop 2: query each of the first three candidate columns separately.
    per_column_results = []
    for header in headers[:3]:
        numbers = extract_invoice_numbers_for_column_from_images(base64_images, header)
        remittance_logger.debug(f"Extracted invoice numbers for column '{header}': {numbers}")
        if not numbers:
            continue  # columns that produced nothing are not candidates
        per_column_results.append(numbers)

    return remove_duplicate_lists(per_column_results)
74
+
75
def extract_column_headers_from_images(base64_images: list[str]) -> list[str]:
    """
    Ask the Anthropic model which column headers in the images could contain invoice numbers.

    Args:
        base64_images (list[str]): Base64-encoded images; each is sent as an "image/png" block.

    Returns:
        list[str]: The header names parsed from the comma-separated response
        (empty when the response carries no text).
    """
    client = anthropic.Anthropic(
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
    )

    # Request payload blocks are ImageBlockParam dicts; ContentBlock is the response-side type.
    content: list[ImageBlockParam] = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image
            }
        } for image in base64_images
    ]

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        temperature=0,
        system="Given the remittance letter images, extract all column header names that could contain invoice numbers. Respond with a comma-separated list only.",
        messages=[
            {
                "role": "user",
                "content": content
            }
        ]
    )

    remittance_logger.debug(f'Anthropic (raw) response for column headers: {message.content}')

    # Read text blocks only, so an empty or non-text response yields no headers
    # instead of failing on message.content[0].text.
    response_text = "".join(block.text for block in message.content if block.type == "text")
    return parse_anthropic_response(response_text)
107
+
108
def extract_invoice_numbers_for_column_from_images(base64_images: list[str], column_name: str) -> InvoiceNumbers:
    """
    Ask the Anthropic model for the invoice numbers found in one named column.

    Args:
        base64_images (list[str]): Base64-encoded images; each is sent as an "image/png" block.
        column_name (str): Column header interpolated into the system prompt.

    Returns:
        InvoiceNumbers: The invoice numbers parsed from the comma-separated response
        (empty when the response carries no text).
    """
    client = anthropic.Anthropic(
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
    )

    # Request payload blocks are ImageBlockParam dicts; ContentBlock is the response-side type.
    content: list[ImageBlockParam] = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image
            }
        } for image in base64_images
    ]

    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        temperature=0,
        system=f"Given the remittance letter images, extract all invoice numbers from the column '{column_name}'. Respond with a comma-separated list only.",
        messages=[
            {
                "role": "user",
                "content": content
            }
        ]
    )

    remittance_logger.debug(f'Anthropic (raw) response for invoice numbers in column {column_name}: {message.content}')

    # Read text blocks only, so an empty or non-text response yields no invoice
    # numbers instead of failing on message.content[0].text.
    response_text = "".join(block.text for block in message.content if block.type == "text")
    return parse_anthropic_response(response_text)
140
+
141
def parse_anthropic_response(response: str) -> list[str]:
    """
    Split a comma-separated model response into its non-empty, trimmed items.

    Args:
        response (str): The response text returned by the model.

    Returns:
        list[str]: The items in their original order (invoice numbers or column headers).
    """
    trimmed_items = (piece.strip() for piece in response.split(','))
    # Drop pieces that were empty or whitespace-only between commas.
    return [piece for piece in trimmed_items if piece]
152
+
153
def extract_invoice_numbers_from_single_base64_image(base64_image: str, multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Extract invoice numbers from one base64-encoded image via the Anthropic extractor.

    Args:
        base64_image (str): The base64-encoded image string.
        multi_hop (bool): Forwarded to extract_invoice_numbers_with_anthropic_ai.

    Returns:
        list[InvoiceNumbers]: The extractor's result for a one-image batch.
    """
    single_image_batch = [base64_image]
    return extract_invoice_numbers_with_anthropic_ai(single_image_batch, multi_hop)
165
+
166
def extract_invoice_numbers_from_multi_page_images(base64_images: list[str], multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Extract invoice numbers from several base64-encoded images via the Anthropic extractor.

    Args:
        base64_images (list[str]): The base64-encoded image strings.
        multi_hop (bool): Forwarded to extract_invoice_numbers_with_anthropic_ai.

    Returns:
        list[InvoiceNumbers]: The extractor's result for the whole batch.
    """
    candidates = extract_invoice_numbers_with_anthropic_ai(base64_images, multi_hop)
    return candidates
178
+
179
+
180
def extract_payment_amounts_with_anthropic_ai(base64_images: list[str]) -> list[PaymentAmount]:
    """
    Ask the Anthropic model for the total payment amount shown in the images.

    Args:
        base64_images (list[str]): Base64-encoded images; each is sent as an "image/png" block.

    Returns:
        list[PaymentAmount]: The result of parse_anthropic_payment_response applied
        to the text of the response.
    """
    client = anthropic.Anthropic(
        api_key=os.environ.get("ANTHROPIC_API_KEY"),
    )

    # Prepare the message content (request-side image blocks).
    content: list[ImageBlockParam] = [
        {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": "image/png",
                "data": image
            }
        } for image in base64_images
    ]

    # Call the Anthropic API
    message = client.messages.create(
        model="claude-3-5-sonnet-20240620",
        max_tokens=1024,
        temperature=0,
        system="You are a precise payment amount extractor. Given remittance letter images, extract only the total payment amount. Respond with the numerical amount only, including any decimal places and currency symbols if present. Do not include any additional text or explanations.",
        messages=[
            {
                "role": "user",
                "content": content
            }
        ]
    )

    remittance_logger.debug(f'Anthropic (raw) response for payment amount: {message.content}')

    # Read text blocks only, so an empty or non-text response does not fail on
    # message.content[0].text.
    response_text = "".join(block.text for block in message.content if block.type == "text")
    return parse_anthropic_payment_response(response_text)
226
+
227
def parse_anthropic_payment_response(response: str) -> list[PaymentAmount]:
    """
    Turn the model's payment-amount response into a list of candidate amounts.

    Args:
        response (str): The response text returned by the model.

    Returns:
        list[PaymentAmount]: A single-item list with the trimmed response, or an
        empty list when the response is blank.
    """
    amount = response.strip()
    # A blank response is not a payment amount; returning [""] would surface an
    # empty candidate to callers.
    return [amount] if amount else []
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ from gradio_pdf import PDF
5
+ from remittance_pdf_processor import process_pdf_with_flow
6
+ from remittance_pdf_processing_types import Candidate, InvoiceNumbers, PaymentAmount, ProcessedPDFResult
7
+ from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
8
+
9
+
10
def custom_invoice_numbers_verifier(invoice_numbers: InvoiceNumbers, ground_truth_invoices: str) -> InvoiceNumbers:
    """
    Keep the extracted invoice numbers that appear in the user-supplied ground truth.

    Comparison ignores case and surrounding whitespace on both sides.

    Args:
        invoice_numbers (InvoiceNumbers): Extracted invoice numbers to check.
        ground_truth_invoices (str): Comma-separated ground-truth invoice numbers.

    Returns:
        InvoiceNumbers: The matching entries of invoice_numbers, unmodified and in order.
    """
    # Blank entries (e.g. an empty textbox or a trailing comma) must not become a
    # matchable "" ground-truth value.
    ground_truth_set = {
        entry.strip().lower()
        for entry in ground_truth_invoices.split(',')
        if entry.strip()
    }
    # Strip candidates too, mirroring the normalization applied to the ground truth.
    return [num for num in invoice_numbers if num.strip().lower() in ground_truth_set]
13
+
14
def custom_invoice_and_amount_verifier(invoice_numbers: InvoiceNumbers, amount: PaymentAmount, ground_truth_invoices: str, ground_truth_amount: str) -> bool:
    """
    Report whether an extracted amount equals the ground-truth amount after normalization.

    Only the two amounts are compared; invoice_numbers and ground_truth_invoices
    are accepted but not used.

    Returns:
        bool: True when both amounts normalize to the same string.
    """
    normalized_extracted = format_amount_str_to_decimal(amount)
    normalized_expected = format_amount_str_to_decimal(ground_truth_amount)
    return normalized_extracted == normalized_expected
16
+
17
def process_pdf_file(pdf_file_path: str, ground_truth_invoices: str, ground_truth_amount: str):
    """
    Run the extraction flow on a PDF and render the candidates as a text report.

    Args:
        pdf_file_path (str): Path of the PDF to process.
        ground_truth_invoices (str): Comma-separated ground-truth invoice numbers.
        ground_truth_amount (str): Ground-truth payment amount.

    Returns:
        str: The report, or an error message if processing raised an exception
        (the exception is logged with its traceback).
    """
    # Bind the ground truth into the verifier callbacks expected by the processing flow.
    def verify_invoices(candidate_numbers):
        return custom_invoice_numbers_verifier(candidate_numbers, ground_truth_invoices)

    def verify_invoices_and_amount(candidate_numbers, candidate_amount):
        return custom_invoice_and_amount_verifier(candidate_numbers, candidate_amount, ground_truth_invoices, ground_truth_amount)

    try:
        extracted_result: ProcessedPDFResult = process_pdf_with_flow(
            pdf_file_path,
            invoice_verifier=verify_invoices,
            invoice_and_amount_verifier=verify_invoices_and_amount
        )

        verified_candidate, unverified_candidate = extracted_result

        report_parts = [
            "Extracted invoice numbers and payment amounts:\n",
            f"Ground Truth Invoices: {ground_truth_invoices}\n",
            f"Ground Truth Amount: {ground_truth_amount}\n",
            "-" * 40 + "\n",
            # Invoice number section
            "Invoice Numbers:\n",
        ]
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1):
            # A candidate counts as verified when some verified list has the same members.
            is_verified = any(set(invoice_numbers) == set(v) for v in verified_candidate[0])
            status = "Verified" if is_verified else "Unverified"
            report_parts.append(f" Candidate {i} ({status}): {', '.join(invoice_numbers)}\n")
        report_parts.append("\n")

        # Payment amount section
        report_parts.append("Payment Amounts:\n")
        for i, amount in enumerate(unverified_candidate[1], 1):
            status = "Verified" if amount in verified_candidate[1] else "Unverified"
            report_parts.append(f" Candidate {i} ({status}): {amount}\n")

        return "".join(report_parts)
    except Exception as e:
        error_message = f"An error occurred while processing the PDF: {str(e)}"
        remittance_logger.error(error_message, exc_info=True)
        return error_message
54
+
55
# Create the Gradio interface: PDF upload and ground-truth inputs side by side,
# a submit button, and a text area for the report produced by process_pdf_file.
with gr.Blocks() as iface:
    gr.Markdown("# Remittance PDF Processor")
    gr.Markdown("Upload a PDF file to extract invoice numbers and payment amounts. Provide ground truth data for comparison.")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = PDF(label="Upload PDF", visible=True)

        with gr.Column(scale=1):
            ground_truth_invoices = gr.Textbox(label="Ground Truth Invoice Numbers (comma-separated)")
            ground_truth_amount = gr.Textbox(label="Ground Truth Payment Amount")

    submit_button = gr.Button("Process PDF")

    output = gr.Textbox(label="Processing Results", lines=20)

    submit_button.click(
        process_pdf_file,
        inputs=[pdf_input, ground_truth_invoices, ground_truth_amount],
        outputs=[output]
    )

# Launch the interface only when this file is run as a script, so importing the
# module (e.g. to reuse process_pdf_file) does not start a server.
if __name__ == "__main__":
    iface.launch()
remittance_pdf_processing_cli.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # File: remittance_pdf_processing_cli.py
2
+
3
+ import argparse
4
+ from remittance_pdf_processor import process_pdf, extract_text_from_pdf
5
+ from ccmt_verifier_aus import verify_invoice_numbers
6
+ from remittance_pdf_processing_types import Candidate, ProcessedPDFResult
7
+
8
def _verified_result_lines(verified_candidate: Candidate) -> list[str]:
    """Build the report lines that describe the verified invoice numbers and amount."""
    verified_invoices, verified_amounts = verified_candidate[0], verified_candidate[1]
    if len(verified_invoices) == 1 and len(verified_amounts) == 1:
        return [
            f" Verified Invoice Numbers: {', '.join(verified_invoices[0])}",
            f" Verified Amount: {verified_amounts[0]}",
        ]
    if len(verified_invoices) == 1 and len(verified_amounts) == 0:
        return [
            f" Verified Invoice Numbers: {', '.join(verified_invoices[0])}",
            " Amount doesn't match. This might be a partial payment.",
        ]
    if len(verified_invoices) >= 2:
        lines = [" Verified Invoice Numbers:"]
        lines.extend(
            f" List {i}: {', '.join(invoice_numbers)}"
            for i, invoice_numbers in enumerate(verified_invoices, 1)
        )
        return lines
    return [" No verified results."]


def _unverified_result_lines(unverified_candidate: Candidate) -> list[str]:
    """Build the report lines that list every unverified invoice and amount candidate."""
    lines = ["\nUnverified Invoice Numbers:"]
    lines.extend(
        f" Candidate {i}: {', '.join(invoice_numbers)}"
        for i, invoice_numbers in enumerate(unverified_candidate[0], 1)
    )
    lines.append("\nUnverified Payment Amounts:")
    lines.extend(
        f" Candidate {i}: {amount}"
        for i, amount in enumerate(unverified_candidate[1], 1)
    )
    return lines


def main():
    """
    Command-line entry point: process a PDF and report the extracted candidates.

    Parses the command-line arguments, runs process_pdf on the given file, prints
    the verified and unverified invoice numbers and payment amounts, and, when
    --output is given, writes the extracted text and the same results to that file.
    """
    parser = argparse.ArgumentParser(description="Extract text and invoice numbers from a PDF file.")
    parser.add_argument("pdf_file", help="Path to the PDF file")
    parser.add_argument("-o", "--output", help="Output file path for extracted text (optional)")
    parser.add_argument("-v", "--verbose", action="store_true", help="Print extracted text from PDF")
    parser.add_argument("-f", "--force-image", action="store_true", help="Force processing PDF as image")
    parser.add_argument("--verify", action="store_true", help="Enable invoice number verification")
    parser.add_argument("--multi-hop", action="store_true", help="Force multi-hop processing")

    args = parser.parse_args()

    # The raw text is only shown with --verbose or written with --output, so skip
    # the extra PDF pass when neither is requested.
    extracted_text = ""
    if args.verbose or args.output:
        extracted_text = extract_text_from_pdf(args.pdf_file, wrap_pages=True)

    # Determine the invoice verifier function
    invoice_verifier = verify_invoice_numbers if args.verify else None

    extracted_result: ProcessedPDFResult = process_pdf(args.pdf_file,
                                                       force_image_processing=args.force_image,
                                                       force_multi_hop=args.multi_hop,
                                                       invoice_verifier=invoice_verifier)

    verified_candidate, unverified_candidate = extracted_result

    # Build the report once; stdout and the output file share these lines.
    settings_lines = [
        f"Multi-hop processing: {'Enabled' if args.multi_hop else 'Disabled'}",
        f"Verification: {'Enabled' if args.verify else 'Disabled'}",
    ]
    result_lines = _verified_result_lines(verified_candidate) + _unverified_result_lines(unverified_candidate)

    if args.verbose:
        print("Extracted text from PDF:")
        print("-" * 40)
        print(extracted_text)
        print("-" * 40)
        print()

    print("Extracted invoice numbers and payment amounts:")
    for line in settings_lines:
        print(line)
    print("-" * 40)

    print("Verified Results:")
    for line in result_lines:
        print(line)

    print("-" * 40)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write("Extracted text:\n")
            f.write(extracted_text)
            f.write("\n\nExtracted invoice numbers and payment amounts:\n")
            # The file report omits the dashed separators printed on stdout.
            f.writelines(
                f"{line}\n"
                for line in [*settings_lines, "Verified Results:", *result_lines]
            )
        print(f"Extracted text and invoice numbers written to {args.output}")


if __name__ == "__main__":
    main()
remittance_pdf_processing_types.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable, Optional, Literal
2
+
3
+ # Type alias for invoice numbers
4
+ InvoiceNumbers = list[str]
5
+
6
+ # Type alias for invoice verifier function
7
+ InvoiceVerifier = Callable[[InvoiceNumbers], InvoiceNumbers]
8
+
9
+ # Type alias for document type
10
+ DocumentType = Literal['single', 'multi']
11
+
12
+ # Type alias for extractor function
13
+ ExtractorFunction = Callable[[str], list[InvoiceNumbers]]
14
+
15
+ # Type alias for a payment amount, represented as a string
16
+ PaymentAmount = str
17
+
18
+ Candidate = tuple[list[InvoiceNumbers], list[PaymentAmount]]
19
+
20
+ # Type alias for a processing result: (verified candidate, unverified candidate)
21
+ ProcessedPDFResult = tuple[Candidate, Candidate]
22
+
23
+ # New type alias for invoice list and amount verifier function
24
+ InvoiceListAndAmountVerifier = Callable[[InvoiceNumbers, PaymentAmount], bool]
remittance_pdf_processing_utils.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+
3
def setup_logger():
    """
    Return the 'remittance_processing' logger, configuring it on first use.

    The logger is set to DEBUG and receives a console handler at INFO level and a
    file handler ('remittance_processing.log') at DEBUG level, both using the same
    timestamped format.

    Returns:
        logging.Logger: The configured logger.
    """
    # Create a logger
    logger = logging.getLogger('remittance_processing')
    logger.setLevel(logging.DEBUG)

    # getLogger returns the same object for a given name, so attaching handlers
    # again on a repeated call would emit every record twice.
    if logger.handlers:
        return logger

    # Create handlers
    c_handler = logging.StreamHandler()
    f_handler = logging.FileHandler('remittance_processing.log')
    c_handler.setLevel(logging.INFO)
    f_handler.setLevel(logging.DEBUG)

    # Create formatters and add it to handlers
    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    c_handler.setFormatter(log_format)
    f_handler.setFormatter(log_format)

    # Add handlers to the logger
    logger.addHandler(c_handler)
    logger.addHandler(f_handler)

    return logger


# Module-level logger shared by the remittance processing modules.
remittance_logger = setup_logger()
26
+
27
def remove_duplicate_lists(lists):
    """
    Drop lists whose members duplicate those of an earlier list, ignoring order.

    Args:
        lists:
            a list of lists of strings
    Returns:
        a list containing the first occurrence of each distinct list, in input order
    """
    first_by_key = {}
    for candidate in lists:
        # Two lists are duplicates when their sorted contents are equal.
        key = tuple(sorted(candidate))
        if key not in first_by_key:
            first_by_key[key] = candidate
    # Dicts preserve insertion order, so first occurrences come out in input order.
    return list(first_by_key.values())
46
+
47
+
48
+ import re
49
+ from decimal import Decimal, ROUND_HALF_UP
50
+
51
def format_amount_str_to_decimal(amount_str: str) -> str:
    """
    Normalize a free-form amount string to a two-decimal string such as "1234.56".

    The last comma or period in the input is treated as the decimal separator and
    all earlier commas, periods and whitespace are removed; surrounding non-numeric
    text (e.g. currency symbols or codes) is stripped.

    Args:
        amount_str (str): The amount as written, e.g. "$1,234.56" or "1.234,56".

    Returns:
        str: The amount rounded half-up to two decimals, or the cleaned string
        unchanged when it cannot be parsed as a decimal number.
    """
    def standardize_number(s):
        # Find the last occurrence of a comma or period
        last_separator_index = max(s.rfind(','), s.rfind('.'))
        if last_separator_index != -1:
            # Split the string into two parts
            before_separator = s[:last_separator_index]
            after_separator = s[last_separator_index+1:]

            # Clean the first part of any commas, periods, or whitespace
            before_separator_cleaned = re.sub(r'[.,\s]', '', before_separator)

            # Ensure the decimal part starts with a period, even if it was a comma
            standardized_s = before_separator_cleaned + '.' + after_separator
        else:
            # If there's no separator, just remove commas, periods, or whitespace
            standardized_s = re.sub(r'[.,\s]', '', s)

        return standardized_s

    def remove_chars_after_last_digit(s):
        # Remove any non-digit characters following the last digit in the string
        return re.sub(r'(?<=\d)[^\d]*$', '', s)

    def clean_text(s):
        # This pattern looks for:
        # - Optional non-digit or non-negative sign characters followed by whitespace (if any)
        # - Followed by any characters until a digit is found in the word
        # It then replaces this matched portion with the remaining part of the word from the first digit
        cleaned_s = re.sub(r'[^\d-]*\s?(\S*\d\S*)', r'\1', s)
        return cleaned_s

    # Run functions to format a text decimal
    cleaned_amount = clean_text(remove_chars_after_last_digit(standardize_number(amount_str.strip().lower())))

    # Convert to Decimal and round to 2 decimal places
    try:
        decimal_amount = Decimal(cleaned_amount).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
        return f"{decimal_amount:.2f}"
    except (ArithmeticError, ValueError):
        # decimal.InvalidOperation derives from ArithmeticError. Catching only
        # conversion failures (rather than a bare except) lets KeyboardInterrupt
        # and unrelated errors propagate. On failure, return the cleaned string as is.
        return cleaned_amount
remittance_pdf_processor.py ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pdfplumber
2
+ from typing import Optional, Callable, Literal
3
+ import base64
4
+ import io
5
+ from PIL import Image
6
+ from remittance_pdf_processing_utils import remittance_logger, format_amount_str_to_decimal
7
+ from vertex_api_invoice_extractor import extract_invoice_numbers_with_vertex_ai, extract_invoice_numbers_from_text_with_vertex_ai, extract_payment_amounts_with_vertex_ai, extract_payment_amounts_from_text_with_vertex_ai
8
+ # from dspy_invoice_extractors import SinglePageInvoiceExtractor, MultiPageInvoiceExtractor
9
+ from remittance_pdf_processing_types import InvoiceNumbers, InvoiceVerifier, DocumentType, ExtractorFunction, PaymentAmount, Candidate, ProcessedPDFResult, InvoiceListAndAmountVerifier
10
+ from anthropic_api_invoice_extractor import extract_invoice_numbers_with_anthropic_ai, extract_payment_amounts_with_anthropic_ai
11
+
12
def is_text_based_pdf(pdf: pdfplumber.PDF) -> bool:
    """
    Decide whether the PDF carries enough extractable text to be processed as text.

    Args:
        pdf (pdfplumber.PDF): An open PDF.

    Returns:
        bool: True if at least one page yields more than 100 characters of text.
    """
    text_threshold = 100  # Minimum number of characters to consider it text-based
    # extract_text() may return None for a page without text in some pdfplumber
    # versions; treat that as empty instead of failing on len(None).
    return any(len(page.extract_text() or "") > text_threshold for page in pdf.pages)
18
+
19
def determine_document_type(pdf: pdfplumber.PDF) -> DocumentType:
    """Classify the PDF as 'single' when it has exactly one page, otherwise 'multi'."""
    page_count = len(pdf.pages)
    if page_count == 1:
        return 'single'
    return 'multi'
21
+
22
def extract_text_from_pdf(pdf_path: str, wrap_pages: bool = False) -> str:
    """
    Extract the text of every page of a PDF.

    Args:
        pdf_path (str): Path to the PDF file.
        wrap_pages (bool): When False, page texts are joined with newlines. When
            True, each page is wrapped in <page_N> tags (N starting at 1) and the
            whole document in a <remittance> tag.

    Returns:
        str: The extracted text in the requested layout.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may return None for a page without text in some pdfplumber
        # versions; use "" so join() does not fail and no literal "None" is emitted.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    if not wrap_pages:
        return "\n".join(page_texts)

    wrapped_pages = [
        f"<page_{i}>\n{page_text}\n</page_{i}>"
        for i, page_text in enumerate(page_texts, start=1)
    ]
    all_pages_text = "\n".join(wrapped_pages)
    return f"<remittance>\n{all_pages_text}\n</remittance>"
37
+
38
+ # def InvoiceExtractor(doc_type: DocumentType) -> ExtractorFunction:
39
+ # if doc_type == 'single':
40
+ # def single_page_extractor(text: str) -> list[InvoiceNumbers]:
41
+ # return []
42
+ # return single_page_extractor
43
+ # else:
44
+ # def multi_page_extractor(text: str) -> list[InvoiceNumbers]:
45
+ # return []
46
+ # return multi_page_extractor
47
+
48
def extract_invoice_numbers_from_text(
    text: str,
    doc_type: DocumentType,
    multi_hop: bool = False
) -> list[InvoiceNumbers]:
    """
    Extract invoice-number candidates from document text.

    Args:
        text (str): The document text to analyse.
        doc_type (DocumentType): Used only in the log message.
        multi_hop (bool): Forwarded to the text extractor.

    Returns:
        list[InvoiceNumbers]: The result of extract_invoice_numbers_from_text_with_vertex_ai.
    """
    remittance_logger.info(f"Extracting invoice numbers from {doc_type}-page text-based document (multi_hop: {multi_hop})")

    candidates = extract_invoice_numbers_from_text_with_vertex_ai(text, multi_hop)
    return candidates
57
+
58
def extract_invoice_numbers_from_single_base64_image(base64_image: str, multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Extract invoice-number candidates from one base64-encoded image.

    Logs the request at debug level and delegates to extract_invoice_numbers_with_vertex_ai.
    """
    remittance_logger.debug(f"Extracting invoice numbers from a single base64 image using Vertex AI (multi_hop: {multi_hop})")
    candidates = extract_invoice_numbers_with_vertex_ai(base64_image, multi_hop)
    return candidates
61
+
62
def extract_invoice_numbers_from_multi_page_images(base64_images: list[str], multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Extract invoice-number candidates from several base64-encoded images.

    Logs the request at debug level and delegates to extract_invoice_numbers_with_anthropic_ai.
    """
    remittance_logger.debug(f"Extracting invoice numbers from {len(base64_images)} base64 images using Anthropic AI (multi_hop: {multi_hop})")
    candidates = extract_invoice_numbers_with_anthropic_ai(base64_images, multi_hop)
    return candidates
65
+
66
def extract_invoice_numbers_from_base64_images(base64_images: list[str], multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Route image-based invoice extraction by image count.

    Exactly one image goes to the single-image extractor; any other count goes to
    the multi-page extractor.
    """
    remittance_logger.info(f"Extracting invoice numbers from {len(base64_images)} base64 image(s) (multi_hop: {multi_hop})")

    if len(base64_images) != 1:
        return extract_invoice_numbers_from_multi_page_images(base64_images, multi_hop)
    return extract_invoice_numbers_from_single_base64_image(base64_images[0], multi_hop)
73
+
74
def extract_invoice_numbers_from_image(
    pdf: pdfplumber.PDF,
    multi_hop: bool = False,
    dpi: int = 257  # Number chosen for optimal resolution for Gemini Flash 1.5 model
) -> list[InvoiceNumbers]:
    """
    Render every page of the PDF to a PNG and extract invoice numbers from the images.

    Args:
        pdf (pdfplumber.PDF): An open PDF.
        multi_hop (bool): Forwarded to the image extractor.
        dpi (int): Resolution passed to page.to_image.

    Returns:
        list[InvoiceNumbers]: The result of extract_invoice_numbers_from_base64_images.
    """
    remittance_logger.info(f"Extracting invoice numbers from {len(pdf.pages)}-page image-based document (multi_hop: {multi_hop})")

    encoded_pages = []
    for page in pdf.pages:
        # Render the page, serialize it as PNG in memory, then base64-encode the bytes.
        buffer = io.BytesIO()
        rendered = page.to_image(resolution=dpi)
        rendered.save(buffer, format='PNG')
        encoded_pages.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))

    return extract_invoice_numbers_from_base64_images(encoded_pages, multi_hop)
90
+
91
def extract_invoices_from_pdf(pdf_path: str, force_image_processing: bool = False, invoice_verifier: InvoiceVerifier | None = None, force_multi_hop: bool = False) -> tuple[list[InvoiceNumbers], list[InvoiceNumbers]]:
    """
    Extract invoice-number candidates from a PDF and optionally verify them.

    Single-hop extraction is tried first and multi-hop second (multi-hop only when
    force_multi_hop is set); the first pass that yields any candidates is returned.

    Args:
        pdf_path (str): Path to the PDF file.
        force_image_processing (bool): Always render pages to images instead of using text.
        invoice_verifier (InvoiceVerifier | None): Applied to each candidate list; its
            non-empty results form the verified list.
        force_multi_hop (bool): Skip the single-hop pass.

    Returns:
        tuple[list[InvoiceNumbers], list[InvoiceNumbers]]: (verified lists, all
        candidate lists). The verified list is empty when no verifier is given;
        both are empty when nothing was found.
    """
    with pdfplumber.open(pdf_path) as pdf:
        doc_type = determine_document_type(pdf)

        # The text/image decision and the extracted text do not depend on multi_hop,
        # so compute them once rather than on every pass of the loop.
        use_text = not force_image_processing and is_text_based_pdf(pdf)
        text = extract_text_from_pdf(pdf_path, wrap_pages=True) if use_text else None

        for multi_hop in [True] if force_multi_hop else [False, True]:
            if use_text:
                invoice_numbers_candidates = extract_invoice_numbers_from_text(text, doc_type, multi_hop=multi_hop)
            else:
                invoice_numbers_candidates = extract_invoice_numbers_from_image(pdf, multi_hop=multi_hop)

            if invoice_verifier:
                verified_invoices = [
                    invoice_verifier(invoice_numbers) or []
                    for invoice_numbers in invoice_numbers_candidates
                ]
                # Filter out empty lists for verified invoices
                verified_result = [invoices for invoices in verified_invoices if invoices]
            else:
                verified_result = []  # When there's no verifier, the verified list should be empty

            remittance_logger.info(f"Extracted invoice numbers (post verification, multi_hop={multi_hop}): {verified_result}")
            # If we found invoices (either verified or unverified), return them
            if verified_result or invoice_numbers_candidates:
                return verified_result, invoice_numbers_candidates

        # If we've tried both with and without multi_hop and found nothing, return empty lists
        remittance_logger.warning("No invoice numbers found after trying both single-hop and multi-hop processing.")
        return [], []
125
+
126
def extract_payment_amounts_from_single_base64_image(base64_image: str) -> list[PaymentAmount]:
    """
    Extract payment-amount candidates from one base64-encoded image.

    Logs the request at debug level and delegates to extract_payment_amounts_with_vertex_ai.
    """
    remittance_logger.debug("Extracting payment amounts from a single base64 image using Vertex AI")
    amounts = extract_payment_amounts_with_vertex_ai(base64_image)
    return amounts
129
+
130
def extract_payment_amounts_from_multi_page_images(base64_images: list[str]) -> list[PaymentAmount]:
    """
    Extract payment-amount candidates from several base64-encoded images.

    Logs the request at debug level and delegates to extract_payment_amounts_with_anthropic_ai.
    """
    remittance_logger.debug(f"Extracting payment amounts from {len(base64_images)} base64 images using Anthropic AI")
    amounts = extract_payment_amounts_with_anthropic_ai(base64_images)
    return amounts
133
+
134
def extract_payment_amounts_from_base64_images(base64_images: list[str]) -> list[PaymentAmount]:
    """
    Route image-based payment-amount extraction by image count.

    Exactly one image goes to the single-image extractor; any other count goes to
    the multi-page extractor.
    """
    remittance_logger.info(f"Extracting payment amounts from {len(base64_images)} base64 image(s)")

    if len(base64_images) != 1:
        return extract_payment_amounts_from_multi_page_images(base64_images)
    return extract_payment_amounts_from_single_base64_image(base64_images[0])
141
+
142
def extract_payment_amounts_from_pdf(pdf_path: str, force_image_processing: bool = False, payment_amount_formatter: Callable[[str], str] | None = None) -> list[PaymentAmount]:
    """
    Extract payment-amount candidates from a PDF.

    Single-page documents, forced image processing, and PDFs that are not
    text-based are handled through page images; other PDFs through their text.

    Args:
        pdf_path (str): Path to the PDF file.
        force_image_processing (bool): Always use the image path.
        payment_amount_formatter (Callable[[str], str] | None): Optional function
            applied to every extracted amount.

    Returns:
        list[PaymentAmount]: The extracted (and optionally formatted) amounts.
    """
    with pdfplumber.open(pdf_path) as pdf:
        doc_type = determine_document_type(pdf)

        # Short-circuiting keeps the text check from running when the image path
        # is already decided.
        use_images = (
            doc_type == 'single'
            or force_image_processing
            or not is_text_based_pdf(pdf)
        )
        if use_images:
            payment_amounts = extract_payment_amounts_from_image(pdf)
        else:
            text = extract_text_from_pdf(pdf_path, wrap_pages=True)
            payment_amounts = extract_payment_amounts_from_text(text, doc_type)

    if not payment_amount_formatter:
        return payment_amounts
    return [payment_amount_formatter(amount) for amount in payment_amounts]
160
+
161
def extract_payment_amounts_from_text(text: str, doc_type: DocumentType) -> list[PaymentAmount]:
    """
    Extract payment-amount candidates from document text.

    doc_type is used only in the log message; the text is passed to
    extract_payment_amounts_from_text_with_vertex_ai.
    """
    remittance_logger.info(f"Extracting payment amounts from {doc_type}-page text-based document")

    amounts = extract_payment_amounts_from_text_with_vertex_ai(text)
    return amounts
166
+
167
def extract_payment_amounts_from_image(pdf: pdfplumber.PDF, dpi: int = 257) -> list[PaymentAmount]:
    """
    Render every page of the PDF to a PNG and extract payment amounts from the images.

    Args:
        pdf (pdfplumber.PDF): An open PDF.
        dpi (int): Resolution passed to page.to_image.

    Returns:
        list[PaymentAmount]: The result of extract_payment_amounts_from_base64_images.
    """
    remittance_logger.info(f"Extracting payment amounts from {len(pdf.pages)}-page image-based document")

    encoded_pages = []
    for page in pdf.pages:
        # Render the page, serialize it as PNG in memory, then base64-encode the bytes.
        buffer = io.BytesIO()
        rendered = page.to_image(resolution=dpi)
        rendered.save(buffer, format='PNG')
        encoded_pages.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))

    return extract_payment_amounts_from_base64_images(encoded_pages)
179
+
180
+
181
def process_pdf(pdf_path: str, force_image_processing: bool = False, force_multi_hop: bool = False, invoice_verifier: InvoiceVerifier | None = None, invoice_and_amount_verifier: InvoiceListAndAmountVerifier | None = None) -> ProcessedPDFResult:
    """
    Extract invoice numbers and payment amounts from a PDF and try to verify them.

    Args:
        pdf_path (str): Path to the PDF file.
        force_image_processing (bool): Forwarded to both extraction steps.
        force_multi_hop (bool): Forwarded to the invoice extraction step.
        invoice_verifier (InvoiceVerifier | None): Forwarded to the invoice extraction step.
        invoice_and_amount_verifier (InvoiceListAndAmountVerifier | None): Checks an
            (invoice list, amount) pair; only consulted when exactly one verified
            invoice list was found.

    Returns:
        ProcessedPDFResult: (verified candidate, unverified candidate), each a tuple
        of (invoice number lists, payment amounts).
    """
    verified_invoice_numbers, unverified_invoice_numbers = extract_invoices_from_pdf(
        pdf_path,
        force_image_processing,
        invoice_verifier,
        force_multi_hop=force_multi_hop
    )
    payment_amounts = extract_payment_amounts_from_pdf(
        pdf_path,
        force_image_processing,
        payment_amount_formatter=format_amount_str_to_decimal
    )
    remittance_logger.debug(f"Extracted payment amounts: {payment_amounts}")

    verified_payment_amounts = []
    pairing_possible = invoice_and_amount_verifier and len(verified_invoice_numbers) == 1
    if pairing_possible:
        # Lazy generator: the verifier stops being called after the first accepted amount
        accepted = (
            amount
            for amount in payment_amounts
            if invoice_and_amount_verifier(verified_invoice_numbers[0], amount)
        )
        for first_accepted in accepted:
            verified_payment_amounts = [first_accepted]
            break

    return (
        (verified_invoice_numbers, verified_payment_amounts),
        (unverified_invoice_numbers, payment_amounts),
    )
201
+
202
+ # from typing import list, tuple
203
+
204
def process_pdf_with_flow(
    pdf_path: str,
    invoice_verifier: InvoiceVerifier | None = None,
    invoice_and_amount_verifier: InvoiceListAndAmountVerifier | None = None
) -> ProcessedPDFResult:
    """
    Run the extraction stages in a fixed order until one yields a single verified pair.

    Order: single-hop text, multi-hop text (both only for text-based PDFs),
    then single-hop image, then multi-hop image. The first stage whose result
    has exactly one verified invoice list and one verified amount is returned
    as-is; otherwise the unique candidates of all stages are accumulated.

    Args:
        pdf_path (str): Path to the PDF file.
        invoice_verifier (InvoiceVerifier | None): Function to verify invoice numbers.
        invoice_and_amount_verifier (InvoiceListAndAmountVerifier | None): Function to verify invoice numbers and amount pairs.

    Returns:
        ProcessedPDFResult: A tuple containing verified and unverified candidates.
    """
    all_verified_invoices: list[InvoiceNumbers] = []
    all_verified_amounts: list[PaymentAmount] = []
    all_unverified_invoices: list[InvoiceNumbers] = []
    all_unverified_amounts: list[PaymentAmount] = []

    with pdfplumber.open(pdf_path) as pdf:
        # Stages are lazy callables: a stage runs only if every earlier stage
        # failed to produce a single verified pair.
        stages = []

        if is_text_based_pdf(pdf):
            # Text is extracted once and shared by both text stages
            text = extract_text_from_pdf(pdf_path, wrap_pages=True)
            stages.append((
                "single hop text processing",
                lambda: process_text_based(text, invoice_verifier, invoice_and_amount_verifier, multi_hop=False),
            ))
            stages.append((
                "multi hop text processing",
                lambda: process_text_based(text, invoice_verifier, invoice_and_amount_verifier, multi_hop=True),
            ))

        stages.append((
            "single hop image processing",
            lambda: process_image_based(pdf, invoice_verifier, invoice_and_amount_verifier, multi_hop=False),
        ))
        stages.append((
            "multi hop image processing",
            lambda: process_image_based(pdf, invoice_verifier, invoice_and_amount_verifier, multi_hop=True),
        ))

        for label, run_stage in stages:
            result = run_stage()
            if has_single_verified_pair(result):
                return result
            accumulate_candidates(result, all_verified_invoices, all_verified_amounts, all_unverified_invoices, all_unverified_amounts)
            remittance_logger.debug(f"Result snapshot - {label}: {result}")

    # If no single verified pair is found, return all accumulated candidates
    return (all_verified_invoices, all_verified_amounts), (all_unverified_invoices, all_unverified_amounts)
260
+
261
def process_text_based(
    text: str,
    invoice_verifier: InvoiceVerifier | None,
    invoice_and_amount_verifier: InvoiceListAndAmountVerifier | None,
    multi_hop: bool
) -> ProcessedPDFResult:
    """
    Extract invoice numbers and payment amounts from document text and verify them.

    Both extractors are called with the document type 'multi'.

    Args:
        text (str): The extracted document text.
        invoice_verifier (InvoiceVerifier | None): Function to verify invoice numbers.
        invoice_and_amount_verifier (InvoiceListAndAmountVerifier | None): Function to verify invoice/amount pairs.
        multi_hop (bool): Whether invoice extraction uses multi-hop processing.

    Returns:
        ProcessedPDFResult: The result of verify_candidates for the extracted values.
    """
    candidate_invoices = extract_invoice_numbers_from_text(text, 'multi', multi_hop)
    candidate_amounts = extract_payment_amounts_from_text(text, 'multi')

    return verify_candidates(
        candidate_invoices,
        candidate_amounts,
        invoice_verifier,
        invoice_and_amount_verifier,
    )
271
+
272
def process_image_based(
    pdf: pdfplumber.PDF,
    invoice_verifier: InvoiceVerifier | None,
    invoice_and_amount_verifier: InvoiceListAndAmountVerifier | None,
    multi_hop: bool
) -> ProcessedPDFResult:
    """
    Extract invoice numbers and payment amounts from page images and verify them.

    Args:
        pdf (pdfplumber.PDF): The opened PDF document.
        invoice_verifier (InvoiceVerifier | None): Function to verify invoice numbers.
        invoice_and_amount_verifier (InvoiceListAndAmountVerifier | None): Function to verify invoice/amount pairs.
        multi_hop (bool): Whether invoice extraction uses multi-hop processing.

    Returns:
        ProcessedPDFResult: The result of verify_candidates for the extracted values.
    """
    candidate_invoices = extract_invoice_numbers_from_image(pdf, multi_hop)
    candidate_amounts = extract_payment_amounts_from_image(pdf)

    return verify_candidates(
        candidate_invoices,
        candidate_amounts,
        invoice_verifier,
        invoice_and_amount_verifier,
    )
282
+
283
def verify_candidates(
    invoice_numbers: list[InvoiceNumbers],
    payment_amounts: list[PaymentAmount],
    invoice_verifier: InvoiceVerifier | None,
    invoice_and_amount_verifier: InvoiceListAndAmountVerifier | None
) -> ProcessedPDFResult:
    """
    Verify extracted invoice-number lists and payment amounts.

    Args:
        invoice_numbers (list[InvoiceNumbers]): Candidate invoice-number lists.
        payment_amounts (list[PaymentAmount]): Candidate payment amounts.
        invoice_verifier (InvoiceVerifier | None): Called once per candidate list;
            every truthy return value is kept as a verified list.
        invoice_and_amount_verifier (InvoiceListAndAmountVerifier | None): Called with
            the single verified list and each amount in order; the first amount it
            accepts becomes the verified amount.

    Returns:
        ProcessedPDFResult: ((verified invoices, verified amounts),
        (all candidate invoices, all candidate amounts)).
    """
    verified_invoices = []
    verified_amounts = []

    if invoice_verifier:
        # Call the verifier exactly once per candidate: it may be expensive
        # (e.g. a remote lookup) and its return value is what gets stored.
        verified_invoices = [
            verified
            for inv in invoice_numbers
            if (verified := invoice_verifier(inv))
        ]

    # Amount pairing is only attempted when the invoice side is unambiguous
    if invoice_and_amount_verifier and len(verified_invoices) == 1:
        for amount in payment_amounts:
            if invoice_and_amount_verifier(verified_invoices[0], amount):
                verified_amounts = [amount]
                break

    return (verified_invoices, verified_amounts), (invoice_numbers, payment_amounts)
302
+
303
def has_single_verified_pair(result: ProcessedPDFResult) -> bool:
    """
    Tell whether a result holds exactly one verified invoice list and one verified amount.

    Args:
        result (ProcessedPDFResult): (verified candidate, unverified candidate).

    Returns:
        bool: True when both verified collections have length 1.
    """
    verified_candidate, _unverified_candidate = result
    if len(verified_candidate[0]) != 1:
        return False
    return len(verified_candidate[1]) == 1
306
+
307
def accumulate_candidates(
    result: ProcessedPDFResult,
    all_verified_invoices: list[InvoiceNumbers],
    all_verified_amounts: list[PaymentAmount],
    all_unverified_invoices: list[InvoiceNumbers],
    all_unverified_amounts: list[PaymentAmount]
) -> None:
    """
    Merge the candidates of one processing result into the running accumulators.

    The four accumulator lists are mutated in place. List items (invoice-number
    lists) count as duplicates when they contain the same set of elements,
    regardless of order; other items are compared with plain membership.

    Args:
        result (ProcessedPDFResult): (verified candidate, unverified candidate).
        all_verified_invoices (list[InvoiceNumbers]): Accumulator for verified invoice lists.
        all_verified_amounts (list[PaymentAmount]): Accumulator for verified amounts.
        all_unverified_invoices (list[InvoiceNumbers]): Accumulator for unverified invoice lists.
        all_unverified_amounts (list[PaymentAmount]): Accumulator for unverified amounts.
    """
    verified, unverified = result

    def is_known(candidate, pool: list) -> bool:
        # Invoice-number lists are compared as sets; amounts by equality
        if isinstance(candidate, list):
            return any(set(candidate) == set(existing) for existing in pool)
        return candidate in pool

    merge_plan = (
        (all_verified_invoices, verified[0]),
        (all_verified_amounts, verified[1]),
        (all_unverified_invoices, unverified[0]),
        (all_unverified_amounts, unverified[1]),
    )
    for pool, incoming in merge_plan:
        for candidate in incoming:
            if is_known(candidate, pool):
                continue
            pool.append(candidate)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio==4.41.0
2
+ gradio-pdf==0.0.12
3
+ pdfplumber==0.11.3
4
+ google-cloud-aiplatform==1.61.0
5
+ google-auth==2.33.0
6
+ google-api-python-client==2.140.0
7
+ anthropic==0.33.1
vertex_api_invoice_extractor.py ADDED
@@ -0,0 +1,594 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import os
4
+ from google.oauth2 import service_account
5
+ import vertexai
6
+ from remittance_pdf_processing_utils import remittance_logger
7
+ from vertexai.generative_models import GenerativeModel, Part
8
+ import vertexai.preview.generative_models as generative_models
9
+ from remittance_pdf_processing_types import InvoiceNumbers,PaymentAmount
10
+ from remittance_pdf_processing_utils import remove_duplicate_lists
11
+
12
+ # Set up authentication
13
def initialize_vertexai():
    """
    Initialize Vertex AI from a base64-encoded service account key.

    The key is read from the VERTEX_AI_SERVICE_ACCOUNT_JSON environment
    variable, base64-decoded, parsed as JSON and turned into credentials with
    the cloud-platform scope.

    Raises:
        ValueError: If the environment variable is missing or empty, or if the
            decoded content is not valid JSON.
        Exception: For any other failure during decoding, credential creation
            or Vertex AI initialization (the original error is chained as the cause).
    """
    # Get the base64-encoded service account JSON from an environment variable
    encoded_sa_json = os.environ.get('VERTEX_AI_SERVICE_ACCOUNT_JSON')

    if not encoded_sa_json:
        raise ValueError("VERTEX_AI_SERVICE_ACCOUNT_JSON environment variable is not set")

    try:
        # Decode the base64 string to get the JSON content
        sa_json_str = base64.b64decode(encoded_sa_json).decode('utf-8')
        sa_info = json.loads(sa_json_str)

        # Create credentials object from the decoded JSON
        credentials = service_account.Credentials.from_service_account_info(
            sa_info,
            scopes=['https://www.googleapis.com/auth/cloud-platform']
        )

        # Initialize Vertex AI with the credentials
        vertexai.init(project="saltech-ai-sandbox", location="us-central1", credentials=credentials)

        print("Vertex AI initialized successfully.")
    except json.JSONDecodeError as err:
        # Chain the cause so the position of the JSON error stays visible in tracebacks
        raise ValueError("Invalid JSON format in the decoded service account information") from err
    except Exception as err:
        raise Exception(f"Error initializing Vertex AI: {str(err)}") from err
39
+
40
+
41
# Runs at import time: importing this module initializes Vertex AI immediately,
# so VERTEX_AI_SERVICE_ACCOUNT_JSON must be set (initialize_vertexai raises otherwise).
initialize_vertexai()
43
+
44
def extract_invoice_numbers_with_vertex_ai(base64_image: str, multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Route image-based invoice number extraction to the single-hop or multi-hop implementation.

    Args:
        base64_image (str): The base64-encoded image string.
        multi_hop (bool): Select the multi-hop implementation when True.

    Returns:
        list[InvoiceNumbers]: A list containing lists of extracted invoice numbers.
    """
    extractor = (
        extract_invoice_numbers_with_vertex_ai_multi_hop
        if multi_hop
        else extract_invoice_numbers_with_vertex_ai_single_hop
    )
    return extractor(base64_image)
59
+
60
def extract_invoice_numbers_with_vertex_ai_single_hop(base64_image: str) -> list[InvoiceNumbers]:
    """
    Ask the Gemini Flash model for all invoice numbers in one image, using a single prompt.

    Args:
        base64_image (str): The base64-encoded PNG image string.

    Returns:
        list[InvoiceNumbers]: A one-element list holding the parsed invoice numbers.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    image_part = Part.from_data(mime_type="image/png", data=base64.b64decode(base64_image))

    text_prompt = """Given the remittance letter image, extract all invoice numbers.
    Respond with a comma-separated list of invoice numbers only.
    If no invoice numbers are found, respond with 'No invoice numbers found'."""

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    chunks = model.generate_content(
        [image_part, text_prompt],
        generation_config={
            "max_output_tokens": 8192,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
        stream=True,
    )

    # The response is streamed; stitch the chunks back together
    full_response = "".join(chunk.text for chunk in chunks)

    remittance_logger.debug(f"Extracted invoice numbers (raw model response): {full_response}")

    # Wrap in a list to match the expected return type
    return [parse_gemini_response(full_response)]
110
+
111
def extract_column_headers(base64_image: str) -> list[str]:
    """
    Extracts column header names that could contain invoice numbers from a base64-encoded image.

    Args:
        base64_image (str): The base64-encoded image string.

    Returns:
        list[str]: A list of non-empty column header names.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    image_part = Part.from_data(
        mime_type="image/png",
        data=base64.b64decode(base64_image),
    )

    text_prompt = """Given the remittance letter image, extract all column header names that could contain invoice numbers.
    Respond with a comma-separated list only."""

    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 0.1,
        "top_p": 0.95,
    }

    safety_settings = {
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    }

    responses = model.generate_content(
        [image_part, text_prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    full_response = "".join(response.text for response in responses)

    remittance_logger.debug(f"Extracted column headers (raw model response): {full_response}")

    # Drop empty tokens (empty response, trailing or doubled commas) so callers
    # never issue a follow-up query for a column named "".
    stripped_headers = (header.strip() for header in full_response.split(','))
    return [header for header in stripped_headers if header]
159
+
160
def extract_invoice_numbers_for_column(base64_image: str, column_name: str) -> InvoiceNumbers:
    """
    Ask the Gemini Flash model for the invoice numbers listed under one column of an image.

    Args:
        base64_image (str): The base64-encoded PNG image string.
        column_name (str): The column whose invoice numbers are requested.

    Returns:
        InvoiceNumbers: The non-empty, whitespace-stripped entries of the model's comma-separated answer.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    image_part = Part.from_data(mime_type="image/png", data=base64.b64decode(base64_image))

    text_prompt = f"""Given the remittance letter image, extract all invoice numbers from the column "{column_name}".
    Respond with a comma-separated list only."""

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    chunks = model.generate_content(
        [image_part, text_prompt],
        generation_config={
            "max_output_tokens": 8192,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
        stream=True,
    )

    # The response is streamed; stitch the chunks back together
    full_response = "".join(chunk.text for chunk in chunks)

    remittance_logger.debug(f"Extracted invoice numbers for column '{column_name}' (raw model response): {full_response}")

    stripped_entries = (entry.strip() for entry in full_response.split(','))
    return [entry for entry in stripped_entries if entry]
209
+
210
def extract_invoice_numbers_with_vertex_ai_multi_hop(base64_image: str) -> list[InvoiceNumbers]:
    """
    Extract invoice numbers from an image in two hops: find candidate columns, then query each one.

    Only the first three column headers are queried, and columns that yield no
    invoice numbers are left out of the result.

    Args:
        base64_image (str): The base64-encoded image string.

    Returns:
        list[InvoiceNumbers]: One invoice-number list per productive column, with duplicate lists removed.
    """
    # First hop: which columns might hold invoice numbers?
    column_headers = extract_column_headers(base64_image)
    remittance_logger.debug(f"Extracted column headers: {column_headers}")

    # Second hop: query at most three of those columns individually
    per_column_numbers = []
    for column_name in column_headers[:3]:
        invoice_numbers = extract_invoice_numbers_for_column(base64_image, column_name)
        remittance_logger.debug(f"Extracted invoice numbers for column '{column_name}': {invoice_numbers}")
        if not invoice_numbers:
            continue
        per_column_numbers.append(invoice_numbers)

    return remove_duplicate_lists(per_column_numbers)
235
+
236
+ # def extract_invoice_numbers_from_text_with_vertex_ai(text: str, multi_hop: bool = False) -> list[InvoiceNumbers]:
237
+ # """
238
+ # Extracts invoice numbers from text using Google's Gemini Flash model.
239
+
240
+ # Args:
241
+ # text (str): The text of the remittance letter.
242
+ # multi_hop (bool): Whether to use multi-hop processing (not implemented yet).
243
+
244
+ # Returns:
245
+ # list[InvoiceNumbers]: A list containing lists of extracted invoice numbers.
246
+ # """
247
+ # vertexai.init(project="saltech-ai-sandbox", location="us-central1")
248
+ # model = GenerativeModel("gemini-1.5-flash-001")
249
+
250
+ # prompt = f"""Given the following remittance letter text, extract all invoice numbers.
251
+ # Respond with a comma-separated list of invoice numbers only.
252
+ # If no invoice numbers are found, respond with 'No invoice numbers found'.
253
+
254
+ # Remittance letter text:
255
+ # {text}
256
+ # """
257
+
258
+ # generation_config = {
259
+ # "max_output_tokens": 8192,
260
+ # "temperature": 0.1,
261
+ # "top_p": 0.95,
262
+ # }
263
+
264
+ # safety_settings = {
265
+ # generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
266
+ # generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
267
+ # generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
268
+ # generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
269
+ # }
270
+
271
+ # responses = model.generate_content(
272
+ # prompt,
273
+ # generation_config=generation_config,
274
+ # safety_settings=safety_settings,
275
+ # stream=True,
276
+ # )
277
+
278
+ # full_response = ""
279
+ # for response in responses:
280
+ # full_response += response.text
281
+
282
+ # remittance_logger.debug(f"Vertex AI invoice numbers full response: {full_response}")
283
+
284
+ # extracted_numbers = parse_gemini_response(full_response)
285
+ # return [extracted_numbers] # Wrap in a list to match the expected return type
286
+
287
def parse_gemini_response(response: str) -> list[str]:
    """
    Parses the response from Gemini Flash model and extracts invoice numbers.

    Args:
        response (str): The response string from Gemini Flash model.

    Returns:
        list[str]: The whitespace-stripped, non-empty invoice numbers. Empty when
        the response is blank or starts with 'No invoice numbers found'
        (case-insensitive).
    """
    cleaned = response.strip()
    if cleaned.lower().startswith('no invoice numbers found'):
        return []

    # Split the comma-separated list, strip each entry and drop empty tokens
    # (blank response, trailing or doubled commas).
    stripped_numbers = (num.strip() for num in cleaned.split(','))
    return [num for num in stripped_numbers if num]
303
+
304
# Note: Google Cloud authentication is handled by initialize_vertexai(), which
# reads a base64-encoded service account JSON key from the
# VERTEX_AI_SERVICE_ACCOUNT_JSON environment variable.
307
+
308
+
309
def extract_invoice_numbers_from_text_with_vertex_ai(text: str, multi_hop: bool = False) -> list[InvoiceNumbers]:
    """
    Route text-based invoice number extraction to the single-hop or multi-hop implementation.

    Args:
        text (str): The text of the remittance letter.
        multi_hop (bool): Select the multi-hop implementation when True.

    Returns:
        list[InvoiceNumbers]: A list containing lists of extracted invoice numbers.
    """
    extractor = (
        extract_invoice_numbers_from_text_with_vertex_ai_multi_hop
        if multi_hop
        else extract_invoice_numbers_from_text_with_vertex_ai_single_hop
    )
    return extractor(text)
324
+
325
def extract_invoice_numbers_from_text_with_vertex_ai_single_hop(text: str) -> list[InvoiceNumbers]:
    """
    Ask the Gemini Flash model for all invoice numbers in the letter text, using a single prompt.

    Args:
        text (str): The text of the remittance letter.

    Returns:
        list[InvoiceNumbers]: A one-element list holding the parsed invoice numbers.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    prompt = f"""Given the following remittance letter text, extract all invoice numbers.
    Respond with a comma-separated list of invoice numbers only.
    If no invoice numbers are found, respond with 'No invoice numbers found'.

    Remittance letter text:
    {text}
    """

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    chunks = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 8192,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
        stream=True,
    )

    # The response is streamed; stitch the chunks back together
    full_response = "".join(chunk.text for chunk in chunks)

    remittance_logger.debug(f"Vertex AI invoice numbers full response (single-hop): {full_response}")

    # Wrap in a list to match the expected return type
    return [parse_gemini_response(full_response)]
374
+
375
def extract_invoice_numbers_from_text_with_vertex_ai_multi_hop(text: str) -> list[InvoiceNumbers]:
    """
    Extract invoice numbers from text in two hops: find candidate columns, then query each one.

    Only the first three column headers are queried, and columns that yield no
    invoice numbers are left out of the result.

    Args:
        text (str): The text of the remittance letter.

    Returns:
        list[InvoiceNumbers]: One invoice-number list per productive column, with duplicate lists removed.
    """
    # First hop: which columns or sections might hold invoice numbers?
    column_headers = extract_column_headers_from_text(text)
    remittance_logger.debug(f"Extracted column headers: {column_headers}")

    # Second hop: query at most three of those columns individually
    per_column_numbers = []
    for column_name in column_headers[:3]:
        invoice_numbers = extract_invoice_numbers_for_column_from_text(text, column_name)
        remittance_logger.debug(f"Extracted invoice numbers for column '{column_name}': {invoice_numbers}")
        if not invoice_numbers:
            continue
        per_column_numbers.append(invoice_numbers)

    return remove_duplicate_lists(per_column_numbers)
400
+
401
def extract_column_headers_from_text(text: str) -> list[str]:
    """
    Extracts column header names that could contain invoice numbers from the text.

    Args:
        text (str): The text of the remittance letter.

    Returns:
        list[str]: A list of non-empty column header names.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    prompt = f"""Given the following remittance letter text, extract all column header names or section titles that could contain invoice numbers.
    Respond with a comma-separated list only.

    Remittance letter text:
    {text}
    """

    generation_config = {
        "max_output_tokens": 8192,
        "temperature": 0.1,
        "top_p": 0.95,
    }

    safety_settings = {
        generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
        generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    }

    response = model.generate_content(
        prompt,
        generation_config=generation_config,
        safety_settings=safety_settings,
    )

    response_text = response.text
    remittance_logger.debug(f"Extracted column headers (raw model response): {response_text}")

    # Drop empty tokens (empty response, trailing or doubled commas) so callers
    # never issue a follow-up query for a column named "".
    stripped_headers = (header.strip() for header in response_text.split(','))
    return [header for header in stripped_headers if header]
443
+
444
def extract_invoice_numbers_for_column_from_text(text: str, column_name: str) -> InvoiceNumbers:
    """
    Ask the Gemini Flash model for the invoice numbers listed under one column or section of the text.

    Args:
        text (str): The text of the remittance letter.
        column_name (str): The column or section whose invoice numbers are requested.

    Returns:
        InvoiceNumbers: The model's answer as parsed by parse_gemini_response.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    prompt = f"""Given the following remittance letter text, extract all invoice numbers from the column or section "{column_name}".
    Respond with a comma-separated list only. If no invoice numbers are found, respond with 'No invoice numbers found'.

    Remittance letter text:
    {text}
    """

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    model_reply = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 8192,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
    )

    remittance_logger.debug(f"Extracted invoice numbers for column '{column_name}' (raw model response): {model_reply.text}")

    return parse_gemini_response(model_reply.text)
487
+
488
def extract_payment_amounts_with_vertex_ai(base64_image: str) -> list[PaymentAmount]:
    """
    Ask the Gemini Flash model for the total payment amount shown in one image.

    Args:
        base64_image (str): The base64-encoded PNG image string.

    Returns:
        list[PaymentAmount]: The model's answer as parsed by parse_gemini_payment_response.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    image_part = Part.from_data(mime_type="image/png", data=base64.b64decode(base64_image))

    text_prompt = """Given the remittance letter image, extract the total payment amount.
    Respond with the payment amount only.
    If no payment amounts are found, respond with 'No payment amounts found'."""

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    chunks = model.generate_content(
        [image_part, text_prompt],
        generation_config={
            "max_output_tokens": 256,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
        stream=True,
    )

    # The response is streamed; stitch the chunks back together
    full_response = "".join(chunk.text for chunk in chunks)

    remittance_logger.debug(f"Vertex AI payment amount full response: {full_response}")

    return parse_gemini_payment_response(full_response)
529
+
530
def extract_payment_amounts_from_text_with_vertex_ai(text: str) -> list[PaymentAmount]:
    """
    Ask the Gemini Flash model for the total payment amount stated in the letter text.

    Args:
        text (str): The text of the remittance letter.

    Returns:
        list[PaymentAmount]: The model's answer as parsed by parse_gemini_payment_response.
    """
    vertexai.init(project="saltech-ai-sandbox", location="us-central1")
    model = GenerativeModel("gemini-1.5-flash-001")

    prompt = f"""Given the following remittance letter text, extract the total payment amount.
    Respond with the payment amount only.
    If no payment amounts are found, respond with 'No payment amounts found'.

    Remittance letter text:
    {text}
    """

    # Every harm category uses the same blocking threshold
    harm = generative_models.HarmCategory
    threshold = generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    safety_settings = {
        category: threshold
        for category in (
            harm.HARM_CATEGORY_HATE_SPEECH,
            harm.HARM_CATEGORY_DANGEROUS_CONTENT,
            harm.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            harm.HARM_CATEGORY_HARASSMENT,
        )
    }

    model_reply = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 256,
            "temperature": 0.1,
            "top_p": 0.95,
        },
        safety_settings=safety_settings,
    )

    remittance_logger.debug(f"Vertex AI payment amount full response: {model_reply.text}")

    return parse_gemini_payment_response(model_reply.text)
574
+
575
def parse_gemini_payment_response(response: str) -> list[PaymentAmount]:
    """
    Parses the response from Gemini Flash model and extracts payment amounts.

    Args:
        response (str): The response string from Gemini Flash model.

    Returns:
        list[PaymentAmount]: A list of one extracted payment amount, or an empty
        list when the response is blank or starts with 'No payment amounts found'
        (case-insensitive).
    """
    amount = response.strip()

    # Prefix match (as in parse_gemini_response) tolerates trailing punctuation
    # such as "No payment amounts found."; a blank reply carries no amount.
    if not amount or amount.lower().startswith('no payment amounts found'):
        return []

    return [amount]
590
+
591
def extract_payment_amounts_from_base64_images(base64_images: list[str]) -> list[PaymentAmount]:
    """
    Placeholder for payment amount extraction from base64-encoded images.

    Not implemented: the argument is ignored and an empty list is always returned.

    Args:
        base64_images (list[str]): Base64-encoded image strings (currently unused).

    Returns:
        list[PaymentAmount]: Always an empty list.
    """
    # Implementation similar to extract_invoice_numbers_with_vertex_ai
    # but focused on extracting payment amounts
    no_amounts: list[PaymentAmount] = []
    return no_amounts