Spaces:

Saltech
/

remittance-poc-with-verifier

Sleeping

App Files Files Community

remittance-poc-with-verifier / remittance_pdf_processing_utils.py

eelang

Upload 8 files

7850a69 verified 2 months ago

raw

history blame contribute delete

2.88 kB

	import logging

	def setup_logger():
	    """
	    Configure and return the 'remittance_processing' logger.

	    The logger itself is set to DEBUG and gets two handlers sharing one format:
	    a console stream handler that emits INFO and above, and a file handler
	    writing DEBUG and above to 'remittance_processing.log' in the current
	    working directory.

	    The function is safe to call more than once: handlers are attached only
	    the first time, so repeated calls do not duplicate log output.

	    Returns:
	    the configured logging.Logger instance
	    """
	    logger = logging.getLogger('remittance_processing')
	    logger.setLevel(logging.DEBUG)

	    # logging.getLogger hands back the same object for the same name, so a
	    # second call would otherwise stack another pair of handlers and every
	    # record would be written twice (and another file handle opened).
	    if logger.handlers:
	        return logger

	    # Create handlers
	    c_handler = logging.StreamHandler()
	    f_handler = logging.FileHandler('remittance_processing.log')
	    c_handler.setLevel(logging.INFO)
	    f_handler.setLevel(logging.DEBUG)

	    # Create the shared formatter and attach it to both handlers
	    log_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	    c_handler.setFormatter(log_format)
	    f_handler.setFormatter(log_format)

	    # Add handlers to the logger
	    logger.addHandler(c_handler)
	    logger.addHandler(f_handler)

	    return logger

	# Module-level logger instance, created once when this module is imported.
	remittance_logger = setup_logger()

	def remove_duplicate_lists(lists):
	    """
	    Drop lists whose contents repeat an earlier list, ignoring element order.
	    Two lists count as duplicates when they hold the same elements once sorted;
	    the first occurrence is kept, unchanged and in its original position.
	    Args:
	    lists:
	    a list of lists of strings
	    Returns:
	    a list of lists of strings, where each list is unique
	    """
	    fingerprints = set()
	    kept = []

	    for candidate in lists:
	        # The sorted tuple is a hashable, order-insensitive key for the list
	        key = tuple(sorted(candidate))
	        if key in fingerprints:
	            continue
	        fingerprints.add(key)
	        kept.append(candidate)

	    return kept


	import re
	from decimal import Decimal, ROUND_HALF_UP

	def format_amount_str_to_decimal(amount_str: str) -> str:
	    """
	    Normalize a free-form amount string into a plain two-decimal string.

	    The input is stripped and lower-cased, then processed in three steps:
	    1. the right-most ',' or '.' is taken as the decimal separator and every
	       other comma, period and whitespace character before it is removed;
	    2. anything following the last digit is dropped;
	    3. anything preceding the number (currency symbols, labels) is dropped,
	       keeping a minus sign if one appears in that prefix.
	    The result is rounded half-up to two decimal places.

	    Note: a single separator is always read as the decimal point, so
	    "1,234" is interpreted as 1.234 and yields "1.23".

	    Args:
	    amount_str:
	    the raw amount text, e.g. "$1,234.56" or "1.234,56 eur"
	    Returns:
	    the amount formatted with exactly two decimals (e.g. "1234.56"); if the
	    cleaned text still cannot be parsed as a number, the cleaned text itself
	    """
	    # Imported here so the function does not rely on the module import list.
	    from decimal import InvalidOperation

	    def standardize_number(s):
	        # Find the last occurrence of a comma or period
	        last_separator_index = max(s.rfind(','), s.rfind('.'))
	        if last_separator_index == -1:
	            # If there's no separator, just remove commas, periods, or whitespace
	            return re.sub(r'[.,\s]', '', s)

	        # Split the string around the presumed decimal separator
	        before_separator = s[:last_separator_index]
	        after_separator = s[last_separator_index + 1:]

	        # Clean the integer part of any commas, periods, or whitespace
	        before_separator_cleaned = re.sub(r'[.,\s]', '', before_separator)

	        # Ensure the decimal part starts with a period, even if it was a comma
	        return before_separator_cleaned + '.' + after_separator

	    def remove_chars_after_last_digit(s):
	        # Remove any non-digit characters following the last digit in the string
	        return re.sub(r'(?<=\d)[^\d]*$', '', s)

	    def clean_text(s):
	        # Drop everything before the number while preserving a minus sign.
	        # The number starts at the first digit, or at a '.' directly followed
	        # by a digit (".5"). The decimal point inside the number is never
	        # touched; the previous regex removed it whenever two or more digits
	        # followed, turning "1234.56" into "123456".
	        number_start = re.search(r'\.?\d', s)
	        if number_start is None:
	            return s
	        prefix = s[:number_start.start()]
	        number = s[number_start.start():]
	        return '-' + number if '-' in prefix else number

	    # Run functions to format a text decimal
	    normalized = amount_str.strip().lower()
	    cleaned_amount = clean_text(remove_chars_after_last_digit(standardize_number(normalized)))

	    # Convert to Decimal and round to 2 decimal places
	    try:
	        decimal_amount = Decimal(cleaned_amount).quantize(Decimal('0.01'), rounding=ROUND_HALF_UP)
	    except (InvalidOperation, ValueError):
	        # If conversion fails, return the cleaned string as is
	        return cleaned_amount
	    return f"{decimal_amount:.2f}"