"""Redline a bank overdraft disclosure PDF with the help of a chat model.

The module extracts text (and basic font information) from an uploaded PDF,
sends the text together with case-law context and the bank's policy answers
to a chat model, and writes a Word document that contains the original text,
the proposed changes and the model's explanation.
"""

import logging
import os
import re
from io import BytesIO
from tempfile import NamedTemporaryFile

import fitz  # PyMuPDF
from docx import Document
from docx.shared import Pt, RGBColor
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import OpenAIEmbeddings
from pdf2docx import Converter
from pdfminer import high_level

# Set up logging
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Indexing os.environ raises KeyError at import time when a key is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"]
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

embedding = OpenAIEmbeddings(model="text-embedding-ada-002")

# Chat models selectable by name in pipeline().
models = {
    "claude-3": ChatAnthropic(model='claude-3-sonnet-20240229'),
    "gemini-pro": ChatGoogleGenerativeAI(
        model="gemini-1.5-pro-latest", temperature=0.0
    ),
}

# Control characters removed by sanitize_text(); the range includes tab,
# carriage return and newline.
_CONTROL_CHARS = re.compile(r'[\x00-\x1F\x7F-\x9F]')

# PyMuPDF span "flags" bits: bit 0 superscript, bit 1 italic, bit 2 serifed,
# bit 3 monospaced, bit 4 bold.
_FLAG_ITALIC = 1 << 1
_FLAG_BOLD = 1 << 4

# Section headings of the structured response layout.
_CHANGES_HEADING = "1. **Changed Sentences**:\n"
_EXPLANATION_HEADING = "2. **Detailed Explanation**:\n"

# A line made only of dashes: the separator the prompt asks the model to put
# between the updated disclosure and the reasons.
_SEPARATOR_LINE = re.compile(r'^[ \t]*-{6,}[ \t\r]*$', re.MULTILINE)

# Prompt sent to the model. The text is kept as adjacent string pieces so the
# wording stays unchanged while the source remains readable. `{disclosure}`
# is filled with the text extracted from the uploaded PDF; the remaining
# placeholders are the bank's policy answers.
_PROMPT_TEMPLATE = (
    " law context: 20-cv-12061 08-23-2021 Veronica Gardner, Plaintiff, v. "
    "Flagstar Bank, FSB, Defendant. GERSHWIN A. DRAIN, UNITED STATES "
    "DISTRICT JUDGE OPINION AND ORDER GRANTING IN PART AND DENYING IN PART "
    "DEFENDANT'S MOTION TO DISMISS [#18] GERSHWIN A. DRAIN, UNITED STATES "
    "DISTRICT JUDGE I. Introduction On July 31, 2020, Plaintiff Veronica "
    "Gardner brought the instant action on behalf of herself and all "
    "others similarly situated against Defendant Flagstar Bank, FSB "
    "(“Flagstar” or “Bank”). ECF No. 1. Plaintiff filed her First Amended "
    "Complaint on October 6, 2020 and alleges that Defendant unlawfully "
    "assesses and collects overdraft fees on transactions, sometimes "
    "multiple times, in violation of the contract between the parties. "
    "Id. Plaintiff brings two state law claims for breach of contract and "
    "conversion. Id. Presently before the Court is Defendant's Motion to "
    "Dismiss. ECF No. 18. This matter is fully briefed. ECF Nos. 20, 23. "
    "Plaintiff also filed two Notices of Supplemental Authority. ECF Nos. "
    "26, Tims v. LGE Cmty. Credit Union 935 F.3d 1228 (11th Cir. 2019) "
    "have considered the matter. See Salls v. Dig. Fed. Credit Union , 349 "
    "F. Supp. 3d 81, 91 (D. Mass 2018) (collecting cases). Tims's "
    "complaint challenged the substance of LGE's Opt-In Agreement. Because "
    "the safe harbor does not protect financial institutions from "
    "challenges to the substance of Opt-In Agreements, Tims's EFTA claim "
    "survives a motion to dismiss, and the district court erred in "
    "granting the motion. IV. CONCLUSION For the foregoing reasons, we "
    "reverse the district court's order granting LGE's motion to dismiss "
    "and remand for further proceedings consistent with this opinion. "
    "REVERSED AND REMANDED. 13 "
    # The disclosure was passed to format() but had no placeholder, so the
    # model never received it.
    "\nbank disclosure:\n{disclosure}\n"
    "-------- above are several cases and a bank disclosure. \n"
    "Using the cases, please provide changes to the disclosure and keep as "
    "much formatting as possible and to ensure there are no legal "
    "contradictions between the content of the disclosure and the cases "
    "and please provide reasoning for each proposed change. Please also "
    "integrate the bank's policies into the disclosure. "
    'In the first sentence, please include a reference to the account '
    'agreement "for more information on overdrafts" and a placeholder for '
    'a URL. '
    "Available balance or ledger balance should replace money in the first "
    "sentence. Here are the answers to the bank's policy questions: "
    "Do you charge on available balance or ledger balance?: {balance_type} "
    "Do you charge for APSN transactions?: {apsn_transactions} "
    "How many overdraft fees per day can be charged?: {max_fees_per_day} "
    "What is the minimum amount overdrawn to incur a fee?: "
    "{min_overdrawn_fee} "
    "What is the minimum transaction amount to trigger an overdraft?: "
    "{min_transaction_overdraft} "
    "Please output in the following format: entire disclosure text updated "
    "with formatting retained ------ reasons for changes citing caselaw "
)


def model_names():
    """Return the names of the configured chat models (a dict keys view)."""
    return models.keys()


def sanitize_text(text):
    """Return *text* with all C0/C1 control characters removed.

    Tabs, carriage returns and newlines fall inside the removed range.
    """
    return _CONTROL_CHARS.sub('', text)


def extract_text_with_formatting(file_path):
    """Collect every text span of a PDF together with its font information.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        A list of dicts, one per span in reading order, with the keys
        ``"text"``, ``"font_size"`` and ``"font_flags"`` (the raw PyMuPDF
        span flags bit field).
    """
    formatted_text = []
    document = fitz.open(file_path)
    try:
        for page in document:
            for block in page.get_text("dict")["blocks"]:
                # Image blocks carry no "lines" entry.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        formatted_text.append({
                            "text": span["text"],
                            "font_size": span["size"],
                            "font_flags": span["flags"],
                        })
    finally:
        # Release the file handle even when extraction fails.
        document.close()
    return formatted_text


def convert_pdf_to_docx(pdf_path, output_path):
    """Convert the PDF at *pdf_path* into a Word document at *output_path*."""
    cv = Converter(pdf_path)
    try:
        cv.convert(output_path, start=0, end=None)
    finally:
        # Close the converter even when the conversion raises.
        cv.close()
    print(f"PDF converted to Word document and saved to {output_path}")


def _split_model_response(changes):
    """Split a model response into (changed text, explanation).

    Two layouts are accepted: the structured one with the
    "1. **Changed Sentences**" / "2. **Detailed Explanation**" headings, and
    the layout requested by the prompt, where a line of dashes separates the
    updated disclosure from the reasons.

    Raises:
        ValueError: If neither layout is found.
    """
    if _EXPLANATION_HEADING in changes:
        changed, explanation = changes.split(_EXPLANATION_HEADING, 1)
        changed = changed.replace(_CHANGES_HEADING, "")
    else:
        separator = _SEPARATOR_LINE.search(changes)
        if separator is None:
            logger.error(
                "Model response does not contain the expected sections."
            )
            raise ValueError(
                "Invalid format: 'Changed Sentences' and "
                "'Detailed Explanation' sections not found"
            )
        changed = changes[:separator.start()]
        explanation = changes[separator.end():]
    return changed.strip(), explanation.strip()


def _extract_marked(text, opener, closer):
    """Pull every ``opener…closer`` segment out of *text*.

    Returns:
        ``(segments, remainder)`` where *segments* are the marked contents in
        order of appearance and *remainder* is *text* with the marked
        segments (markers included) removed. An opener without a closer
        after it is left untouched.
    """
    segments = []
    search_from = 0
    while True:
        start = text.find(opener, search_from)
        if start == -1:
            break
        end = text.find(closer, start + len(opener))
        if end == -1:
            # No closer after this opener means none after any later opener
            # either; stopping here is what prevents an endless loop.
            break
        segments.append(text[start + len(opener):end])
        text = text[:start] + text[end + len(closer):]
        search_from = start
    return segments, text


def create_redlined_word_doc(formatted_text, changes, output_path):
    """Write a Word document with the original text and the proposed changes.

    The document contains one paragraph per original span (font size, bold
    and italic taken from the span), followed by the changed text where
    ``[-…-]`` segments are rendered red and struck through and ``[+…+]``
    segments blue, then a page break and a "Detailed Explanation" section.

    Args:
        formatted_text: Span dicts as returned by
            ``extract_text_with_formatting``.
        changes: The raw model response text.
        output_path: Where to save the .docx file.

    Raises:
        ValueError: If *changes* is not in a recognised layout.
    """
    # Parse first so a malformed response fails before any work is done.
    changed_sentences, detailed_explanation = _split_model_response(changes)
    logger.info(detailed_explanation)
    logger.info(changed_sentences)

    doc = Document()

    for item in formatted_text:
        run = doc.add_paragraph().add_run(sanitize_text(item["text"]))
        run.font.size = Pt(item["font_size"])
        flags = item["font_flags"]
        if flags & _FLAG_BOLD:
            run.bold = True
        if flags & _FLAG_ITALIC:
            run.italic = True

    for raw_line in changed_sentences.split('\n'):
        if not raw_line.strip():
            continue
        line = sanitize_text(raw_line)
        deletions, line = _extract_marked(line, '[-', '-]')
        additions, line = _extract_marked(line, '[+', '+]')

        for deletion in deletions:
            run = doc.add_paragraph().add_run(deletion)
            run.font.color.rgb = RGBColor(255, 0, 0)
            run.font.strike = True
        for addition in additions:
            run = doc.add_paragraph().add_run(addition)
            run.font.color.rgb = RGBColor(0, 0, 255)
        # Whatever is left of the line is unchanged text.
        if line.strip():
            doc.add_paragraph(line)

    doc.add_page_break()
    doc.add_heading('Detailed Explanation', level=1)
    doc.add_paragraph(detailed_explanation)

    doc.save(output_path)
    print(f"Redlined document saved to {output_path}")


def pipeline(uploaded_file, model_name, balance_type, apsn_transactions,
             max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Run the full redlining flow for one uploaded PDF.

    Args:
        uploaded_file: Binary file-like object (must provide ``read()``)
            holding the PDF.
        model_name: Key into ``models`` selecting the chat model.
        balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee,
        min_transaction_overdraft: The bank's policy answers, inserted into
            the prompt as given.

    Returns:
        Path of the generated Word document.

    Raises:
        KeyError: If *model_name* is not a configured model.
        ValueError: If the model response is not in a recognised layout.
    """
    # The PDF libraries work on paths, so spill the upload to a temp file.
    tmp = NamedTemporaryFile(delete=False, suffix='.pdf')
    tmp_path = tmp.name
    try:
        with tmp:
            tmp.write(uploaded_file.read())

        # Extract text from the uploaded file
        disclosure_text = high_level.extract_text(tmp_path)

        prompt = _PROMPT_TEMPLATE.format(
            disclosure=disclosure_text,
            balance_type=balance_type,
            apsn_transactions=apsn_transactions,
            max_fees_per_day=max_fees_per_day,
            min_overdrawn_fee=min_overdrawn_fee,
            min_transaction_overdraft=min_transaction_overdraft,
        )
        logger.debug("Formatted prompt: %s", prompt)

        chat_response = models[model_name].invoke(input=prompt)
        changes = chat_response.content
        logger.info("Model response:")
        logger.info(changes)

        output_directory = 'output'
        output_path = os.path.join(
            output_directory, 'Redlined_Reg_E_Notice.docx'
        )
        os.makedirs(output_directory, exist_ok=True)

        formatted_text = extract_text_with_formatting(tmp_path)
        create_redlined_word_doc(formatted_text, changes, output_path)

        return output_path
    finally:
        # delete=False means nobody else removes the upload copy.
        try:
            os.remove(tmp_path)
        except OSError:
            logger.warning("Could not remove temporary file %s", tmp_path)