"""Streamlit prototype: upload a PDF, run it through the pipeline, and offer
a redlined .docx comparing the extracted text with the pipeline output."""
import io
import os
import tempfile

import pdfplumber
import streamlit as st
from docx import Document
from docx.shared import RGBColor
from redlines import Redlines

from langchain_pipeline import model_names, pipeline

_RED = RGBColor(255, 0, 0)
_GREEN = RGBColor(0, 128, 0)


def pdf_to_word(pdf_path, word_path):
    """Copy the words of a PDF into a new .docx file.

    Every entry returned by ``page.extract_words()`` becomes its own
    paragraph, and a page break is appended after each PDF page.

    Args:
        pdf_path: Whatever ``pdfplumber.open`` accepts; this app passes an
            in-memory binary stream.
        word_path: Destination handed to ``Document.save``.
    """
    with pdfplumber.open(pdf_path) as pdf:
        doc = Document()
        for page in pdf.pages:
            for item in page.extract_words():
                doc.add_paragraph(item['text'])
            doc.add_page_break()
        doc.save(word_path)


def apply_pipeline(file, model_name, balance_type, apsn_transactions,
                   max_fees_per_day, min_overdrawn_fee,
                   min_transaction_overdraft):
    """Forward the uploaded file and the form answers to ``pipeline``.

    All arguments are passed through positionally and unchanged; the return
    value is whatever ``pipeline`` returns (the caller below treats it as a
    newline-separated string).
    """
    return pipeline(
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )


def _add_colored_paragraph(doc, text, color):
    """Append ``text`` to ``doc`` as a 'Normal' paragraph in the given color."""
    paragraph = doc.add_paragraph(style='Normal')
    # Font color is a run-level property in python-docx; a Paragraph object
    # has no ``font`` attribute, so the text must live in an explicit run.
    run = paragraph.add_run(text)
    run.font.color.rgb = color
    return paragraph


def redline_changes(original_path, revised_path, output_path):
    """Write a .docx describing the differences between two .docx files.

    The paragraph texts of both documents are joined with newlines, compared
    with ``Redlines``, and the resulting markup is written line by line into
    a new document. Lines that look like diff headers or removals are colored
    red, additions green, everything else is left uncolored.

    Args:
        original_path: Path of the original .docx.
        revised_path: Path of the revised .docx.
        output_path: Path the redlined .docx is saved to.
    """
    original_doc = Document(original_path)
    revised_doc = Document(revised_path)

    original_text = "\n".join(para.text for para in original_doc.paragraphs)
    revised_text = "\n".join(para.text for para in revised_doc.paragraphs)

    redline = Redlines(original_text, revised_text)
    # NOTE(review): the prefix checks below expect unified-diff style lines
    # ('---', '+++', '@@', '-', '+'). output_markdown() appears to emit inline
    # markup instead, so these branches may rarely match -- confirm against
    # the redlines version in use.
    diff_text = redline.output_markdown()

    # Create a new document and add the redlined content as text
    diff_doc = Document()
    for line in diff_text.split("\n"):
        # Headers are tested first so that '+++' is red, not green.
        if line.startswith(('---', '+++', '@@')) or line.startswith('-'):
            _add_colored_paragraph(diff_doc, line, _RED)
        elif line.startswith('+'):
            _add_colored_paragraph(diff_doc, line, _GREEN)
        else:
            diff_doc.add_paragraph(line, style='Normal')

    diff_doc.save(output_path)


# Streamlit App
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

model_name = st.selectbox("Model", model_names())
balance_type = st.selectbox(
    "Do you charge on available balance or ledger balance?",
    ["available balance", "ledger balance"],
)
apsn_transactions = st.selectbox(
    "Do you charge for APSN transactions?",
    ["yes", "no"],
)
max_fees_per_day = st.number_input(
    "How many overdraft fees per day can be charged?",
    min_value=0,
    max_value=10,
)
min_overdrawn_fee = st.number_input(
    "What is the minimum amount overdrawn to incur a fee?",
    min_value=0,
    max_value=500,
)
min_transaction_overdraft = st.number_input(
    "What is the minimum transaction amount to trigger an overdraft?",
    min_value=0,
    max_value=500,
)

uploaded_file = st.file_uploader("Choose a file")

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        try:
            # A temporary directory removes all intermediate files on exit
            # and avoids leaving NamedTemporaryFile handles open.
            with tempfile.TemporaryDirectory() as workdir:
                original_word_path = os.path.join(workdir, "original.docx")
                revised_word_path = os.path.join(workdir, "revised.docx")
                redlined_output_path = os.path.join(workdir, "redlined.docx")

                # Extract from a private copy so the upload stream itself is
                # not consumed, then rewind it so the pipeline reads from
                # the first byte.
                pdf_to_word(io.BytesIO(uploaded_file.getvalue()),
                            original_word_path)
                uploaded_file.seek(0)

                diff = apply_pipeline(
                    uploaded_file,
                    model_name,
                    balance_type,
                    apsn_transactions,
                    max_fees_per_day,
                    min_overdrawn_fee,
                    min_transaction_overdraft,
                )

                revised_doc = Document()
                for line in diff.split("\n"):
                    revised_doc.add_paragraph(line)
                revised_doc.save(revised_word_path)

                redline_changes(original_word_path, revised_word_path,
                                redlined_output_path)

                # Read the result into memory before the directory is removed.
                with open(redlined_output_path, "rb") as f:
                    redlined_bytes = f.read()

            st.download_button(
                label="Download Redlined Document",
                data=redlined_bytes,
                file_name="redlined_document.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )
            st.success("Redlined document created successfully!")
        except Exception as e:
            # Top-level UI boundary: surface any failure in the page.
            st.exception(e)