import os
import re
import tempfile
from difflib import unified_diff

import fitz  # PyMuPDF
import streamlit as st
from docx import Document
from docx.shared import RGBColor

from langchain_pipeline import pipeline, model_names

def pdf_to_text_with_layout(pdf_file):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_file: A binary file-like object holding the PDF. It is read with
            ``read()``, so its cursor is left at end-of-stream; callers that
            reuse the object afterwards must rewind it themselves.

    Returns:
        The text of all pages, in page order, joined with newlines. Each page
        is extracted with ``page.get_text("text")``.
    """
    # Open as a context manager so the document is closed even if a page
    # fails to load or extract.
    with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
        return "\n".join(
            doc.load_page(page_num).get_text("text")
            for page_num in range(doc.page_count)
        )

def clean_text(text):
    """Return *text* with non-printable and high-code-point characters dropped.

    A character is kept only if ``str.isprintable()`` is true for it and its
    code point is below 65536. Printable non-ASCII characters in that range
    are therefore kept, while control characters such as tabs and newlines
    are removed.
    """
    kept = []
    for ch in text:
        if ord(ch) >= 65536 or not ch.isprintable():
            continue
        kept.append(ch)
    return ''.join(kept)

def text_to_word_with_formatting(text, word_path):
    """Save *text* as a Word document with one paragraph per line.

    The text is split on ``"\\n"``; each resulting line is passed through
    ``clean_text`` and added as its own paragraph (empty lines become empty
    paragraphs). The document is then saved to *word_path*.
    """
    sanitized_lines = [clean_text(raw_line) for raw_line in text.split("\n")]

    document = Document()
    for paragraph_text in sanitized_lines:
        document.add_paragraph(paragraph_text)
    document.save(word_path)

def apply_pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Call ``pipeline`` with the given arguments and return its result.

    All seven arguments are forwarded positionally, unchanged and in the
    same order as this function's signature.
    """
    pipeline_args = (
        file,
        model_name,
        balance_type,
        apsn_transactions,
        max_fees_per_day,
        min_overdrawn_fee,
        min_transaction_overdraft,
    )
    result = pipeline(*pipeline_args)
    return result

def redline_changes(original_path, revised_path, output_path):
    """Write a colour-coded unified diff of two Word documents to a new one.

    The paragraph texts of each input document are joined with newlines and
    split back into lines, the two line lists are compared with
    ``difflib.unified_diff``, and every diff line becomes one paragraph in
    the output document:

    * lines starting with ``-`` are red,
    * lines starting with ``+`` are green,
    * lines starting with ``@@`` are blue,
    * all other lines are left uncoloured.

    The ``---`` / ``+++`` header lines emitted by ``unified_diff`` also start
    with ``-`` / ``+`` and are coloured accordingly.

    Args:
        original_path: Path of the original .docx file.
        revised_path: Path of the revised .docx file.
        output_path: Path the diff document is saved to.
    """
    def _document_lines(path):
        # Flatten a document into the list of lines fed to the diff.
        joined = "\n".join(para.text for para in Document(path).paragraphs)
        return joined.splitlines()

    before_lines = _document_lines(original_path)
    after_lines = _document_lines(revised_path)

    # Checked in order; the first matching prefix decides the colour.
    prefix_colours = (
        ('-', RGBColor(255, 0, 0)),    # Red
        ('+', RGBColor(0, 128, 0)),    # Green
        ('@@', RGBColor(0, 0, 255)),   # Blue
    )

    report = Document()
    for entry in unified_diff(before_lines, after_lines, lineterm=''):
        colour = next(
            (rgb for prefix, rgb in prefix_colours if entry.startswith(prefix)),
            None,
        )
        if colour is None:
            report.add_paragraph(entry, style='Normal')
            continue
        coloured_run = report.add_paragraph(style='Normal').add_run(entry)
        coloured_run.font.color.rgb = colour

    report.save(output_path)

# Streamlit App
# Collects the model choice and fee-policy answers, accepts a PDF upload, runs
# the pipeline on it, and offers a .docx diff between the text extracted from
# the PDF and the text returned by the pipeline.
st.title("Canarie AI Prototype")
st.subheader("Finding the canarie in the coal mine")

model_name = st.selectbox("Model", model_names())

balance_type = st.selectbox("Do you charge on available balance or ledger balance?", ["available balance", "ledger balance"])

apsn_transactions = st.selectbox("Do you charge for APSN transactions?", ["yes", "no"])

max_fees_per_day = st.number_input("How many overdraft fees per day can be charged?", min_value=0, max_value=10)

min_overdrawn_fee = st.number_input("What is the minimum amount overdrawn to incur a fee?", min_value=0, max_value=500)

min_transaction_overdraft = st.number_input("What is the minimum transaction amount to trigger an overdraft?", min_value=0, max_value=500)

uploaded_file = st.file_uploader("Choose a file", type=["pdf"])

if uploaded_file is not None:
    with st.spinner('Please wait ...'):
        try:
            # Extract the text of the uploaded PDF.
            extracted_text = pdf_to_text_with_layout(uploaded_file)

            # The extraction above consumed the upload via read(); rewind it so
            # the pipeline receives the stream from the beginning rather than
            # positioned at end-of-file.
            uploaded_file.seek(0)

            revised_text = apply_pipeline(
                uploaded_file,
                model_name,
                balance_type,
                apsn_transactions,
                max_fees_per_day,
                min_overdrawn_fee,
                min_transaction_overdraft
            )

            # Keep all intermediate .docx files in one scratch directory that
            # is removed when the block exits, so reruns do not accumulate
            # temporary files on disk.
            with tempfile.TemporaryDirectory() as work_dir:
                original_word_path = os.path.join(work_dir, "original.docx")
                revised_word_path = os.path.join(work_dir, "revised.docx")
                redlined_output_path = os.path.join(work_dir, "redlined.docx")

                text_to_word_with_formatting(extracted_text, original_word_path)
                text_to_word_with_formatting(revised_text, revised_word_path)
                redline_changes(original_word_path, revised_word_path, redlined_output_path)

                # Load the result into memory before the directory is deleted.
                with open(redlined_output_path, "rb") as f:
                    redlined_bytes = f.read()

            st.download_button(
                label="Download Redlined Document",
                data=redlined_bytes,
                file_name="redlined_document.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )
            st.success("Redlined document created successfully!")

        except Exception as e:
            # Top-level UI boundary: show any failure in the page.
            st.exception(e)