File size: 6,915 Bytes
3056140
 
bf13bd6
3056140
 
3b5a8b1
2383f67
e87885c
 
 
 
3056140
c1c7a1b
 
 
 
2383f67
 
 
 
 
 
 
 
3056140
 
 
c1c7a1b
3056140
e462109
 
 
 
 
 
 
 
 
3056140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e462109
 
 
 
 
3056140
e462109
3056140
e462109
 
3056140
 
e462109
 
 
3056140
 
 
 
 
 
 
 
c16c548
3056140
 
 
bf13bd6
 
 
 
 
 
3056140
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
import base64
import fitz  # PyMuPDF
from langchain_core.prompts import PromptTemplate
from google.cloud import aiplatform
import streamlit as st
import json
import vertexai
from vertexai.generative_models import GenerativeModel, Part, FinishReason
import vertexai.preview.generative_models as generative_models


def pad_base64(base64_string):
    """Return ``base64_string`` with ``=`` appended up to a multiple of four characters.

    A string whose length is already a multiple of four gets no extra
    characters appended.
    """
    missing = -len(base64_string) % 4
    return base64_string + "=" * missing

# Materialize the service-account key from the environment (when provided) and
# point GOOGLE_APPLICATION_CREDENTIALS at the resulting file.
credentials_json = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
if credentials_json:
    credentials_path = "/tmp/gcp_credentials.json"
    # The file holds a secret: create it readable/writable by the owner only
    # instead of with the process's default permissions.
    credentials_fd = os.open(
        credentials_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600
    )
    with os.fdopen(credentials_fd, "w") as f:
        f.write(credentials_json)
    # The mode given to os.open() only applies when the file is newly created;
    # tighten a file left over from an earlier run as well.
    os.chmod(credentials_path, 0o600)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path

# Initialize the Google AI Platform
aiplatform.init(project="akroda", location="us-central1")

# PDF payloads embedded as base64 text. Each one is padded to a multiple of
# four characters and decoded to raw bytes, in order, when the module loads.
documents = [
    {"content_type": "application/pdf", "data": base64.b64decode(pad_base64(encoded_pdf))}
    for encoded_pdf in (
        "JVBERi0xLjQKJeODgxNz5dL1Jvb3QgMTU0IDAgUi9TaXplIDE2Nj4+CnN0YXJ0eHJlZgoyMTY0NjkKJSVFT0YK",
        "JVBERi0xLjQKJeLjz9MKNijU+PgpzdGFydHhyZWYKMTMxMDY0CiUlRU9GCg==",
        "JVBERi0xLjQKJeLjz9MKNiAwZDU0YTVlNzllMWRhYWY1ZDQ2YjI+XS9Sb290IDE3NyAwIFIvU2l6ZSAxODc+PgpzdGFydHhyZWYKMjA3NTk5CiUlRU9GCg==",
        "JVBERi0xLjQKJeLjz9ML1Jvb3QgMTg5IDAgUi9TaXplIDE5OT4+CnN0YXJ0eHJlZgoxOTgzNzMKJSVFT0YK",
        "JVBERi0xCcnCmVuZHN0cmVhbQplbmRvYmoKc3RhcnR4cmVmCjIwOTgyNQolJUVPRgo=",
        "JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK",
        "JVBERi0xLjQKJiUlRU9GCg==",
        "JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==",
        "JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K",
    )
]

# Instruction prompt for the model. Single-brace fields are filled in by
# PromptTemplate; the double-brace lines are escaped and render as literal
# "{...}" markers describing the expected output layout.
text1 = """
attached are several cases and a bank disclosure. Using the cases, please provide changes to the disclosure and keep as much formatting as possible and to ensure there are no legal contradictions between the content of the disclosure and the cases and please provide reasoning for each proposed change. Please also integrate the bank's policies into the disclosure. In the first sentence, please include a reference to the account agreement "for more information on overdrafts" and a placeholder for a URL. 
Here are the answers to the bank's policy questions:
Do you charge on available balance or ledger balance?: {balance_type} (which should replace money in the first sentence)
Do you charge for APSN transactions?: {apsn_transactions}
How many overdraft fees per day can be charged?: {max_fees_per_day}
What is the minimum amount overdrawn to incur a fee?: ${min_overdrawn_fee}
What is the minimum transaction amount to trigger an overdraft?: ${min_transaction_overdraft}

Please output in the following format:
{{entire updated disclosure text with changes bolded}}
------
{{reasons for each change listed and cases cited}}
"""

# input_variables lists exactly the fields that appear in text1. The prompt
# refers to the cases and the disclosure as attachments and has no
# {context} / {disclosure} field, so those names are not declared here.
prompt = PromptTemplate(
    input_variables=["balance_type", "apsn_transactions", "max_fees_per_day", "min_overdrawn_fee", "min_transaction_overdraft"],
    template=text1,
)

# Default answers used to render the module-level prompt text below.
legal_cases_context = "Provide the legal context here..."
disclosure_text = "Include the initial disclosure text here..."
balance_type = "available balance"
apsn_transactions = "yes"
max_fees_per_day = 3
min_overdrawn_fee = 5
min_transaction_overdraft = 1

# The disclosure is handed to the template as base64 text, not as raw text.
encoded_disclosure_text = base64.b64encode(disclosure_text.encode("utf-8")).decode("ascii")

# Render the prompt once, at import time, from the defaults above.
val = prompt.format(
    **{
        "context": legal_cases_context,
        "disclosure": encoded_disclosure_text,
        "balance_type": balance_type,
        "apsn_transactions": apsn_transactions,
        "max_fees_per_day": max_fees_per_day,
        "min_overdrawn_fee": min_overdrawn_fee,
        "min_transaction_overdraft": min_transaction_overdraft,
    }
)

# Generation settings; presumably intended for the model call, though nothing
# in the visible code reads them yet.
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)

def generate(document_parts, prompt_text):
    """Stand-in for the model call; always returns the same fixed string.

    Args:
        document_parts: Accepted but not read by this placeholder.
        prompt_text: Accepted but not read by this placeholder.

    Returns:
        A constant placeholder message.
    """
    # Placeholder: Replace with actual content generation logic using Google Cloud API
    placeholder_reply = "Generated content based on provided documents and prompt."
    return placeholder_reply

def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Render the prompt from the caller's policy answers and generate the updated disclosure.

    Args:
        file: Uploaded disclosure file. Not read yet; the module-level
            ``documents`` are passed to ``generate`` instead.
        model_name: Name of the model to use. Not read yet.
        balance_type: Answer to the available-vs-ledger balance question.
        apsn_transactions: Answer to the APSN transactions question.
        max_fees_per_day: Maximum number of overdraft fees per day.
        min_overdrawn_fee: Minimum overdrawn amount (in dollars) that incurs a fee.
        min_transaction_overdraft: Minimum transaction amount (in dollars)
            that triggers an overdraft.

    Returns:
        The text returned by ``generate``.
    """
    # Format the template with the values this call received. The module-level
    # ``val`` was rendered from hard-coded defaults, so using it here would
    # silently discard the caller's answers.
    prompt_text = prompt.format(
        balance_type=balance_type,
        apsn_transactions=apsn_transactions,
        max_fees_per_day=max_fees_per_day,
        min_overdrawn_fee=min_overdrawn_fee,
        min_transaction_overdraft=min_transaction_overdraft,
    )
    # Placeholder: Replace with actual logic to prepare document parts and call generate
    document_parts = documents
    return generate(document_parts, prompt_text)

# Streamlit Interface
st.title("Bank Disclosure Update Pipeline")
st.write("Upload your document and provide the necessary details to update the bank disclosure.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    file_details = {"FileName": uploaded_file.name, "FileType": uploaded_file.type}
    st.write(file_details)

    # Extract text from the uploaded PDF file. getvalue() yields the complete
    # upload regardless of the stream position, and the context manager closes
    # the PyMuPDF document as soon as its text has been collected.
    with fitz.open(stream=uploaded_file.getvalue(), filetype="pdf") as pdf_document:
        content = "".join(page.get_text() for page in pdf_document)
    # NOTE(review): the extracted text is only displayed; it is not passed on
    # to pipeline() below.
    st.text(content)

    # Placeholder for user inputs
    balance_type = st.text_input("Balance Type", "available balance")
    apsn_transactions = st.text_input("APSN Transactions", "yes")
    max_fees_per_day = st.number_input("Max Fees Per Day", min_value=1, value=3)
    min_overdrawn_fee = st.number_input("Min Overdrawn Fee ($)", min_value=0, value=5)
    min_transaction_overdraft = st.number_input("Min Transaction Overdraft ($)", min_value=0, value=1)

    if st.button("Generate Updated Disclosure"):
        # Run the pipeline with the provided inputs
        result = pipeline(
            uploaded_file,
            "gemini-1.5-pro-001",
            balance_type,
            apsn_transactions,
            max_fees_per_day,
            min_overdrawn_fee,
            min_transaction_overdraft,
        )
        st.write("Updated Disclosure:")
        st.text(result)