Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
3056140
1
Parent(s):
1ea97da
retry
Browse files- app.py +124 -92
- requirements.txt +4 -2
app.py
CHANGED
@@ -1,95 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
)
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
if uploaded_file is not None:
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
redline_changes(original_word_path, revised_word_path, redlined_output_path)
|
71 |
-
with open(original_word_path, "rb") as f:
|
72 |
-
st.download_button(
|
73 |
-
label="Download Original Document",
|
74 |
-
data=f,
|
75 |
-
file_name="original_document.docx",
|
76 |
-
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
77 |
-
)
|
78 |
-
|
79 |
-
with open(revised_word_path, "rb") as f:
|
80 |
-
st.download_button(
|
81 |
-
label="Download Revised Document",
|
82 |
-
data=f,
|
83 |
-
file_name="revised_document.docx",
|
84 |
-
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
85 |
-
)
|
86 |
-
with open(redlined_output_path, "rb") as f:
|
87 |
-
st.download_button(
|
88 |
-
label="Download Redlined Document",
|
89 |
-
data=f,
|
90 |
-
file_name="redlined_document.docx",
|
91 |
-
mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
92 |
-
)
|
93 |
-
st.success("Documents created successfully!")
|
94 |
-
except Exception as e:
|
95 |
-
st.exception(e)
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
from pdfminer.high_level import extract_text
|
4 |
+
from langchain_core.prompts import PromptTemplate
|
5 |
+
from google.cloud import aiplatform
|
6 |
+
from google.cloud.aiplatform_v1 import ModelServiceClient
|
7 |
+
from google.cloud.aiplatform_v1.types import GenerateContentRequest, Document, GenerationConfig, SafetySettings, HarmCategory, HarmBlockThreshold
|
8 |
import streamlit as st
|
9 |
+
|
10 |
+
# Bind the Google AI Platform SDK to the project and region this app uses.
aiplatform.init(project="akroda", location="us-central1")

# Base64 text of each reference PDF, in the order the documents are sent.
# NOTE(review): several payloads are very short for real PDFs — confirm they
# are complete and decode cleanly before relying on them.
_PDF_PAYLOADS_B64 = (
    "JVBERi0xLjQKJeODgxNz5dL1Jvb3QgMTU0IDAgUi9TaXplIDE2Nj4+CnN0YXJ0eHJlZgoyMTY0NjkKJSVFT0YK",
    "JVBERi0xLjQKJeLjz9MKNijU+PgpzdGFydHhyZWYKMTMxMDY0CiUlRU9GCg==",
    "JVBERi0xLjQKJeLjz9MKNiAwZDU0YTVlNzllMWRhYWY1ZDQ2YjI+XS9Sb290IDE3NyAwIFIvU2l6ZSAxODc+PgpzdGFydHhyZWYKMjA3NTk5CiUlRU9GCg==",
    "JVBERi0xLjQKJeLjz9ML1Jvb3QgMTg5IDAgUi9TaXplIDE5OT4+CnN0YXJ0eHJlZgoxOTgzNzMKJSVFT0YK",
    "JVBERi0xCcnCmVuZHN0cmVhbQplbmRvYmoKc3RhcnR4cmVmCjIwOTgyNQolJUVPRgo=",
    "JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK",
    "JVBERi0xLjQKJiUlRU9GCg==",
    "JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==",
    "JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K",
)

# Decode every payload and wrap it as an application/pdf Document.
documents = [
    Document(content_type="application/pdf", data=base64.b64decode(payload))
    for payload in _PDF_PAYLOADS_B64
]
|
25 |
+
|
26 |
+
# Instruction text for the model. Single-brace names are filled in by
# prompt.format(); the doubled braces are escaped literal braces.
text1 = """
attached are several cases and a bank disclosure. Using the cases, please provide changes to the disclosure and keep as much formatting as possible and to ensure there are no legal contradictions between the content of the disclosure and the cases and please provide reasoning for each proposed change. Please also integrate the bank's policies into the disclosure. In the first sentence, please include a reference to the account agreement "for more information on overdrafts" and a placeholder for a URL.
Here are the answers to the bank's policy questions:
Do you charge on available balance or ledger balance?: {balance_type} (which should replace money in the first sentence)
Do you charge for APSN transactions?: {apsn_transactions}
How many overdraft fees per day can be charged?: {max_fees_per_day}
What is the minimum amount overdrawn to incur a fee?: ${min_overdrawn_fee}
What is the minimum transaction amount to trigger an overdraft?: ${min_transaction_overdraft}

Please output in the following format:
{{entire updated disclosure text with changes bolded}}
------
{{reasons for each change listed and cases cited}}
"""

# Variables declared for the template.
# NOTE(review): "context" and "disclosure" are declared here, but text1 has no
# {context} or {disclosure} placeholder, so those values never appear in the
# rendered prompt — confirm whether the template should include them.
_PROMPT_VARIABLES = [
    "context",
    "disclosure",
    "balance_type",
    "apsn_transactions",
    "max_fees_per_day",
    "min_overdrawn_fee",
    "min_transaction_overdraft",
]

prompt = PromptTemplate(template=text1, input_variables=_PROMPT_VARIABLES)
|
45 |
+
|
46 |
+
# Default inputs used to render a prompt at import time.
legal_cases_context = "Provide the legal context here..."
disclosure_text = "Include the initial disclosure text here..."
balance_type, apsn_transactions = "available balance", "yes"
max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft = 3, 5, 1

# The disclosure is passed to the template as base64 text rather than raw text.
encoded_disclosure_text = base64.b64encode(disclosure_text.encode()).decode()

# Render the prompt once from the defaults above.
_default_prompt_fields = {
    "context": legal_cases_context,
    "disclosure": encoded_disclosure_text,
    "balance_type": balance_type,
    "apsn_transactions": apsn_transactions,
    "max_fees_per_day": max_fees_per_day,
    "min_overdrawn_fee": min_overdrawn_fee,
    "min_transaction_overdraft": min_transaction_overdraft,
}
val = prompt.format(**_default_prompt_fields)
|
67 |
+
|
68 |
+
# Sampling and length limits applied to every generation request.
generation_config = GenerationConfig(
    temperature=1,
    top_p=0.95,
    max_output_tokens=8192,
)

# Every listed harm category shares the same blocking threshold.
_BLOCK_LEVEL = HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
safety_settings = SafetySettings(
    harm_category_settings={
        category: _BLOCK_LEVEL
        for category in (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
        )
    }
)
|
82 |
+
|
83 |
+
def generate(document_parts, prompt_text):
    """Send the documents and prompt to the model and return the generated text.

    Args:
        document_parts: Value forwarded as the request's ``documents`` field.
        prompt_text: Value forwarded as the request's ``prompt`` field.

    Returns:
        The ``generated_text`` attribute of the service response.
    """
    client = ModelServiceClient()
    # NOTE(review): confirm that ModelServiceClient exposes generate_content,
    # that GenerateContentRequest accepts `documents`/`prompt`, and that the
    # response carries `generated_text` for the pinned SDK version.
    request = GenerateContentRequest(
        # Project, region and model are fixed here rather than passed in.
        model=client.model_path("akroda", "us-central1", "gemini-1.5-pro-001"),
        documents=document_parts,
        prompt=prompt_text,
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    return client.generate_content(request=request).generated_text
|
96 |
+
|
97 |
+
def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Render the prompt from the caller's policy answers and run the model.

    Previously this function sent the module-level ``val`` prompt, which was
    formatted once from placeholder defaults, so the policy values passed in
    here were silently ignored. The prompt is now formatted per call.

    Args:
        file: Uploaded file object. Currently unused; kept so existing callers
            keep working.
        model_name: Requested model name. Currently unused because
            ``generate`` fixes the model itself; kept for compatibility.
        balance_type: Answer to the available-vs-ledger balance question.
        apsn_transactions: Answer to the APSN transactions question.
        max_fees_per_day: Maximum number of overdraft fees per day.
        min_overdrawn_fee: Minimum overdrawn amount that incurs a fee.
        min_transaction_overdraft: Minimum transaction amount that triggers an
            overdraft.

    Returns:
        The text returned by ``generate`` for the module-level ``documents``.
    """
    # Fill the template with this call's answers, not the import-time defaults.
    prompt_text = prompt.format(
        context=legal_cases_context,
        disclosure=encoded_disclosure_text,
        balance_type=balance_type,
        apsn_transactions=apsn_transactions,
        max_fees_per_day=max_fees_per_day,
        min_overdrawn_fee=min_overdrawn_fee,
        min_transaction_overdraft=min_transaction_overdraft,
    )
    return generate(documents, prompt_text)
|
101 |
+
|
102 |
+
# Streamlit Interface
st.title("Bank Disclosure Update Pipeline")
st.write("Upload your document and provide the necessary details to update the bank disclosure.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Echo the upload's name and MIME type back to the user.
    st.write({"FileName": uploaded_file.name, "FileType": uploaded_file.type})

    # Show the text extracted from the uploaded PDF.
    content = extract_text(uploaded_file)
    st.text(content)

    # Collect the bank's policy answers; defaults match the module placeholders.
    balance_type = st.text_input("Balance Type", "available balance")
    apsn_transactions = st.text_input("APSN Transactions", "yes")
    max_fees_per_day = st.number_input("Max Fees Per Day", min_value=1, value=3)
    min_overdrawn_fee = st.number_input("Min Overdrawn Fee ($)", min_value=0, value=5)
    min_transaction_overdraft = st.number_input("Min Transaction Overdraft ($)", min_value=0, value=1)

    if st.button("Generate Updated Disclosure"):
        # Run the pipeline with the answers collected above.
        result = pipeline(
            uploaded_file,
            "gemini-1.5-pro-001",
            balance_type,
            apsn_transactions,
            max_fees_per_day,
            min_overdrawn_fee,
            min_transaction_overdraft,
        )
        st.write("Updated Disclosure:")
        st.text(result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
@@ -11,7 +11,6 @@ google_generativeai
|
|
11 |
pdf2docx
|
12 |
pymupdf
|
13 |
python-docx
|
14 |
-
streamlit
|
15 |
pdfplumber
|
16 |
python-docx
|
17 |
redlines
|
@@ -20,4 +19,7 @@ langchain_core
|
|
20 |
langchain_openai
|
21 |
langchain_anthropic
|
22 |
langchain_google_genai
|
23 |
-
docxcompose
|
|
|
|
|
|
|
|
11 |
pdf2docx
|
12 |
pymupdf
|
13 |
python-docx
|
|
|
14 |
pdfplumber
|
15 |
python-docx
|
16 |
redlines
|
|
|
19 |
langchain_openai
|
20 |
langchain_anthropic
|
21 |
langchain_google_genai
|
22 |
+
docxcompose
|
23 |
+
google-cloud-aiplatform==1.12.0
|
24 |
+
pdfminer.six==20201018
|
25 |
+
langchain-core
|