Spaces:
Sleeping
Sleeping
anmolsahai
committed on
Commit
•
53cccdd
1
Parent(s):
7a1bb10
bugfix
Browse files
- app.py +1 -1
- langchain_pipeline.py +23 -8
app.py
CHANGED
@@ -18,7 +18,7 @@ def pdf_to_text_with_layout(pdf_file):
|
|
18 |
|
19 |
def clean_text(text):
|
20 |
# Remove non-ASCII and control characters
|
21 |
-
return ''.join(c for c in text if c.
|
22 |
|
23 |
def text_to_word_with_formatting(text, word_path):
|
24 |
doc = Document()
|
|
|
18 |
|
19 |
def clean_text(text):
|
20 |
# Remove non-ASCII and control characters
|
21 |
+
return ''.join(c for c in text if c.isprintable() and ord(c) < 65536)
|
22 |
|
23 |
def text_to_word_with_formatting(text, word_path):
|
24 |
doc = Document()
|
langchain_pipeline.py
CHANGED
@@ -10,18 +10,20 @@ import vertexai
|
|
10 |
from vertexai.generative_models import GenerativeModel, Part, FinishReason
|
11 |
import vertexai.preview.generative_models as generative_models
|
12 |
|
13 |
-
def generate():
|
14 |
vertexai.init(project="akroda", location="us-central1")
|
15 |
model = GenerativeModel("gemini-1.5-pro-001")
|
16 |
responses = model.generate_content(
|
17 |
-
|
18 |
generation_config=generation_config,
|
19 |
safety_settings=safety_settings,
|
20 |
stream=True,
|
21 |
)
|
22 |
|
|
|
23 |
for response in responses:
|
24 |
-
|
|
|
25 |
|
26 |
document1 = Part.from_data(
|
27 |
mime_type="application/pdf",
|
@@ -61,9 +63,9 @@ What is the minimum amount overdrawn to incur a fee?: ${min_overdrawn_fee}
|
|
61 |
What is the minimum transaction amount to trigger an overdraft?: ${min_transaction_overdraft}
|
62 |
|
63 |
Please output in the following format:
|
64 |
-
{entire updated disclosure text with changes bolded}
|
65 |
------
|
66 |
-
{reasons for each change listed and cases cited}
|
67 |
"""
|
68 |
|
69 |
prompt = PromptTemplate(
|
@@ -71,9 +73,21 @@ prompt = PromptTemplate(
|
|
71 |
template=text1,
|
72 |
)
|
73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
val = prompt.format(
|
75 |
context=legal_cases_context,
|
76 |
-
disclosure=
|
77 |
balance_type=balance_type,
|
78 |
apsn_transactions=apsn_transactions,
|
79 |
max_fees_per_day=max_fees_per_day,
|
@@ -95,5 +109,6 @@ safety_settings = {
|
|
95 |
}
|
96 |
|
97 |
def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
|
98 |
-
|
99 |
-
|
|
|
|
10 |
from vertexai.generative_models import GenerativeModel, Part, FinishReason
|
11 |
import vertexai.preview.generative_models as generative_models
|
12 |
|
13 |
+
def generate(document_parts, prompt_text):
|
14 |
vertexai.init(project="akroda", location="us-central1")
|
15 |
model = GenerativeModel("gemini-1.5-pro-001")
|
16 |
responses = model.generate_content(
|
17 |
+
document_parts,
|
18 |
generation_config=generation_config,
|
19 |
safety_settings=safety_settings,
|
20 |
stream=True,
|
21 |
)
|
22 |
|
23 |
+
response_text = ""
|
24 |
for response in responses:
|
25 |
+
response_text += response.text
|
26 |
+
return response_text
|
27 |
|
28 |
document1 = Part.from_data(
|
29 |
mime_type="application/pdf",
|
|
|
63 |
What is the minimum transaction amount to trigger an overdraft?: ${min_transaction_overdraft}
|
64 |
|
65 |
Please output in the following format:
|
66 |
+
{{entire updated disclosure text with changes bolded}}
|
67 |
------
|
68 |
+
{{reasons for each change listed and cases cited}}
|
69 |
"""
|
70 |
|
71 |
prompt = PromptTemplate(
|
|
|
73 |
template=text1,
|
74 |
)
|
75 |
|
76 |
+
# Placeholder values for the variables used in prompt formatting
|
77 |
+
legal_cases_context = "Provide the legal context here..."
|
78 |
+
disclosure_text = "Include the initial disclosure text here..."
|
79 |
+
balance_type = "available balance"
|
80 |
+
apsn_transactions = "yes"
|
81 |
+
max_fees_per_day = 3
|
82 |
+
min_overdrawn_fee = 5
|
83 |
+
min_transaction_overdraft = 1
|
84 |
+
|
85 |
+
# Base64 encode the disclosure text
|
86 |
+
encoded_disclosure_text = base64.b64encode(disclosure_text.encode()).decode()
|
87 |
+
|
88 |
val = prompt.format(
|
89 |
context=legal_cases_context,
|
90 |
+
disclosure=encoded_disclosure_text,
|
91 |
balance_type=balance_type,
|
92 |
apsn_transactions=apsn_transactions,
|
93 |
max_fees_per_day=max_fees_per_day,
|
|
|
109 |
}
|
110 |
|
111 |
def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
|
112 |
+
document_parts = [document1, document2, document3, document4, document5, document6, document7, document8, document9]
|
113 |
+
response_text = generate(document_parts, val)
|
114 |
+
return response_text
|