Spaces:
Sleeping
Sleeping
File size: 5,593 Bytes
c16c548 3a51e49 c16c548 559b6b9 1ea97da 56ac78e c16c548 56ac78e dbfae32 56ac78e 1ea97da 56ac78e c16c548 7a1bb10 53cccdd d7b9456 53cccdd 7a1bb10 53cccdd e675e34 53cccdd e675e34 c16c548 1ea97da 56ac78e d7b9456 1ea97da 56ac78e 1ea97da 56ac78e 1ea97da 56ac78e d7b9456 4aa9265 56ac78e 53cccdd 56ac78e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
import os
from pdfminer import high_level
from langchain_core.prompts import PromptTemplate
from google.cloud import aiplatform
from google.cloud.aiplatform_v1 import ModelServiceClient
from google.cloud.aiplatform_v1.types import GenerateContentRequest, Document, GenerationConfig, SafetySettings, HarmCategory, HarmBlockThreshold
import base64
# Set the default project and region used by the google-cloud-aiplatform SDK.
aiplatform.init(project="akroda", location="us-central1")
# PDF attachments passed to the model with every request. Each entry is built
# from an inline base64 string that is decoded to raw bytes at import time and
# tagged with the application/pdf content type.
# NOTE(review): presumably these are the legal cases and the bank disclosure
# that the prompt refers to as "attached" - confirm which entry is which.
# NOTE(review): the payloads appear heavily abbreviated (little more than a
# "%PDF" header and a "%%EOF" trailer), and some may not be valid base64, in
# which case b64decode raises at import - TODO confirm and restore the full
# files, ideally loading them from disk instead of inlining them.
# NOTE(review): confirm that `Document` is actually exported by
# google.cloud.aiplatform_v1.types in the installed SDK version.
documents = [
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJeODgxNz5dL1Jvb3QgMTU0IDAgUi9TaXplIDE2Nj4+CnN0YXJ0eHJlZgoyMTY0NjkKJSVFT0YK")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJeLjz9MKNijU+PgpzdGFydHhyZWYKMTMxMDY0CiUlRU9GCg==")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJeLjz9MKNiAwZDU0YTVlNzllMWRhYWY1ZDQ2YjI+XS9Sb290IDE3NyAwIFIvU2l6ZSAxODc+PgpzdGFydHhyZWYKMjA3NTk5CiUlRU9GCg==")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJeLjz9ML1Jvb3QgMTg5IDAgUi9TaXplIDE5OT4+CnN0YXJ0eHJlZgoxOTgzNzMKJSVFT0YK")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xCcnCmVuZHN0cmVhbQplbmRvYmoKc3RhcnR4cmVmCjIwOTgyNQolJUVPRgo=")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLj+CnN0YXJ0eHJlZgoyMTk5MDYKJSVFT0YK")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJiUlRU9GCg==")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjQKJe90IDMwOCAwIFIvU2l6ZSAzMTg+PgpzdGFydHhyZWYKMjcwNzU3CiUlRU9GCg==")),
Document(content_type="application/pdf", data=base64.b64decode("JVBERi0xLjUNJeLjz9MNCjcgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgNjc1NzgvTyA5L0UgNjAyNDYvTiAxL1QgNjcyODcvSCBbIDQ4MyAxNTRdPj4NZW5kb2JxDSAgICAgICAgICAgICAgICAgICAgDQoyMiAwIG9iag08PC9EZWNvZGVQYXJtczw8L0NvbHVtbnMgNC9QcmVkaWN0b3IgMTI+Pi9GaWx0ZXIvRmxhdGVEZWNvZGUvSURbPDE3NzU4MkJFODc4MzRFQjNBOEM3RkIzQTgyRjFFMEFCPjw5MzI2Qjk4REM4NjQ2RTRCODI3MzZFQUEzOENEQjFBQj5dL0luZGV4WzcgMjhdL0luZm8gNiAwIFIvTGVuZ3RoIDgzL1ByZXYgNjcyODgvUm9vdCA4IDAgUi9TaXplIDM1L1R5cGUvWFPRg0K"))
]
# Instruction prompt for the model. Single-brace fields are filled in by
# PromptTemplate.format(); the doubled braces in the output-format section are
# escapes that render as literal braces. The "$" before the two amount fields
# is plain text, so those values render as dollar amounts.
text1 = """
attached are several cases and a bank disclosure. Using the cases, please provide changes to the disclosure and keep as much formatting as possible and to ensure there are no legal contradictions between the content of the disclosure and the cases and please provide reasoning for each proposed change. Please also integrate the bank's policies into the disclosure. In the first sentence, please include a reference to the account agreement "for more information on overdrafts" and a placeholder for a URL.
Here are the answers to the bank's policy questions:
Do you charge on available balance or ledger balance?: {balance_type} (which should replace money in the first sentence)
Do you charge for APSN transactions?: {apsn_transactions}
How many overdraft fees per day can be charged?: {max_fees_per_day}
What is the minimum amount overdrawn to incur a fee?: ${min_overdrawn_fee}
What is the minimum transaction amount to trigger an overdraft?: ${min_transaction_overdraft}
Please output in the following format:
{{entire updated disclosure text with changes bolded}}
------
{{reasons for each change listed and cases cited}}
"""
# Declare exactly the fields the template references. The previous list also
# named "context" and "disclosure", which do not appear anywhere in the
# template; a declaration that disagrees with the template is misleading and
# is rejected outright when template validation is enabled.
prompt = PromptTemplate(
    input_variables=[
        "balance_type",
        "apsn_transactions",
        "max_fees_per_day",
        "min_overdrawn_fee",
        "min_transaction_overdraft",
    ],
    template=text1,
)
# Placeholder values used to build the module-level default prompt.
legal_cases_context = "Provide the legal context here..."
disclosure_text = "Include the initial disclosure text here..."
balance_type = "available balance"
apsn_transactions = "yes"
max_fees_per_day = 3
min_overdrawn_fee = 5
min_transaction_overdraft = 1
# Base64 form of the disclosure text. It is no longer passed to the prompt
# (see below) but the name is kept so existing references keep working.
encoded_disclosure_text = base64.b64encode(disclosure_text.encode()).decode()
# Pass only the fields the template actually contains. The template has no
# {context} or {disclosure} field, so supplying those keyword arguments never
# reached the rendered prompt, and formatter versions that reject unused
# keyword arguments raise on them.
# NOTE(review): presumably the cases and the disclosure reach the model as the
# attached PDF documents rather than through the prompt text - confirm.
val = prompt.format(
    balance_type=balance_type,
    apsn_transactions=apsn_transactions,
    max_fees_per_day=max_fees_per_day,
    min_overdrawn_fee=min_overdrawn_fee,
    min_transaction_overdraft=min_transaction_overdraft,
)
# Sampling settings shared by every request: output capped at 8192 tokens,
# temperature 1, nucleus sampling at 0.95.
generation_config = GenerationConfig(max_output_tokens=8192, temperature=1, top_p=0.95)
# Apply the same blocking threshold to each listed harm category.
# NOTE(review): confirm that `SafetySettings` and its `harm_category_settings`
# argument exist in the installed google-cloud-aiplatform version; verify
# against the SDK reference before relying on this.
safety_settings = SafetySettings(
    harm_category_settings=dict.fromkeys(
        (
            HarmCategory.HARM_CATEGORY_HATE_SPEECH,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
            HarmCategory.HARM_CATEGORY_HARASSMENT,
        ),
        HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    )
)
def generate(document_parts, prompt_text):
    """Send the attachments and prompt to the model and return its text.

    Args:
        document_parts: Attachment objects forwarded as the request's
            ``documents`` field.
        prompt_text: Fully formatted prompt forwarded as the request's
            ``prompt`` field.

    Returns:
        The ``generated_text`` attribute of the service response.

    The module-level ``generation_config`` and ``safety_settings`` are applied
    to every request, and a new client is created on each call.
    """
    # NOTE(review): confirm against the installed SDK that ModelServiceClient
    # exposes generate_content, that GenerateContentRequest accepts
    # `documents` / `prompt`, and that the response has `generated_text`.
    client = ModelServiceClient()
    request = GenerateContentRequest(
        model=client.model_path("akroda", "us-central1", "gemini-1.5-pro-001"),
        documents=document_parts,
        prompt=prompt_text,
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    return client.generate_content(request=request).generated_text
def pipeline(file, model_name, balance_type, apsn_transactions, max_fees_per_day, min_overdrawn_fee, min_transaction_overdraft):
    """Build the prompt from the caller's policy answers and run the model.

    Previously this function ignored every argument and sent the module-level
    ``val`` prompt, which is formatted from placeholder values at import time,
    so the caller's answers never reached the model.

    Args:
        file: Currently unused.
            NOTE(review): presumably an uploaded disclosure - confirm whether
            it should replace one of the module-level ``documents``.
        model_name: Currently unused; ``generate`` targets a fixed model.
        balance_type: Answer to the available-vs-ledger balance question.
        apsn_transactions: Answer to whether APSN transactions are charged.
        max_fees_per_day: Maximum number of overdraft fees charged per day.
        min_overdrawn_fee: Minimum overdrawn amount that incurs a fee.
        min_transaction_overdraft: Minimum transaction amount that triggers
            an overdraft.

    Returns:
        The text returned by ``generate`` for the module-level ``documents``
        and the freshly formatted prompt.
    """
    # Only the fields present in the template are supplied.
    prompt_text = prompt.format(
        balance_type=balance_type,
        apsn_transactions=apsn_transactions,
        max_fees_per_day=max_fees_per_day,
        min_overdrawn_fee=min_overdrawn_fee,
        min_transaction_overdraft=min_transaction_overdraft,
    )
    return generate(documents, prompt_text)
|