# Provenance: commit 0d99179 by ankur-bohra, "Add basic structure".
from .model import InformationExtractedFromABillReceipt as PydanticModel
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser, OutputFixingParser
from langchain.prompts import (
ChatPromptTemplate,
HumanMessagePromptTemplate,
SystemMessagePromptTemplate,
)
# Shared chat model: temperature 0 and one completion per request (n=1).
# The remaining sampling options are handed over through `model_kwargs`.
model = ChatOpenAI(
    temperature=0,
    n=1,
    model_kwargs=dict(
        stop=None,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    ),
)
# Build the receipt-extraction chain.
#
# Parsers: `output_parser` targets the pydantic model imported from `.model`;
# `fixing_parser` wraps it together with the same chat model.
output_parser = PydanticOutputParser(pydantic_object=PydanticModel)
fixing_parser = OutputFixingParser.from_llm(parser=output_parser, llm=model)

# Prompt: a system message carrying the task description plus a
# `{format_instructions}` placeholder, followed by a human message that is
# just the `{text}` placeholder. Both variables are supplied by the caller.
system_message_prompt = SystemMessagePromptTemplate.from_template(
    "You are an information extraction engine that outputs details from OCR "
    "processed documents like uids, total, tax, name, currency, date, seller "
    "details, summary. You may use context to make an educated guess about the "
    "currency. Use null if you are unable to find certain details\n"
    "{format_instructions}"
)
human_message_prompt = HumanMessagePromptTemplate.from_template("{text}")
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

chain = LLMChain(prompt=chat_prompt, llm=model, output_parser=fixing_parser)
if __name__ == "__main__":
    # Manual smoke test: push one OCR'd invoice through the chain, print the
    # raw generation result, then print the parsed output.
    ocr_text = """amazonin
we)
Sold By :
Spigen India Pvt. Ltd.
* Rect/Killa Nos. 38//8/2 min, 192//22/1,196//2/1/1,
37//15/1, 15/2,, Adjacent to Starex School, Village
- Binola, National Highway -8, Tehsil - Manesar
Gurgaon, Haryana, 122413
IN
PAN No: ABACS5056L
GST Registration No: O6ABACS5056L12Z5
Order Number: 407-5335982-7837125
Order Date: 30.05.2023
Tax Invoice/Bill of Supply/Cash Memo
(Original for Recipient)
Billing Address :
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Shipping Address :
Praveen Bohra
Praveen Bohra
E-303, ParkView City 2, Sector 49, Sohna Road
GURGAON, HARYANA, 122018
IN
State/UT Code: 06
Place of supply: HARYANA
Place of delivery: HARYANA
Invoice Number : DEL5-21033
Invoice Details : HR-DEL5-918080915-2324
Invoice Date : 30.05.2023
Description at Tax |Tax /|Tax Total
p y Rate |Type |Amount|Amount
Black) | BO8BHLZHBH ( ACS01744INP )
HSN:39269099
1 |Spigen Liquid Air Back Cover Case for iPhone 12 Mini (TPU | Matte
1846.62] 1 |%846.62| 9% |CGST! %76.19 |%999.00
9% |SGST| %76.19
TOTAL:
Amount in Words:
Nine Hundred Ninety-nine only
Whether tax is payable under reverse charge - No
For Spigen India Pvt. Ltd.:
sSoigenrn
Authorized Signatory
Payment Transaction ID: Date & Time: 30/05/2023, 10:48:43 Invoice Value: Mode of Payment: Credit
2rs9ZEF8BwU9VmWiCc2Us hrs 999.00 Card
*ASSPL-Amazon Seller Services Pvt. Ltd., ARIPL-Amazon Retail India Pvt. Ltd. (only where Amazon Retail India Pvt. Ltd. fulfillment center is co-located)
Customers desirous of availing input GST credit are requested to create a Business account and purchase on Amazon.in/business from Business eligible offers
Please note that this invoice is not a demand for payment
Page 1 of 1"""

    # Both prompt variables, gathered once so the generation call and the
    # prompt rendering below receive exactly the same values.
    prompt_inputs = {
        "text": ocr_text,
        "format_instructions": fixing_parser.get_format_instructions(),
    }

    # Step 1: run the chain for this single input and show the raw result.
    llm_result = chain.generate(input_list=[prompt_inputs])
    print(llm_result)

    # Step 2: take the text of the first generation and parse it, handing the
    # parser the rendered prompt alongside the completion.
    completion_text = llm_result.generations[0][0].text
    rendered_prompt = chain.prompt.format_prompt(**prompt_inputs)
    parsed_receipt = fixing_parser.parse_with_prompt(completion_text, rendered_prompt)
    print(parsed_receipt)