File size: 7,104 Bytes
e3cb0b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import streamlit as st, base64, json, re
import mysql.connector
from datetime import datetime
from utils import ai, st_def, db
import pandas as pd


st_def.st_logo(title='👋 Scanned Recepit Extract')
tab1, tab2, tab3,tab4 = st.tabs(["Upload", "Display", "Save", "Database"])
uploaded_file = None
base64_image = None
model = 'gpt-4-turbo'

with tab1:
    st.image("./data/scanned_sample.png")
    st.header("Upload Receipt")
    uploaded_file = st.file_uploader("Upload scanned receipt: ", type=["jpg", "jpeg", "png", "pdf"])
    if uploaded_file is not None:
        st.image(uploaded_file, caption='Uploaded Receipt')
        base64_image = base64.b64encode(uploaded_file.read()).decode('utf-8')
        st.text(base64_image)
        st.success("Load successfully. Continue to next tab: Display")

with tab2:
    if not base64_image:
        st.error('Please upload scanned receipt first.')
    else:
        openai_api_key= st_def.st_sidebar()
        if not openai_api_key:
            st.info('Please enter OpenAI’s API key to continue extract the receipt uploaded.')
        else: 
            var_for = """Given the receipt image provided, extract all relevant information and structure the output as detailed JSON that matches the database schema for storing receipt headers and line items. The receipt headers should include store name, slogan, address, store manager, phone number, transaction ID, date, time, cashier, subtotal, sales tax, total, gift card, charged amount, card type, authorization code, chip read, AID, issuer, policy ID, expiration date, survey message, survey website, user ID, password, and eligibility note. The line items should include SKU, description, details, and price for each item on the receipt. Exclude any sensitive information from the output. Format the JSON as follows:
                {"receipt_headers": {"store_name": "",                "slogan": "",                "address": "",                "store_manager": "",
                        "phone_number": "",                "transaction_id": "",                "date": "",                "time": "",
                        "cashier": "",                "subtotal": 0,                "sales_tax": 0,                "total": 0,
                        "gift_card": 0,                "charged_amount": 0,                "card_type": "",                "auth_code": "",
                        "chip_read": "",                "aid": "",                "issuer": "",                "policy_id": "",
                        "expiration_date": "",                "survey_message": "",                "survey_website": "",                "user_id": "",
                        "password": "",                "eligibility_note": ""            },
                    "line_items": [{"sku": "",                "description": "",                "details": "",                "price": 0}]}"""    
            # receipt_data_str = ai.ai_vision(var_for = var_for, openai_api_key=openai_api_key, model_v=model, base64_image=base64_image)
            # with open('re.txt', 'w') as file:
            #     file.write(receipt_data_str)
            # st.write(receipt_data_str)

            with open('re1.txt', 'r') as file:  receipt_data_str = file.read()
            
            start_index = receipt_data_str.find("{")    # Find the starting index of the JSON data (excluding the leading ```)
            end_index = receipt_data_str.rfind("}")+1   # Find the ending index of the JSON data (excluding the trailing ```)
            json_data = receipt_data_str[start_index:end_index] # Extract the JSON data as a substring

            receipt_dict = json.loads(json_data)
            
            col1, col2 = st.columns(2)

            with col1:
                st.header("Scanned Receipt")
                st.image(uploaded_file, caption='Uploaded Receipt')

            with col2:
                st.header("Extracted Data")
                st.write(receipt_dict)
            
            db.mysql_insert_receipt(receipt_dict)

            # Display success message
            st.success("Message received successfully from the LLM.")

    with tab3:
        cursor, conn = db.mysql_conn()
        st.header("Display the Data")
        # Fetch all records from the receipt_headers table, excluding the time column
        cursor.execute("SELECT store_name, slogan, address, store_manager, phone_number, transaction_id, date, cashier, subtotal, sales_tax, total, gift_card, charged_amount, card_type, auth_code, chip_read, aid, issuer, policy_id, expiration_date, survey_message, survey_website, user_id, password, eligibility_note FROM receipt_headers;")
        headers = cursor.fetchall()

        # Display the headers in a table
        st.subheader("Receipt Headers")
        st.table(headers)

        # Fetch and display all records from the line_items table
        st.subheader("Line Items")
        cursor.execute("SELECT * FROM line_items;")
        items = cursor.fetchall()
        st.table(items)

        # Close the cursor and the connection
        cursor.close()
        conn.close()
        
    with tab4:        
        # Receipt headers data
        receipt_headers_data = {
            "store_name": "Starbucks E Food Court #16599",
            "slogan": "OPERATED BY HMS",
            "address": "Harry Reid Airport",
            "store_manager": "",
            "phone_number": "",
            "transaction_id": "CHK 111704",
            "date": "12/2/2022",
            "time": "1:23 PM",
            "cashier": "",
            "subtotal": 6.00,
            "sales_tax": 0.50,
            "total": 6.50,
            "gift_card": 0,
            "charged_amount": 6.50,
            "card_type": "MasterCard",
            "auth_code": "",
            "chip_read": "",
            "aid": "",
            "issuer": "",
            "policy_id": "",
            "expiration_date": "",
            "survey_message": "We value your feedback! Scan the QR code below to share your experience.",
            "survey_website": "https://hmshost.com/contact/",
            "user_id": "",
            "password": "",
            "eligibility_note": ""
        }

        # Line items data
        line_items_data = [
            {
                "sku": "",
                "description": "1 GR GRN TEA LAT",
                "details": "To Go",
                "price": 6.00
            },
            {
                "sku": "d34358axe",
                "description": "2 GR APL",
                "details": "To Go",
                "price": 16.00
            },
            {
                "sku": "12x3ce",
                "description": "COF BAN SILK",
                "details": "",
                "price": 61.00
            }
        ]

        # Create DataFrames
        receipt_headers_df = pd.DataFrame(receipt_headers_data, index=[0])
        line_items_df = pd.DataFrame(line_items_data)

        # Display the DataFrames
        st.write("Receipt Headers DataFrame:")
        st.write(receipt_headers_df)

        st.write("\nLine Items DataFrame:")
        st.write(line_items_df)