Spaces:
Runtime error
Runtime error
File size: 7,456 Bytes
e3cb0b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
import streamlit as st
import base64
import requests
import mysql.connector
import json
from datetime import datetime
# Streamlit app title
st.title("Receipt Extractor")
# OpenAI API Key
api_key = "Your Password"
# Database connection parameters
db_config = {
'host': 'localhost',
'user': 'root',
'password': 'Your Password',
'database': 'receipts'
}
var_for = """Given the receipt image provided, extract all relevant information and structure the output as detailed JSON that matches the database schema for storing receipt headers and line items. The receipt headers should include store name, slogan, address, store manager, phone number, transaction ID, date, time, cashier, subtotal, sales tax, total, gift card, charged amount, card type, authorization code, chip read, AID, issuer, policy ID, expiration date, survey message, survey website, user ID, password, and eligibility note. The line items should include SKU, description, details, and price for each item on the receipt. Exclude any sensitive information from the output. Format the JSON as follows:
{
"receipt_headers": {
"store_name": "",
"slogan": "",
"address": "",
"store_manager": "",
"phone_number": "",
"transaction_id": "",+
"date": "",
"time": "",
"cashier": "",
"subtotal": 0,
"sales_tax": 0,
"total": 0,
"gift_card": 0,
"charged_amount": 0,
"card_type": "",
"auth_code": "",
"chip_read": "",
"aid": "",
"issuer": "",
"policy_id": "",
"expiration_date": "",
"survey_message": "",
"survey_website": "",
"user_id": "",
"password": "",
"eligibility_note": ""
},
"line_items": [
{
"sku": "",
"description": "",
"details": "",
"price": 0
}
]
}"""
# Function to encode the image
def encode_image(image):
return base64.b64encode(image.read()).decode('utf-8')
# Function to process the uploaded image and update the database
def process_image(image):
base64_image = encode_image(image)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
payload = {
"model": "gpt-4-vision-preview",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": var_for
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
}
],
"max_tokens": 2048
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
response_json = response.json()
receipt_data_str = response_json['choices'][0]['message']['content']
# Find the JSON string within the extracted content
receipt_data_json_str = receipt_data_str.split('```json')[1].split('```')[0].strip()
receipt_data = json.loads(receipt_data_json_str)
# Connect to the database
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()
# Insert into receipt_headers
header_insert_query = """
INSERT INTO receipt_headers (store_name, slogan, address, store_manager, phone_number, transaction_id, date, time, cashier, subtotal, sales_tax, total, gift_card, charged_amount, card_type, auth_code, chip_read, aid, issuer, policy_id, expiration_date, survey_message, survey_website, user_id, password, eligibility_note)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
header_info = receipt_data['receipt_headers']
line_items = receipt_data['line_items']
# Format date, time, and expiration_date
formatted_date = datetime.strptime(header_info['date'], '%m/%d/%y').strftime('%Y-%m-%d')
formatted_time = datetime.strptime(header_info['time'], '%I:%M %p').strftime('%H:%M:%S')
formatted_expiration_date = datetime.strptime(header_info['expiration_date'], '%m/%d/%Y').strftime('%Y-%m-%d')
# Prepare header values
header_values = (
header_info['store_name'],
header_info['slogan'],
header_info['address'],
header_info['store_manager'],
header_info['phone_number'],
header_info['transaction_id'],
formatted_date,
formatted_time,
header_info['cashier'],
header_info['subtotal'],
header_info['sales_tax'],
header_info['total'],
header_info['gift_card'],
header_info['charged_amount'],
header_info['card_type'],
header_info['auth_code'],
header_info['chip_read'],
header_info['aid'],
header_info['issuer'],
header_info['policy_id'],
formatted_expiration_date,
header_info['survey_message'],
header_info['survey_website'],
header_info['user_id'],
header_info['password'],
header_info['eligibility_note']
)
# Insert header values
cursor.execute(header_insert_query, header_values)
receipt_id = cursor.lastrowid
# Prepare and insert line items
line_item_insert_query = """
INSERT INTO line_items (receipt_id, sku, description, details, price)
VALUES (%s, %s, %s, %s, %s)
"""
for item in line_items:
price = float(item['price'])
line_item_values = (
receipt_id,
item['sku'],
item['description'],
item.get('details', ''),
price
)
cursor.execute(line_item_insert_query, line_item_values)
# Commit and close the connection
conn.commit()
cursor.close()
conn.close()
return receipt_data
# Streamlit tabs
tab1, tab2 = st.tabs(["Upload Receipt", "Display the Data"])
with tab1:
st.header("Upload Receipt")
uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
# Display the uploaded image
st.image(uploaded_file, caption='Uploaded Receipt', use_column_width=True)
# Process the uploaded image
receipt_data = process_image(uploaded_file)
# Display success message
st.success("Message received successfully from the LLM.")
# Display the JSON output
st.json(receipt_data)
with tab2:
st.header("Display the Data")
# Connect to the database
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()
# Fetch all records from the receipt_headers table, excluding the time column
cursor.execute("SELECT store_name, slogan, address, store_manager, phone_number, transaction_id, date, cashier, subtotal, sales_tax, total, gift_card, charged_amount, card_type, auth_code, chip_read, aid, issuer, policy_id, expiration_date, survey_message, survey_website, user_id, password, eligibility_note FROM receipt_headers;")
headers = cursor.fetchall()
# Display the headers in a table
st.subheader("Receipt Headers")
st.table(headers)
# Fetch and display all records from the line_items table
st.subheader("Line Items")
cursor.execute("SELECT * FROM line_items;")
items = cursor.fetchall()
st.table(items)
# Close the cursor and the connection
cursor.close()
conn.close() |