Spaces:
Runtime error
Runtime error
import streamlit as st, base64, json, re | |
import mysql.connector | |
from datetime import datetime | |
from utils import ai, st_def, db | |
import pandas as pd | |
st_def.st_logo(title='👋 Scanned Recepit Extract') | |
tab1, tab2, tab3,tab4 = st.tabs(["Upload", "Display", "Save", "Database"]) | |
uploaded_file = None | |
base64_image = None | |
model = 'gpt-4-turbo' | |
with tab1: | |
st.image("./data/scanned_sample.png") | |
st.header("Upload Receipt") | |
uploaded_file = st.file_uploader("Upload scanned receipt: ", type=["jpg", "jpeg", "png", "pdf"]) | |
if uploaded_file is not None: | |
st.image(uploaded_file, caption='Uploaded Receipt') | |
base64_image = base64.b64encode(uploaded_file.read()).decode('utf-8') | |
st.text(base64_image) | |
st.success("Load successfully. Continue to next tab: Display") | |
with tab2: | |
if not base64_image: | |
st.error('Please upload scanned receipt first.') | |
else: | |
openai_api_key= st_def.st_sidebar() | |
if not openai_api_key: | |
st.info('Please enter OpenAI’s API key to continue extract the receipt uploaded.') | |
else: | |
var_for = """Given the receipt image provided, extract all relevant information and structure the output as detailed JSON that matches the database schema for storing receipt headers and line items. The receipt headers should include store name, slogan, address, store manager, phone number, transaction ID, date, time, cashier, subtotal, sales tax, total, gift card, charged amount, card type, authorization code, chip read, AID, issuer, policy ID, expiration date, survey message, survey website, user ID, password, and eligibility note. The line items should include SKU, description, details, and price for each item on the receipt. Exclude any sensitive information from the output. Format the JSON as follows: | |
{"receipt_headers": {"store_name": "", "slogan": "", "address": "", "store_manager": "", | |
"phone_number": "", "transaction_id": "", "date": "", "time": "", | |
"cashier": "", "subtotal": 0, "sales_tax": 0, "total": 0, | |
"gift_card": 0, "charged_amount": 0, "card_type": "", "auth_code": "", | |
"chip_read": "", "aid": "", "issuer": "", "policy_id": "", | |
"expiration_date": "", "survey_message": "", "survey_website": "", "user_id": "", | |
"password": "", "eligibility_note": "" }, | |
"line_items": [{"sku": "", "description": "", "details": "", "price": 0}]}""" | |
# receipt_data_str = ai.ai_vision(var_for = var_for, openai_api_key=openai_api_key, model_v=model, base64_image=base64_image) | |
# with open('re.txt', 'w') as file: | |
# file.write(receipt_data_str) | |
# st.write(receipt_data_str) | |
with open('re1.txt', 'r') as file: receipt_data_str = file.read() | |
start_index = receipt_data_str.find("{") # Find the starting index of the JSON data (excluding the leading ```) | |
end_index = receipt_data_str.rfind("}")+1 # Find the ending index of the JSON data (excluding the trailing ```) | |
json_data = receipt_data_str[start_index:end_index] # Extract the JSON data as a substring | |
receipt_dict = json.loads(json_data) | |
col1, col2 = st.columns(2) | |
with col1: | |
st.header("Scanned Receipt") | |
st.image(uploaded_file, caption='Uploaded Receipt') | |
with col2: | |
st.header("Extracted Data") | |
st.write(receipt_dict) | |
db.mysql_insert_receipt(receipt_dict) | |
# Display success message | |
st.success("Message received successfully from the LLM.") | |
with tab3: | |
cursor, conn = db.mysql_conn() | |
st.header("Display the Data") | |
# Fetch all records from the receipt_headers table, excluding the time column | |
cursor.execute("SELECT store_name, slogan, address, store_manager, phone_number, transaction_id, date, cashier, subtotal, sales_tax, total, gift_card, charged_amount, card_type, auth_code, chip_read, aid, issuer, policy_id, expiration_date, survey_message, survey_website, user_id, password, eligibility_note FROM receipt_headers;") | |
headers = cursor.fetchall() | |
# Display the headers in a table | |
st.subheader("Receipt Headers") | |
st.table(headers) | |
# Fetch and display all records from the line_items table | |
st.subheader("Line Items") | |
cursor.execute("SELECT * FROM line_items;") | |
items = cursor.fetchall() | |
st.table(items) | |
# Close the cursor and the connection | |
cursor.close() | |
conn.close() | |
with tab4: | |
# Receipt headers data | |
receipt_headers_data = { | |
"store_name": "Starbucks E Food Court #16599", | |
"slogan": "OPERATED BY HMS", | |
"address": "Harry Reid Airport", | |
"store_manager": "", | |
"phone_number": "", | |
"transaction_id": "CHK 111704", | |
"date": "12/2/2022", | |
"time": "1:23 PM", | |
"cashier": "", | |
"subtotal": 6.00, | |
"sales_tax": 0.50, | |
"total": 6.50, | |
"gift_card": 0, | |
"charged_amount": 6.50, | |
"card_type": "MasterCard", | |
"auth_code": "", | |
"chip_read": "", | |
"aid": "", | |
"issuer": "", | |
"policy_id": "", | |
"expiration_date": "", | |
"survey_message": "We value your feedback! Scan the QR code below to share your experience.", | |
"survey_website": "https://hmshost.com/contact/", | |
"user_id": "", | |
"password": "", | |
"eligibility_note": "" | |
} | |
# Line items data | |
line_items_data = [ | |
{ | |
"sku": "", | |
"description": "1 GR GRN TEA LAT", | |
"details": "To Go", | |
"price": 6.00 | |
}, | |
{ | |
"sku": "d34358axe", | |
"description": "2 GR APL", | |
"details": "To Go", | |
"price": 16.00 | |
}, | |
{ | |
"sku": "12x3ce", | |
"description": "COF BAN SILK", | |
"details": "", | |
"price": 61.00 | |
} | |
] | |
# Create DataFrames | |
receipt_headers_df = pd.DataFrame(receipt_headers_data, index=[0]) | |
line_items_df = pd.DataFrame(line_items_data) | |
# Display the DataFrames | |
st.write("Receipt Headers DataFrame:") | |
st.write(receipt_headers_df) | |
st.write("\nLine Items DataFrame:") | |
st.write(line_items_df) | |