File size: 4,850 Bytes
bed1057 53b8f4b bed1057 0cd5480 bed1057 b7a3caf e52360f 49f2ace bed1057 0758797 bed1057 511942b bed1057 511942b e384767 b7a3caf 53b8f4b bed1057 b7a3caf 49f2ace 53b8f4b 49f2ace b7a3caf 53b8f4b b7a3caf 511942b d471962 511942b bed1057 b7a3caf bed1057 b7a3caf bed1057 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
import streamlit as st
from PIL import Image
import os
import base64
from helper import (
custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
)
import tempfile
import shutil
from pdf2image import convert_from_bytes
# Required configuration comes from the environment; a missing variable
# raises KeyError as soon as the script starts.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
TEXTRACT_API_URL = os.environ["TEXTRACT_API_URL"]

st.set_page_config(page_title="💬 Chat with OCR 📝", layout="wide")

# Start with an empty chat history the first time this session runs
if "messages" not in st.session_state:
    st.session_state["messages"] = []
# Sidebar: upload a document, turn it into a single PIL image, send it to the
# Textract endpoint and show the results.
# Names set here and read later in the script: uploaded_file, pil_image,
# cleaned_up_body (the latter only exists when pil_image was produced).
with st.sidebar:
    st.title("🖼️ Upload and Display Images")
    st.warning("Please upload an image or a single-page PDF file!")

    # Only image formats and PDF are accepted: everything that is not a PDF
    # is handed to Image.open() below, so non-image types must not get through.
    uploaded_file = st.file_uploader(
        "Upload an Image or PDF",
        type=["png", "jpg", "jpeg", "pdf"],
        label_visibility="collapsed",
    )

    pil_image = None
    if uploaded_file:
        if uploaded_file.type == "application/pdf":
            try:
                # Rasterize the PDF straight from memory
                pdf_bytes = uploaded_file.read()
                pages = convert_from_bytes(pdf_bytes, dpi=200)
                if len(pages) != 1:
                    st.warning("Please upload a PDF with only one page!")
                else:
                    pil_image = pages[0]
            except Exception as e:
                st.error(f"Failed to convert PDF to image: {e}")
        else:
            try:
                pil_image = Image.open(uploaded_file)
            except OSError as e:
                # Pillow reports unreadable or unrecognized image data as an
                # OSError subclass; surface it like the PDF branch does
                # instead of crashing the page. pil_image stays None.
                st.error(f"Failed to open image: {e}")

    if pil_image is not None:
        resized_image = resize_image(pil_image)
        with st.expander("Original Image", expanded=False):
            st.image(pil_image, caption="Uploaded Image", use_column_width=True)

        # Send the resized image, base64-encoded, to the Textract API
        image_base64 = convert_image_to_base64(resized_image)
        payload = {"image": image_base64}
        result_dict = post_request_and_parse_response(TEXTRACT_API_URL, payload)

        # Draw on a copy so resized_image itself is left untouched
        image_with_boxes = draw_bounding_boxes_for_textract(resized_image.copy(), result_dict)
        with st.expander("Image with Bounding Boxes", expanded=True):
            st.image(image_with_boxes, caption="Image with Bounding Boxes", use_column_width=True)

        # Text extracted from the 'body' entry of the response
        cleaned_up_body = extract_text_from_textract_blocks(result_dict['body'])

        # Raw response, collapsed by default
        with st.expander("View JSON Body", expanded=False):
            st.json(result_dict)

        # Extracted text, collapsed by default
        with st.expander("View Cleaned-up Text", expanded=False):
            st.text(cleaned_up_body)
# Vertical spacer so the "Clear Session" button sits below the sidebar content
st.sidebar.markdown("<br><br><br><br>", unsafe_allow_html=True)

# Reset the stored chat history when the button is pressed
clear_requested = st.sidebar.button("Clear Session")
if clear_requested:
    st.session_state.messages = []
# Main chat area
st.title("Chat with OCR Output")

# Replay every stored message under its role
for past_message in st.session_state.messages:
    speaker = past_message["role"]
    with st.chat_message(speaker):
        st.markdown(past_message["content"])
# Build the chat client only when an upload was turned into an image
if uploaded_file and pil_image:
    # Seed the client from a copy of the stored messages, so the OCR text
    # added below never lands in st.session_state.messages.
    seeded_history = list(st.session_state.messages)
    if cleaned_up_body:
        ocr_context = {"role": "system", "content": cleaned_up_body}
        seeded_history.append(ocr_context)

    bot = ChatGPTClient(
        api_key=OPENAI_API_KEY,
        protocol="You are fed with the text portion of json file that come out of OCR after scanning an image. User will ask you questions about this json file.",
        body=cleaned_up_body,
    )
    # Replace the client's history with the seeded copy
    bot.history = seeded_history
# Handle a newly submitted chat message, if there is one
prompt = st.chat_input("Ask me about the image")
if prompt:
    # Show the user's message and record it
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # `bot` only exists when an upload was processed (same condition as
    # where it is built); otherwise reply with a fixed hint.
    response = (
        bot.generate_response(prompt)
        if uploaded_file and pil_image
        else "Please upload an image before asking questions."
    )

    # Show the reply and record it
    st.chat_message("assistant").markdown(response)
    st.session_state.messages.append({"role": "assistant", "content": response})
|