Spaces:
Sleeping
Sleeping
File size: 4,848 Bytes
ed5ccb1 fa86a32 c313d0c fa86a32 de78b4f ed5ccb1 fa86a32 83a5b2f ed5ccb1 de78b4f 83a5b2f de78b4f 83a5b2f de78b4f ed5ccb1 e815ec8 83a5b2f ed5ccb1 83a5b2f ed5ccb1 83a5b2f ed5ccb1 fa86a32 9f3e8e6 ed5ccb1 83a5b2f ed5ccb1 fa86a32 ed5ccb1 83a5b2f fa86a32 ed5ccb1 67de82a 83a5b2f 67de82a 83a5b2f 67de82a 83a5b2f 67de82a 83a5b2f 67de82a 83a5b2f ed5ccb1 9f3e8e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import base64
import streamlit as st
from openai import OpenAI
import os
from dotenv import load_dotenv
import fitz
from PIL import Image
import io
import tempfile
# Load environment variables (python-dotenv; presumably from a local .env
# file when one exists) so OPENAI_API_KEY can be read from the environment.
load_dotenv()
# Module-level OpenAI client used by analyze_image(). The key comes from the
# OPENAI_API_KEY environment variable; os.getenv returns None when it is unset.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
def convert_pdf_to_images(pdf_file):
    """Convert a PDF to a list of PIL images, one per page, using PyMuPDF.

    Args:
        pdf_file: File-like upload object exposing ``getvalue()``, which
            must return the raw PDF bytes.

    Returns:
        A list of ``PIL.Image.Image`` objects in page order.

    Raises:
        Whatever PyMuPDF or PIL raise for an unreadable document. The
        temporary file and the open document are released in every case.
    """
    # delete=False keeps the file on disk after the handle is closed so that
    # PyMuPDF can reopen it by path; we remove it ourselves in `finally`.
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.pdf')
    pdf_path = tmp_file.name
    try:
        with tmp_file:
            tmp_file.write(pdf_file.getvalue())

        pdf_document = fitz.open(pdf_path)
        try:
            images = []
            for page_number in range(pdf_document.page_count):
                page = pdf_document[page_number]
                pix = page.get_pixmap()
                img_data = pix.tobytes("png")
                images.append(Image.open(io.BytesIO(img_data)))
            return images
        finally:
            # Close before unlinking: an open handle blocks deletion on
            # some platforms.
            pdf_document.close()
    finally:
        os.unlink(pdf_path)
def _page_header_number(line):
    """Return the page number if *line* is a page header, else ``None``.

    Recognised headers are a whole line of the form ``Page N``,
    ``Page N:``, ``Page N Information`` or ``Page N Information:``.
    """
    words = line.strip().rstrip(':').split()
    if len(words) < 2 or words[0] != 'Page' or not words[1].isdecimal():
        return None
    if words[2:] not in ([], ['Information']):
        return None
    return int(words[1])


def _format_pair(line):
    """Return *line* as a ``- *Label*: value`` bullet, or ``None`` to skip it."""
    stripped = line.strip()
    # Skip blanks, markdown emphasis/header lines and "Here is ..." preambles.
    if not stripped or stripped.startswith(('*', 'Here')):
        return None
    cleaned = stripped.replace('**', '')
    # Drop only a leading list bullet; hyphens inside the value are data.
    if cleaned.startswith('- '):
        cleaned = cleaned[2:]
    if ':' not in cleaned:
        return None
    label, value = cleaned.split(':', 1)
    return f'- *{label.strip()}*: {value.strip()}'


def format_response(text):
    """Format an analysis response as markdown with clean styling.

    The text is processed line by line. A page header line (for example
    ``Page 2 Information:``) starts a ``### Page 2`` section; every other
    line that contains a ``Label: Value`` pair becomes a
    ``- *Label*: Value`` bullet. Blank lines, lines starting with ``*`` or
    ``Here``, and lines without a colon are dropped.

    Text that has no page header at all (such as the result for a single
    image) is still formatted; its bullets are emitted without a heading
    instead of being discarded.

    Args:
        text: Raw analysis text, optionally divided by page header lines.

    Returns:
        The markdown string; empty when no pairs or headers are found.
    """
    rendered = []
    for line in text.split('\n'):
        page_number = _page_header_number(line)
        if page_number is not None:
            # Only whole-line headers open a section, so the word "Page"
            # inside extracted content never creates a spurious page.
            rendered.append(f'\n### Page {page_number}')
            continue
        pair = _format_pair(line)
        if pair is not None:
            rendered.append(pair)
    return ''.join(f'{entry}\n' for entry in rendered)
def analyze_image(image):
    """Analyze an image using the OpenAI API and return the extracted text.

    The image is PNG-encoded, embedded as a base64 data URL and sent with
    an extraction prompt to the chat completions endpoint.

    Args:
        image: A PIL image (anything exposing ``save(fp, format=...)``).

    Returns:
        The model's reply as a string. Failures are not raised: any
        exception, or a reply with no text content, is reported as a
        string starting with ``"An error occurred: "``.
    """
    prompt = (
        "Please analyze the image and extract the following information:\n"
        "- Sender information\n"
        "- Recipient information\n"
        "- Container details\n"
        "- Weights and measurements\n"
        "- Dates and reference numbers\n"
        "- Cargo details\n"
        "Format the response as 'Label: Value' pairs."
    )
    try:
        buffer = io.BytesIO()
        image.save(buffer, format='PNG')
        base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                # The payload is PNG-encoded above, so the
                                # declared media type must be image/png.
                                "url": f"data:image/png;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=1000,
        )
        content = response.choices[0].message.content
        if content is None:
            # The API may return a message without text; keep the
            # string-only contract so callers can always format the result.
            return "An error occurred: the model returned no content"
        return content
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred: {str(e)}"
def main():
    """Render the Streamlit UI: upload a document, analyze it, show results.

    A PDF is rendered to one image per page; any other accepted upload is
    opened as a single image. Both cases then go through the same path:
    each image is analyzed and its result is prefixed with a
    ``Page N Information:`` header before formatting. A single image is
    treated as page 1, because format_response() only renders text that
    follows such a header; without it the single-image result would be
    displayed as nothing.
    """
    st.set_page_config(page_title="Document Analysis App", layout="wide")
    st.title("Document Analysis App")

    uploaded_file = st.file_uploader(
        "Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg']
    )
    if uploaded_file is None:
        return

    if uploaded_file.type == "application/pdf":
        # Handle PDF: one image per page.
        with st.spinner("Processing PDF..."):
            images = convert_pdf_to_images(uploaded_file)
    else:
        # Handle single image as a one-page document.
        images = [Image.open(uploaded_file)]

    if not st.button("Extract Information"):
        return

    with st.spinner("Analyzing document..."):
        all_results = [
            f"Page {i} Information:\n{analyze_image(image)}"
            for i, image in enumerate(images, 1)
        ]
        combined_results = "\n\n".join(all_results)
    st.markdown(format_response(combined_results))
# Call the main function directly: the hosting runtime executes this file as
# a script, so the app is started at module level without a __name__ guard.
main()