File size: 4,848 Bytes
ed5ccb1
 
fa86a32
c313d0c
fa86a32
de78b4f
ed5ccb1
 
 
 
fa86a32
 
83a5b2f
 
ed5ccb1
 
de78b4f
 
83a5b2f
 
 
 
 
 
 
 
 
 
 
de78b4f
83a5b2f
 
 
 
de78b4f
ed5ccb1
e815ec8
83a5b2f
 
 
 
 
 
 
ed5ccb1
83a5b2f
 
 
 
 
 
 
 
 
 
 
ed5ccb1
83a5b2f
ed5ccb1
 
 
 
 
 
 
 
 
 
fa86a32
9f3e8e6
ed5ccb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83a5b2f
ed5ccb1
 
fa86a32
ed5ccb1
83a5b2f
fa86a32
ed5ccb1
 
67de82a
 
 
 
 
83a5b2f
 
 
 
67de82a
83a5b2f
 
 
 
 
 
67de82a
83a5b2f
 
67de82a
83a5b2f
 
 
67de82a
83a5b2f
 
 
 
ed5ccb1
9f3e8e6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import base64
import streamlit as st
from openai import OpenAI
import os
from dotenv import load_dotenv
import fitz
from PIL import Image
import io
import tempfile

# Load environment variables (python-dotenv) before the key lookup below,
# so values defined in a local .env file are visible to os.getenv.
load_dotenv()
# Initialize the module-level OpenAI client used by analyze_image().
# The API key comes from the OPENAI_API_KEY environment variable;
# os.getenv returns None when that variable is not set.
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

def convert_pdf_to_images(pdf_file):
    """Render every page of a PDF to a PIL image using PyMuPDF.

    Args:
        pdf_file: Object exposing ``getvalue()`` that returns the raw PDF
            bytes (``main()`` passes the Streamlit upload here).

    Returns:
        A list of PIL images, one per page, in page order.
    """
    images = []
    # Open the document straight from memory instead of going through a
    # temporary file: nothing has to be flushed to disk before PyMuPDF
    # reads it, and there is no file left behind if rendering fails.
    # The context manager closes the document even when a page raises.
    with fitz.open(stream=pdf_file.getvalue(), filetype="pdf") as pdf_document:
        for page in pdf_document:
            png_bytes = page.get_pixmap().tobytes("png")
            images.append(Image.open(io.BytesIO(png_bytes)))
    return images


def format_response(text):
    """Format the analysis response as a Markdown bullet list.

    The input is scanned line by line:

    * A line of the form ``Page <n> Information:`` (the ``Information``
      word and the trailing colon are optional) starts a new section and
      is rendered as a ``### Page <n>`` heading.
    * Blank lines, lines starting with ``*`` and lines starting with
      ``Here`` are dropped.
    * Any other line has a leading ``- `` bullet and all ``**`` markers
      removed; if it then contains a colon it is rendered as
      ``- *Label*: Value``. Lines without a colon are dropped.

    Text without any page header (e.g. the result for a single image) is
    still formatted; it simply gets no heading.

    Args:
        text: Raw analysis text, optionally containing page headers.

    Returns:
        The Markdown string (empty if no line produced output).
    """
    parts = []

    for raw_line in text.split('\n'):
        line = raw_line.strip()

        # Skip empty lines, asterisk-led lines and "Here is ..." preambles.
        if not line or line.startswith(('*', 'Here')):
            continue

        # Detect page headers on whole lines only, so the word "Page"
        # inside ordinary content (e.g. "Pages: 3") is not a separator.
        tokens = line.rstrip(':').split()
        is_page_header = (
            len(tokens) in (2, 3)
            and tokens[0] == 'Page'
            and tokens[1].isdigit()
            and tokens[2:] in ([], ['Information'])
        )
        if is_page_header:
            parts.append(f'\n### Page {tokens[1]}\n')
            continue

        # Strip only a leading list bullet; dashes inside the value
        # (such as "2020 - 2021") are data and must survive.
        if line.startswith('- '):
            line = line[2:]
        line = line.replace('**', '')

        if ':' in line:
            label, value = line.split(':', 1)
            parts.append(f'- *{label.strip()}*: {value.strip()}\n')

    return ''.join(parts)

def analyze_image(image):
    """Analyze an image using the OpenAI chat completions API.

    The image is PNG-encoded, base64-wrapped into a data URL and sent
    together with a fixed extraction prompt.

    Args:
        image: PIL image to analyze.

    Returns:
        The text content of the first completion choice. If anything
        fails (encoding or the API call), the exception is not raised;
        a string of the form ``"An error occurred: <message>"`` is
        returned instead, so callers always receive a string.
    """
    try:
        # PNG cannot store CMYK data (possible for JPEG uploads), so
        # convert such images to RGB before encoding.
        if image.mode == "CMYK":
            image = image.convert("RGB")

        png_buffer = io.BytesIO()
        image.save(png_buffer, format='PNG')
        base64_image = base64.b64encode(png_buffer.getvalue()).decode("utf-8")

        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": """Please analyze the image and extract the following information:
                            - Sender information
                            - Recipient information
                            - Container details
                            - Weights and measurements
                            - Dates and reference numbers
                            - Cargo details
                            
                            Format the response as 'Label: Value' pairs."""
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # The payload is PNG-encoded above, so the
                                # declared MIME type must be image/png.
                                "url": f"data:image/png;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=1000
        )

        return response.choices[0].message.content
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"An error occurred: {str(e)}"

def main():
    """Streamlit entry point: upload a PDF or image and show extracted fields.

    Renders the page, waits for an upload and a click on
    "Extract Information", then analyzes each page image with
    ``analyze_image`` and displays the combined result formatted by
    ``format_response``.
    """
    st.set_page_config(page_title="Document Analysis App", layout="wide")

    st.title("Document Analysis App")
    uploaded_file = st.file_uploader("Upload document (PDF/Image)", type=['pdf', 'png', 'jpg', 'jpeg'])

    if uploaded_file is None:
        return

    # Do no work until the user asks for it: Streamlit reruns this script
    # on every interaction, so rasterising the PDF before the click would
    # repeat the conversion on each rerun.
    if not st.button("Extract Information"):
        return

    if uploaded_file.type == "application/pdf":
        with st.spinner("Processing PDF..."):
            images = convert_pdf_to_images(uploaded_file)
    else:
        # Treat a lone image as a one-page document so it goes through the
        # same "Page N Information:" framing that format_response parses.
        images = [Image.open(uploaded_file)]

    with st.spinner("Analyzing document..."):
        all_results = [
            f"Page {i} Information:\n{analyze_image(image)}"
            for i, image in enumerate(images, 1)
        ]
        combined_results = "\n\n".join(all_results)
        st.markdown(format_response(combined_results))

# Entry point: call main() unconditionally at module level. There is
# deliberately no `if __name__ == "__main__"` guard (the author notes it is
# not needed in Hugging Face Spaces), so importing this module also runs the app.
main()