Spaces:

eagle0504
/

ocr-basics

App Files Files Community

eagle0504 committed 7 days ago

Commit

b7a3caf

•

1 Parent(s): 6a12942

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -32

app.py CHANGED Viewed

@@ -5,6 +5,8 @@ from helper import (
     custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
     draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
 )
 # Load OpenAI API Key from environment variable
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
@@ -21,37 +23,57 @@ with st.sidebar:
     st.title("🖼️ Upload and Display Images")
     # Display a placeholder for uploaded image
-    st.warning("Please upload an image file!")
-    uploaded_image = st.file_uploader("Upload an Image", type=['TXT', 'PDF'], label_visibility="collapsed")
-    if uploaded_image:
-        pil_image = Image.open(uploaded_image)
-        resized_image = resize_image(pil_image)
-        with st.expander("Original Image", expanded=False):
-            st.image(pil_image, caption="Uploaded Image", use_column_width=True)
-        # Convert image to base64 and send to Textract API
-        image_base64 = convert_image_to_base64(resized_image)
-        payload = {"image": image_base64}
-        result_dict = post_request_and_parse_response(TEXTRACT_API_URL, payload)
-        # Draw bounding boxes
-        image_with_boxes = draw_bounding_boxes_for_textract(resized_image.copy(), result_dict)
-        with st.expander("Image with Bounding Boxes", expanded=True):
-            st.image(image_with_boxes, caption="Image with Bounding Boxes", use_column_width=True)
-        # Extract text from Textract
-        cleaned_up_body = extract_text_from_textract_blocks(result_dict['body'])
-        # Display JSON body in the sidebar inside an expander (default not expanded)
-        with st.expander("View JSON Body", expanded=False):
-            st.json(result_dict)
-        # Display cleaned-up body (text extracted from JSON) in the sidebar inside an expander (default not expanded)
-        with st.expander("View Cleaned-up Text", expanded=False):
-            st.text(cleaned_up_body)
     # Add some space at the bottom of the sidebar before the "Clear Session" button
     st.sidebar.markdown("<br><br><br><br>", unsafe_allow_html=True)
@@ -69,7 +91,7 @@ for message in st.session_state.messages:
         st.markdown(message["content"])
 # Initialize ChatGPTClient with session state history
-if uploaded_image:
     history_copy = st.session_state.messages.copy()
     if cleaned_up_body:
@@ -89,7 +111,7 @@ if prompt := st.chat_input("Ask me about the image"):
     st.session_state.messages.append({"role": "user", "content": prompt})
     # Generate a response using ChatGPTClient
-    if uploaded_image:
         response = bot.generate_response(prompt)
     else:
         response = "Please upload an image before asking questions."

     custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
     draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
 )
+from pdf2image import convert_from_path
+import tempfile
 # Load OpenAI API Key from environment variable
 OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
     st.title("🖼️ Upload and Display Images")
     # Display a placeholder for uploaded image
+    st.warning("Please upload an image or a single-page PDF file!")
+    uploaded_file = st.file_uploader("Upload an Image or PDF", type=['TXT', 'PDF'], label_visibility="collapsed")
+    pil_image = None
+    if uploaded_file:
+        # Handle PDF file
+        if uploaded_file.type == "application/pdf":
+            with tempfile.NamedTemporaryFile(delete=False) as temp_pdf:
+                temp_pdf.write(uploaded_file.read())
+                temp_pdf_path = temp_pdf.name
+            # Convert PDF to image
+            try:
+                pages = convert_from_path(temp_pdf_path, dpi=200)
+                if len(pages) != 1:
+                    st.warning("Please upload a PDF with only one page!")
+                else:
+                    pil_image = pages[0]
+            except Exception as e:
+                st.error(f"Failed to convert PDF to image: {e}")
+        else:
+            # Handle image file
+            pil_image = Image.open(uploaded_file)
+        if pil_image:
+            resized_image = resize_image(pil_image)
+            with st.expander("Original Image", expanded=False):
+                st.image(pil_image, caption="Uploaded Image", use_column_width=True)
+            # Convert image to base64 and send to Textract API
+            image_base64 = convert_image_to_base64(resized_image)
+            payload = {"image": image_base64}
+            result_dict = post_request_and_parse_response(TEXTRACT_API_URL, payload)
+            # Draw bounding boxes
+            image_with_boxes = draw_bounding_boxes_for_textract(resized_image.copy(), result_dict)
+            with st.expander("Image with Bounding Boxes", expanded=True):
+                st.image(image_with_boxes, caption="Image with Bounding Boxes", use_column_width=True)
+            # Extract text from Textract
+            cleaned_up_body = extract_text_from_textract_blocks(result_dict['body'])
+            # Display JSON body in the sidebar inside an expander (default not expanded)
+            with st.expander("View JSON Body", expanded=False):
+                st.json(result_dict)
+            # Display cleaned-up body (text extracted from JSON) in the sidebar inside an expander (default not expanded)
+            with st.expander("View Cleaned-up Text", expanded=False):
+                st.text(cleaned_up_body)
     # Add some space at the bottom of the sidebar before the "Clear Session" button
     st.sidebar.markdown("<br><br><br><br>", unsafe_allow_html=True)
         st.markdown(message["content"])
 # Initialize ChatGPTClient with session state history
+if uploaded_file and pil_image:
     history_copy = st.session_state.messages.copy()
     if cleaned_up_body:
     st.session_state.messages.append({"role": "user", "content": prompt})
     # Generate a response using ChatGPTClient
+    if uploaded_file and pil_image:
         response = bot.generate_response(prompt)
     else:
         response = "Please upload an image before asking questions."