eagle0504 committed on
Commit
28e97a7
·
verified ·
1 Parent(s): 629124e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -8
app.py CHANGED
@@ -2,13 +2,12 @@ import streamlit as st
2
  from PIL import Image
3
  import os
4
  import base64
 
5
  from helper import (
6
  custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
7
  draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
8
  )
9
- import tempfile
10
- import shutil
11
- from pdf2image import convert_from_bytes
12
 
13
  # Load OpenAI API Key from environment variable
14
  OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
@@ -26,20 +25,26 @@ with st.sidebar:
26
 
27
  # Display a placeholder for uploaded image
28
  st.warning("Please upload an image or a single-page PDF file!")
29
- uploaded_file = st.file_uploader("Upload an Image or PDF", type=['TXT', 'PDF'], label_visibility="collapsed")
30
 
31
  pil_image = None
32
  if uploaded_file:
33
  # Handle PDF file
34
  if uploaded_file.type == "application/pdf":
35
  try:
36
- # Read PDF as bytes and convert to image directly
37
  pdf_bytes = uploaded_file.read()
38
- pages = convert_from_bytes(pdf_bytes, dpi=200)
39
- if len(pages) != 1:
 
 
40
  st.warning("Please upload a PDF with only one page!")
41
  else:
42
- pil_image = pages[0]
 
 
 
 
43
  except Exception as e:
44
  st.error(f"Failed to convert PDF to image: {e}")
45
  else:
 
2
  from PIL import Image
3
  import os
4
  import base64
5
+ import fitz # PyMuPDF
6
  from helper import (
7
  custom_file_uploader, resize_image, convert_image_to_base64, post_request_and_parse_response,
8
  draw_bounding_boxes_for_textract, extract_text_from_textract_blocks, ChatGPTClient
9
  )
10
+ import io
 
 
11
 
12
  # Load OpenAI API Key from environment variable
13
  OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
 
25
 
26
  # Display a placeholder for uploaded image
27
  st.warning("Please upload an image or a single-page PDF file!")
28
+ uploaded_file = st.file_uploader("Upload an Image or PDF", type=['PDF'], label_visibility="collapsed")
29
 
30
  pil_image = None
31
  if uploaded_file:
32
  # Handle PDF file
33
  if uploaded_file.type == "application/pdf":
34
  try:
35
+ # Read PDF as bytes
36
  pdf_bytes = uploaded_file.read()
37
+ pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
38
+
39
+ # Check if the PDF has only one page
40
+ if pdf_document.page_count != 1:
41
  st.warning("Please upload a PDF with only one page!")
42
  else:
43
+ # Convert the first page to an image
44
+ page = pdf_document.load_page(0)
45
+ pix = page.get_pixmap()
46
+ image_bytes = pix.tobytes()
47
+ pil_image = Image.open(io.BytesIO(image_bytes))
48
  except Exception as e:
49
  st.error(f"Failed to convert PDF to image: {e}")
50
  else: