mangalathkedar committed on
Commit
c313d0c
·
verified ·
1 Parent(s): f7dd71a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -21
app.py CHANGED
@@ -1,17 +1,14 @@
1
  import base64
2
  import streamlit as st
3
- from openai import OpenAI
4
- import pdf2image
5
  from PIL import Image
6
  import io
7
  import tempfile
 
8
 
9
- # Load environment variables
10
-
11
- # Initialize OpenAI client
12
- client = OpenAI()
13
-
14
-
15
 
16
  def convert_pdf_to_images(pdf_file):
17
  """Convert PDF to list of images"""
@@ -19,9 +16,13 @@ def convert_pdf_to_images(pdf_file):
19
  tmp_file.write(pdf_file.getvalue())
20
  pdf_path = tmp_file.name
21
 
22
- images = pdf2image.convert_from_path(pdf_path)
23
- os.unlink(pdf_path)
24
- return images
 
 
 
 
25
 
26
  def format_response(text):
27
  """Format the analysis response with clean styling"""
@@ -38,7 +39,7 @@ def format_response(text):
38
  # Split into pages
39
  pages = text.split("Page")
40
 
41
- for page_num, page_content in enumerate(pages[1:], 1): # Skip first empty split
42
  # Add page header
43
  formatted_text += f'<div style="margin-bottom: 30px;">'
44
  formatted_text += f'<h3 style="color: #2c3e50; margin-bottom: 15px;">Page {page_num}</h3>'
@@ -46,9 +47,7 @@ def format_response(text):
46
  # Process each line
47
  lines = page_content.split('\n')
48
  for line in lines:
49
- # Skip empty lines and lines with asterisks
50
  if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
51
- # Remove asterisks and dashes
52
  line = line.replace('**', '').replace('- ', '')
53
 
54
  if ':' in line:
@@ -60,7 +59,6 @@ def format_response(text):
60
 
61
  formatted_text += '</div>'
62
 
63
- # Add separator between pages except for the last page
64
  if page_num < len(pages) - 1:
65
  formatted_text += '<hr style="border: 1px solid #eee; margin: 20px 0;">'
66
 
@@ -76,8 +74,8 @@ def analyze_image(image):
76
 
77
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
78
 
79
- response = client.chat.completions.create(
80
- model="gpt-4o-mini",
81
  messages=[
82
  {
83
  "role": "user",
@@ -106,14 +104,14 @@ def analyze_image(image):
106
  max_tokens=1000
107
  )
108
 
109
- return response.choices[0].message.content
110
  except Exception as e:
 
111
  return f"An error occurred: {str(e)}"
112
 
113
  def main():
114
  st.set_page_config(page_title="Document Analysis App", layout="wide")
115
 
116
- # Custom CSS to set light background and improve button styling
117
  st.markdown("""
118
  <style>
119
  .stApp {
@@ -146,7 +144,6 @@ def main():
146
 
147
  if uploaded_file is not None:
148
  if uploaded_file.type == "application/pdf":
149
- # Handle PDF
150
  with st.spinner("Processing PDF..."):
151
  images = convert_pdf_to_images(uploaded_file)
152
 
@@ -161,7 +158,6 @@ def main():
161
  st.markdown(format_response(combined_results), unsafe_allow_html=True)
162
 
163
  else:
164
- # Handle single image
165
  image = Image.open(uploaded_file)
166
 
167
  if st.button("Extract Information"):
 
1
  import base64
2
  import streamlit as st
3
+ import openai
4
+ import os
5
  from PIL import Image
6
  import io
7
  import tempfile
8
+ import pdf2image
9
 
10
+ # Set OpenAI API key from Streamlit secrets
11
+ openai.api_key = st.secrets["OPENAI_API_KEY"]
 
 
 
 
12
 
13
  def convert_pdf_to_images(pdf_file):
14
  """Convert PDF to list of images"""
 
16
  tmp_file.write(pdf_file.getvalue())
17
  pdf_path = tmp_file.name
18
 
19
+ try:
20
+ images = pdf2image.convert_from_path(pdf_path)
21
+ os.unlink(pdf_path)
22
+ return images
23
+ except Exception as e:
24
+ st.error(f"Error converting PDF: {str(e)}")
25
+ return []
26
 
27
  def format_response(text):
28
  """Format the analysis response with clean styling"""
 
39
  # Split into pages
40
  pages = text.split("Page")
41
 
42
+ for page_num, page_content in enumerate(pages[1:], 1):
43
  # Add page header
44
  formatted_text += f'<div style="margin-bottom: 30px;">'
45
  formatted_text += f'<h3 style="color: #2c3e50; margin-bottom: 15px;">Page {page_num}</h3>'
 
47
  # Process each line
48
  lines = page_content.split('\n')
49
  for line in lines:
 
50
  if line.strip() and not line.strip().startswith('*') and not line.strip().startswith('Here'):
 
51
  line = line.replace('**', '').replace('- ', '')
52
 
53
  if ':' in line:
 
59
 
60
  formatted_text += '</div>'
61
 
 
62
  if page_num < len(pages) - 1:
63
  formatted_text += '<hr style="border: 1px solid #eee; margin: 20px 0;">'
64
 
 
74
 
75
  base64_image = base64.b64encode(img_byte_arr).decode("utf-8")
76
 
77
+ response = openai.ChatCompletion.create(
78
+ model="gpt-4-vision-preview",
79
  messages=[
80
  {
81
  "role": "user",
 
104
  max_tokens=1000
105
  )
106
 
107
+ return response.choices[0].message['content']
108
  except Exception as e:
109
+ st.error(f"API Error: {str(e)}")
110
  return f"An error occurred: {str(e)}"
111
 
112
  def main():
113
  st.set_page_config(page_title="Document Analysis App", layout="wide")
114
 
 
115
  st.markdown("""
116
  <style>
117
  .stApp {
 
144
 
145
  if uploaded_file is not None:
146
  if uploaded_file.type == "application/pdf":
 
147
  with st.spinner("Processing PDF..."):
148
  images = convert_pdf_to_images(uploaded_file)
149
 
 
158
  st.markdown(format_response(combined_results), unsafe_allow_html=True)
159
 
160
  else:
 
161
  image = Image.open(uploaded_file)
162
 
163
  if st.button("Extract Information"):