Spaces:
Sleeping
Sleeping
Update pages.py
Browse files
__pycache__/config.cpython-311.pyc
ADDED
Binary file (1.75 kB). View file
|
|
__pycache__/pages.cpython-311.pyc
ADDED
Binary file (2.42 kB). View file
|
|
__pycache__/pages.cpython-312.pyc
ADDED
Binary file (2.44 kB). View file
|
|
__pycache__/section_extract.cpython-311.pyc
ADDED
Binary file (13.1 kB). View file
|
|
__pycache__/section_extract.cpython-312.pyc
ADDED
Binary file (11.3 kB). View file
|
|
pages.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import streamlit as st
|
2 |
from section_extract import find_cover, find_underwriter, find_section
|
3 |
-
from config import keywords_dict, stop_keywords, anti_keywords
|
4 |
|
5 |
def home():
|
6 |
st.title("Prospectus Lens")
|
@@ -15,33 +14,22 @@ def cover():
|
|
15 |
def underwriter():
|
16 |
find_underwriter(
|
17 |
uploaded_file=st.session_state.get("uploaded_file"),
|
18 |
-
section_name="underwriter",
|
19 |
-
keywords_dict=keywords_dict
|
20 |
)
|
21 |
|
22 |
def income_statement():
|
23 |
find_section(
|
24 |
uploaded_file=st.session_state.get("uploaded_file"),
|
25 |
section_name="income_statement",
|
26 |
-
keywords_dict=keywords_dict,
|
27 |
-
stop_keywords=stop_keywords,
|
28 |
-
anti_keywords=anti_keywords
|
29 |
)
|
30 |
|
31 |
def balance_sheet():
|
32 |
find_section(
|
33 |
uploaded_file=st.session_state.get("uploaded_file"),
|
34 |
section_name="balance_sheet",
|
35 |
-
keywords_dict=keywords_dict,
|
36 |
-
stop_keywords=stop_keywords,
|
37 |
-
anti_keywords=anti_keywords
|
38 |
)
|
39 |
|
40 |
def cash_flow():
|
41 |
find_section(
|
42 |
uploaded_file=st.session_state.get("uploaded_file"),
|
43 |
section_name="cash_flow",
|
44 |
-
keywords_dict=keywords_dict,
|
45 |
-
stop_keywords=stop_keywords,
|
46 |
-
anti_keywords=anti_keywords
|
47 |
)
|
|
|
1 |
import streamlit as st
|
2 |
from section_extract import find_cover, find_underwriter, find_section
|
|
|
3 |
|
4 |
def home():
|
5 |
st.title("Prospectus Lens")
|
|
|
14 |
def underwriter():
|
15 |
find_underwriter(
|
16 |
uploaded_file=st.session_state.get("uploaded_file"),
|
|
|
|
|
17 |
)
|
18 |
|
19 |
def income_statement():
|
20 |
find_section(
|
21 |
uploaded_file=st.session_state.get("uploaded_file"),
|
22 |
section_name="income_statement",
|
|
|
|
|
|
|
23 |
)
|
24 |
|
25 |
def balance_sheet():
|
26 |
find_section(
|
27 |
uploaded_file=st.session_state.get("uploaded_file"),
|
28 |
section_name="balance_sheet",
|
|
|
|
|
|
|
29 |
)
|
30 |
|
31 |
def cash_flow():
|
32 |
find_section(
|
33 |
uploaded_file=st.session_state.get("uploaded_file"),
|
34 |
section_name="cash_flow",
|
|
|
|
|
|
|
35 |
)
|
section_extract.py
CHANGED
@@ -3,6 +3,7 @@ import re
|
|
3 |
from PyPDF2 import PdfReader, PdfWriter
|
4 |
from streamlit_pdf_viewer import pdf_viewer
|
5 |
import streamlit as st
|
|
|
6 |
|
7 |
def find_cover(uploaded_file):
|
8 |
"""
|
@@ -14,8 +15,8 @@ def find_cover(uploaded_file):
|
|
14 |
Returns:
|
15 |
None
|
16 |
"""
|
17 |
-
section_title = "
|
18 |
-
st.title(section_title)
|
19 |
|
20 |
if uploaded_file:
|
21 |
try:
|
@@ -27,7 +28,7 @@ def find_cover(uploaded_file):
|
|
27 |
pdf_writer.add_page(first_page)
|
28 |
|
29 |
# Save the first page to a temporary file
|
30 |
-
temp_first_page_path = os.path.join(f"temp_{section_title
|
31 |
with open(temp_first_page_path, "wb") as f:
|
32 |
pdf_writer.write(f)
|
33 |
|
@@ -39,19 +40,18 @@ def find_cover(uploaded_file):
|
|
39 |
st.warning("Please upload a PDF on the Home page first.")
|
40 |
|
41 |
|
42 |
-
def find_underwriter(uploaded_file, section_name, keywords_dict):
|
43 |
"""
|
44 |
Searches for pages in a PDF containing specific keywords for the 'underwriter' section and displays them,
|
45 |
starting from the last 2/3 of the PDF to improve performance.
|
46 |
|
47 |
Parameters:
|
48 |
uploaded_file: The uploaded PDF file.
|
49 |
-
section_name: The name of the section (e.g., "Underwriter").
|
50 |
-
keywords_dict: Dictionary containing keyword sets for different sections.
|
51 |
|
52 |
Returns:
|
53 |
None
|
54 |
"""
|
|
|
55 |
st.title(section_name.title())
|
56 |
|
57 |
keyword_sets = keywords_dict.get(section_name, [])
|
@@ -92,20 +92,18 @@ def find_underwriter(uploaded_file, section_name, keywords_dict):
|
|
92 |
st.warning("Please upload a PDF on the Home page first.")
|
93 |
|
94 |
|
95 |
-
def find_section(uploaded_file, section_name, keywords_dict, stop_keywords, anti_keywords):
|
96 |
"""
|
97 |
Extracts and displays sections of a PDF based on keyword matches.
|
98 |
|
99 |
Parameters:
|
100 |
uploaded_file: The uploaded PDF file (Streamlit file uploader object).
|
101 |
section_name: The name of the section to search for (e.g., "income_statement").
|
102 |
-
keywords_dict: A dictionary containing keyword sets for different sections.
|
103 |
-
stop_keywords: A dictionary of keywords to indicate where extraction should stop.
|
104 |
-
anti_keywords: A dictionary of keywords to exclude specific pages from the results.
|
105 |
|
106 |
Returns:
|
107 |
bool: True if processing completed without interruptions; False if stopped or an error occurred.
|
108 |
"""
|
|
|
109 |
st.title(section_name.replace("_", " ").title())
|
110 |
|
111 |
if uploaded_file:
|
|
|
3 |
from PyPDF2 import PdfReader, PdfWriter
|
4 |
from streamlit_pdf_viewer import pdf_viewer
|
5 |
import streamlit as st
|
6 |
+
from config import keywords_dict, stop_keywords, anti_keywords
|
7 |
|
8 |
def find_cover(uploaded_file):
|
9 |
"""
|
|
|
15 |
Returns:
|
16 |
None
|
17 |
"""
|
18 |
+
section_title = "cover"
|
19 |
+
st.title(section_title.title())
|
20 |
|
21 |
if uploaded_file:
|
22 |
try:
|
|
|
28 |
pdf_writer.add_page(first_page)
|
29 |
|
30 |
# Save the first page to a temporary file
|
31 |
+
temp_first_page_path = os.path.join(f"temp_{section_title}.pdf")
|
32 |
with open(temp_first_page_path, "wb") as f:
|
33 |
pdf_writer.write(f)
|
34 |
|
|
|
40 |
st.warning("Please upload a PDF on the Home page first.")
|
41 |
|
42 |
|
43 |
+
def find_underwriter(uploaded_file):
|
44 |
"""
|
45 |
Searches for pages in a PDF containing specific keywords for the 'underwriter' section and displays them,
|
46 |
starting from the last 2/3 of the PDF to improve performance.
|
47 |
|
48 |
Parameters:
|
49 |
uploaded_file: The uploaded PDF file.
|
|
|
|
|
50 |
|
51 |
Returns:
|
52 |
None
|
53 |
"""
|
54 |
+
section_name = "underwriter"
|
55 |
st.title(section_name.title())
|
56 |
|
57 |
keyword_sets = keywords_dict.get(section_name, [])
|
|
|
92 |
st.warning("Please upload a PDF on the Home page first.")
|
93 |
|
94 |
|
95 |
+
def find_section(uploaded_file, section_name):
|
96 |
"""
|
97 |
Extracts and displays sections of a PDF based on keyword matches.
|
98 |
|
99 |
Parameters:
|
100 |
uploaded_file: The uploaded PDF file (Streamlit file uploader object).
|
101 |
section_name: The name of the section to search for (e.g., "income_statement").
|
|
|
|
|
|
|
102 |
|
103 |
Returns:
|
104 |
bool: True if processing completed without interruptions; False if stopped or an error occurred.
|
105 |
"""
|
106 |
+
|
107 |
st.title(section_name.replace("_", " ").title())
|
108 |
|
109 |
if uploaded_file:
|