umangchaudhry committed on
Commit
0ad40ce
·
verified ·
1 Parent(s): 43c0182

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +111 -0
  2. summary_tool_questions.md +21 -0
  3. summary_tool_system_prompt.md +30 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from io import BytesIO
4
+ from tempfile import NamedTemporaryFile
5
+ from langchain.chains import create_retrieval_chain
6
+ from langchain.chains.combine_documents import create_stuff_documents_chain
7
+ from langchain_core.prompts import ChatPromptTemplate
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain_community.document_loaders import PyPDFLoader
10
+ from langchain_core.vectorstores import InMemoryVectorStore
11
+ from langchain_openai import OpenAIEmbeddings
12
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
13
+
14
def _read_text_file(path):
    """Return the full contents of the text file at ``path``, decoded as UTF-8.

    Raises:
        FileNotFoundError: If ``path`` does not exist. The message names the
            missing path.
    """
    try:
        # Explicit UTF-8: the bundled prompt/questions files contain non-ASCII
        # characters, so the platform default encoding is not reliable.
        with open(path, "r", encoding="utf-8") as file:
            return file.read()
    except FileNotFoundError as err:
        raise FileNotFoundError(f"The specified file was not found: {path}") from err


def process_pdf(api_key, uploaded_file, questions_path, prompt_path, display_placeholder):
    """Run a fixed list of questions against an uploaded PDF and collect the answers.

    The PDF is split into chunks, embedded into an in-memory vector store, and
    queried once per question through a retrieval chain. After every answer the
    accumulated results are re-rendered into ``display_placeholder``.

    Args:
        api_key: OpenAI API key; exported as ``OPENAI_API_KEY`` for the
            OpenAI-backed embedding and chat clients.
        uploaded_file: Object exposing ``read()`` that returns the PDF bytes
            (the visible caller passes the result of ``st.file_uploader``).
        questions_path: Path to a text/Markdown file with one question per
            non-blank line.
        prompt_path: Path to the system prompt file. It must contain a
            ``{context}`` placeholder, which receives the retrieved chunks.
        display_placeholder: Object exposing
            ``markdown(text, unsafe_allow_html=...)`` (the visible caller
            passes ``st.empty()``).

    Returns:
        A list of Markdown strings, one ``### Question`` / ``**Answer:**``
        section per question, in file order.

    Raises:
        FileNotFoundError: If ``prompt_path`` or ``questions_path`` is missing.
    """
    # NOTE(review): os.environ is process-wide. If several sessions share this
    # process they also share (and overwrite) this key -- confirm whether the
    # key should instead be passed directly to the OpenAI clients.
    os.environ["OPENAI_API_KEY"] = api_key

    # Read both configuration files up front so a missing file fails before
    # any embedding or chat requests are made.
    system_prompt = _read_text_file(prompt_path)
    questions = [
        line.strip()
        for line in _read_text_file(questions_path).split("\n")
        if line.strip()
    ]

    # PyPDFLoader takes a filesystem path, so spill the upload to a temp file.
    # delete=False lets the loader reopen the file by name after it is closed;
    # the finally clause guarantees removal even if writing or parsing fails.
    temp_pdf = NamedTemporaryFile(delete=False, suffix=".pdf")
    try:
        with temp_pdf:
            temp_pdf.write(uploaded_file.read())
        docs = PyPDFLoader(temp_pdf.name).load()
    finally:
        os.remove(temp_pdf.name)

    # Split the document into overlapping chunks for embedding.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Embed the chunks and expose them through a top-10 retriever.
    vectorstore = InMemoryVectorStore.from_documents(
        documents=splits, embedding=OpenAIEmbeddings()
    )
    retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

    # The retrieved chunks are substituted into the system prompt's {context}.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )

    # Create the retrieval and question-answering chains.
    llm = ChatOpenAI(model="gpt-4o")
    question_answer_chain = create_stuff_documents_chain(
        llm, prompt, document_variable_name="context"
    )
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # Answer the questions one at a time, refreshing the display after each.
    qa_results = []
    for question in questions:
        result = rag_chain.invoke({"input": question})
        answer = result["answer"]
        qa_results.append(f"### Question: {question}\n**Answer:** {answer}\n")
        # NOTE(review): unsafe_allow_html=True renders any raw HTML present in
        # the model output, which is derived from the uploaded document --
        # confirm this is required.
        display_placeholder.markdown("\n".join(qa_results), unsafe_allow_html=True)

    return qa_results
79
+
80
# Streamlit app layout: collect an API key and a PDF, run the fixed question
# set through process_pdf, and offer the answers as a Markdown download.
st.title("Climate Policy Summary Tool")

# Input OpenAI API key (masked in the UI)
api_key = st.text_input("Enter your OpenAI API key:", type="password")

# File upload section for PDF
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

# Static paths for the system prompt and the question list
prompt_file_path = "summary_tool_system_prompt.md"
questions_file_path = "summary_tool_questions.md"

# When user clicks "Generate"
if st.button("Generate"):
    # Tell the user what is missing instead of silently ignoring the click.
    if not api_key:
        st.warning("Please enter your OpenAI API key.")
    elif not uploaded_file:
        st.warning("Please upload a PDF document.")
    else:
        # Placeholder that process_pdf re-renders after each Q&A pair
        display_placeholder = st.empty()

        with st.spinner("Processing..."):
            try:
                results = process_pdf(
                    api_key,
                    uploaded_file,
                    questions_file_path,
                    prompt_file_path,
                    display_placeholder,
                )
            except Exception as e:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"An error occurred: {e}")
            else:
                # Allow the user to download the results as a Markdown file
                markdown_text = "\n".join(results)
                st.download_button(
                    label="Download Results as Markdown",
                    data=markdown_text,
                    file_name="qa_results.md",
                    mime="text/markdown",
                )
summary_tool_questions.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Provide a summary of information available from the plan including the city name, population, geography, major industries, per capita income, or similar summary information.
2
+
3
+ To what extent does the plan address resilience, mitigation, and adaptation?
4
+
5
+ Does the plan describe a scenario of climate change the planning jurisdiction will face and for which it is planning climate policies for resilience, mitigation, and adaptation? If so, please summarize any such scenario.
6
+
7
+ What climate impacts does the plan identify as presenting climate risks for the planning jurisdiction? For each identified climate impact, summarize the plan’s description of: 1. The climate impact. 2. The climate risks it poses to the planning jurisdiction. 3. Areas or populations with the most climate vulnerability to the identified climate impact.
8
+
9
+ What resilience options does the plan identify? For each identified resilience option, summarize the information provided in the plan regarding cost, timing, location, and other important attributes.
10
+
11
+ What mitigation options does the plan identify? For each identified mitigation option, summarize the information provided in the plan regarding cost, timing, location, and other important attributes.
12
+
13
+ What adaptation options does the plan identify? For each identified adaptation option, summarize: 1. The climate threat it is addressing. 2. Information provided in the plan regarding cost, timing, location, and other important attributes.
14
+
15
+ What climate policies does the plan identify to advance climate justice? For each climate policy, summarize: 1. The climate threat it is addressing. 2. Areas or populations with the most climate vulnerability to the identified climate impact.
16
+
17
+ Is there any climate policy identified in the plan that could be a potential maladaptation? For each identified climate policy, summarize: 1. The climate threat it is addressing. 2. The resilience, mitigation, or adaptation option that the climate policy proposes.
18
+
19
+ Summarize the plan's description of how the planning jurisdiction will monitor and evaluate progress on the implementation of resilience options, mitigation options, and adaptation options proposed in the plan.
20
+
21
+ Generate a table that summarizes the following features from the document: Location/City Name, Population, Threats Identified, Measures Identified
summary_tool_system_prompt.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a researcher specializing in extracting climate-related information from climate adaptation and resilience plans. You are provided with a document (referred to as the "plan") from a state or city in the United States (referred to as the "planning jurisdiction") that has adopted policies addressing climate change, which may pertain to resilience, mitigation, and adaptation.
2
+
3
+ Your task is to analyze the plan and answer a consistent set of questions based strictly on the content provided. Your responses must:
4
+
5
+ - **Base all answers strictly on the document.**
6
+ - **Include direct quotations formatted with citations** in the format *(Chapter name, Section header, Page number, etc., if applicable)*.
7
+ - **Format all responses using Markdown syntax.**
8
+
9
+ ### **Definitions**
10
+
11
+ Refer to the following definitions when answering the questions:
12
+
13
+ - **Climate Change:** A long-term shift in weather patterns and temperatures, primarily caused by human activities emitting greenhouse gases (GHGs).
14
+ - **Greenhouse Gases (GHGs):** Atmospheric gases like CO₂, CH₄, N₂O that absorb and emit radiation, leading to the greenhouse effect.
15
+ - **Anthropogenic Emissions:** Emissions of GHGs resulting from human activities such as burning fossil fuels and deforestation.
16
+ - **Climate Impacts:** Consequences of climate-related hazards on natural and human systems, affecting lives, ecosystems, economies, and infrastructure.
17
+ - **Climate Risk:** Potential negative consequences from climate impacts, resulting from the interaction of hazard, exposure, and vulnerability.
18
+ - **Climate Vulnerability:** The degree to which a system is susceptible to harm from climate change and its ability to adapt.
19
+ - **Climate Policies:** Strategies and measures adopted to implement resilience, mitigation, and adaptation options.
20
+ - **Resilience:** The ability of systems to cope with climate hazards by maintaining essential functions and adapting to changes.
21
+ - **Resilience Options:** Strategies to build resilience through policy changes, infrastructure improvements, planning, etc.
22
+ - **Mitigation:** Efforts to reduce or prevent emission of GHGs.
23
+ - **Mitigation Options:** Technologies or practices that contribute to mitigation, like renewable energy or waste minimization.
24
+ - **Adaptation:** Adjusting systems to actual or expected climate changes to minimize harm or exploit beneficial opportunities.
25
+ - **Adaptation Options:** Strategies addressing climate change adaptation, including structural, institutional, ecological, and behavioral measures.
26
+ - **Climate Justice:** Ensuring equitable sharing of the burdens and benefits of climate change impacts.
27
+ - **Maladaptation:** Actions that may increase vulnerability to climate change or diminish resilience.
28
+ - **Scenario:** A plausible description of how the future may develop based on a coherent set of assumptions.
29
+
30
+ {context}