|
import os |
|
import io |
|
import requests |
|
import streamlit as st |
|
from openai import OpenAI |
|
from PyPDF2 import PdfReader |
|
import urllib.parse |
|
from dotenv import load_dotenv |
|
from openai import OpenAI |
|
from io import BytesIO |
|
from streamlit_extras.colored_header import colored_header |
|
from streamlit_extras.add_vertical_space import add_vertical_space |
|
from streamlit_extras.switch_page_button import switch_page |
|
import json |
|
import pandas as pd |
|
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode |
|
|
|
# Configure the page: title, wide layout, sidebar expanded on load.
st.set_page_config(
    page_title="Building Regulations Chatbot",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Load variables from a .env file so OPENAI_API_KEY can be read just below.
load_dotenv()

# Shared OpenAI client used by the helper functions in this script.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Seed each session-state key only when it is absent, so values written
# during earlier runs of the script are left untouched.
_SESSION_DEFAULTS = {
    'pdf_contents': [],
    'chat_history': [],
    'processed_pdfs': False,
    'id_counter': 0,
    'assistant_id': None,
    'thread_id': None,
    'file_ids': [],
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
|
|
|
|
|
|
|
def get_vector_stores():
    """List the vector stores available to the shared OpenAI client.

    Returns:
        Whatever ``client.beta.vector_stores.list()`` returns on success.
        If the call raises, a string of the form
        ``"Error retrieving vector stores: <message>"`` is returned instead,
        so callers must check the type before iterating.
    """
    try:
        return client.beta.vector_stores.list()
    except Exception as exc:
        return f"Error retrieving vector stores: {exc!s}"
|
|
|
def fetch_pdfs(city_code, timeout=30):
    """Fetch the document list for a city code from the ``/oereblex/`` endpoint.

    Args:
        city_code: Code identifying the city; it is placed in the URL path.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value stored under ``'data'`` in the JSON response (an empty list
        when the key is missing or the payload is not a JSON object), or
        ``None`` when the request fails, returns a non-200 status, or the
        body is not valid JSON. Failures are also reported via ``st.error``.
    """
    # The code comes from a free-text input; percent-encode it so it cannot
    # change the path or add a query string.
    safe_code = urllib.parse.quote(str(city_code).strip(), safe="")
    url = f"http://91.203.213.50:5000/oereblex/{safe_code}"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        st.error(f"Failed to fetch PDFs for city code {city_code}. Error: {e}")
        return None

    if response.status_code != 200:
        st.error(f"Failed to fetch PDFs for city code {city_code}")
        return None

    try:
        data = response.json()
    except ValueError as e:
        # A 200 response with a non-JSON body is treated as a failed fetch.
        st.error(f"Failed to fetch PDFs for city code {city_code}. Error: {e}")
        return None

    documents = data.get('data', []) if isinstance(data, dict) else []
    print("First data:", documents[0] if documents else None)
    return documents
|
|
|
def download_pdf(url, doc_title, timeout=60):
    """Download a PDF and save it in the current working directory.

    The file name is derived from ``doc_title``: only alphanumerics, spaces,
    underscores and hyphens are kept, and spaces become underscores. If that
    name is already taken, ``_<id_counter>`` is appended (and the session
    counter advanced) until an unused name is found.

    Args:
        url: Location of the PDF; ``https://`` is prepended when no scheme
            is present.
        doc_title: Human-readable title used to build the file name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The name of the written file, or ``None`` when the URL is unusable,
        the download fails, or the file cannot be written. Failures are also
        reported via ``st.error``.
    """
    # Guard against missing links (e.g. None/NaN cells coming from a table).
    if not isinstance(url, str) or not url.strip():
        st.error(f"Failed to download PDF from {url}. Error: missing or invalid URL")
        return None
    url = url.strip()

    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
        return None

    sanitized_title = ''.join(
        c for c in str(doc_title) if c.isalnum() or c in (' ', '_', '-')
    ).rstrip()
    sanitized_title = sanitized_title.replace(' ', '_')
    if not sanitized_title:
        # A title with no usable characters would otherwise produce ".pdf".
        sanitized_title = "document"
    filename = f"{sanitized_title}.pdf"

    # Keep trying counter suffixes until the name is free, so an earlier
    # download is never overwritten.
    while os.path.exists(filename):
        filename = f"{sanitized_title}_{st.session_state.id_counter}.pdf"
        st.session_state.id_counter += 1

    try:
        with open(filename, 'wb') as f:
            f.write(response.content)
    except OSError as e:
        st.error(f"Failed to save PDF to {filename}. Error: {e}")
        return None

    return filename
|
|
|
|
|
def upload_file_to_openai(file_path):
    """Upload a local file to OpenAI with purpose ``'assistants'``.

    Args:
        file_path: Path of the file to upload.

    Returns:
        The id of the created OpenAI file, or ``None`` if opening or
        uploading the file fails (the failure is shown via ``st.error``).
    """
    try:
        # The context manager closes the handle even if the upload raises.
        with open(file_path, 'rb') as pdf_file:
            uploaded = client.files.create(
                file=pdf_file,
                purpose='assistants',
            )
        return uploaded.id
    except Exception as e:
        st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
        return None
|
|
|
def create_assistant():
    """Create a new file-search assistant and remember its id.

    The id is stored in ``st.session_state.assistant_id`` and also returned.
    Every call creates a new assistant; nothing is reused.

    Returns:
        The id of the newly created assistant.
    """
    assistant_settings = {
        "name": "Building Regulations Assistant",
        "instructions": "You are an expert on building regulations. Use the provided documents to answer questions accurately.",
        "model": "gpt-4o-mini",
        "tools": [{"type": "file_search"}],
    }
    new_assistant_id = client.beta.assistants.create(**assistant_settings).id
    st.session_state.assistant_id = new_assistant_id
    return new_assistant_id
|
|
|
def _format_citations(text_block):
    """Replace annotation markers with ``[n]`` and list the cited files.

    Args:
        text_block: Object exposing ``value`` (the message text) and
            ``annotations``; each annotation has ``text`` and may have
            ``file_citation.file_id``.

    Returns:
        Tuple ``(text, citations)``: ``text`` is the message with every
        annotation marker replaced by its ``[index]``; ``citations`` holds one
        ``"[index] filename"`` entry per file citation that could be resolved.
    """
    rendered = text_block.value
    annotations = text_block.annotations or []
    print("Annotations found:", annotations)

    citations = []
    filenames = {}  # file_id -> filename, so each cited file is fetched once
    for index, annotation in enumerate(annotations):
        print(f"Processing annotation {index}: {annotation.text}")
        # str.replace with an empty needle inserts the marker between every
        # character, so only substitute when there is actual marker text.
        if annotation.text:
            rendered = rendered.replace(annotation.text, f"[{index}]")

        file_citation = getattr(annotation, "file_citation", None)
        if not file_citation:
            continue

        file_id = file_citation.file_id
        if file_id not in filenames:
            try:
                filenames[file_id] = client.files.retrieve(file_id).filename
            except Exception as e:
                print(f"Error retrieving cited file for annotation {index}: {e}")
                continue

        citation_entry = f"[{index}] {filenames[file_id]}"
        citations.append(citation_entry)
        print(f"Citation added: {citation_entry}")

    return rendered, citations


def chat_with_assistant(file_ids, user_message):
    """Send a user message to the assistant and return its answer.

    On the first call a thread is created with the message and stored in
    ``st.session_state.thread_id``; later calls add the message to that
    thread. Every message attaches all ``file_ids`` for file search. The
    assistant in ``st.session_state.assistant_id`` is then run on the thread.

    Args:
        file_ids: OpenAI file ids to attach to the message.
        user_message: The question text.

    Returns:
        Tuple ``(answer, citations)``. ``answer`` is the assistant's text with
        annotation markers replaced by ``[n]``; ``citations`` is a list of
        ``"[n] filename"`` strings. If retrieving the thread, running the
        assistant, or listing messages fails, ``answer`` is a fixed error
        message and ``citations`` is empty.

    Raises:
        Exception: Errors from creating the thread or adding the message are
            not caught here and propagate to the caller.
    """
    print("----- Starting chat_with_assistant -----")
    print("Received file_ids:", file_ids)
    print("Received user_message:", user_message)

    attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
    print("Attachments created:", attachments)

    if st.session_state.thread_id is None:
        print("No existing thread_id found. Creating a new thread.")
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": user_message,
                    "attachments": attachments,
                }
            ]
        )
        st.session_state.thread_id = thread.id
        print("New thread created with id:", st.session_state.thread_id)
    else:
        print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
        message = client.beta.threads.messages.create(
            thread_id=st.session_state.thread_id,
            role="user",
            content=user_message,
            attachments=attachments
        )
        print("Message added to thread with id:", message.id)

    try:
        thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
        print("Retrieved thread:", thread)
    except Exception as e:
        print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
        return "An error occurred while processing your request.", []

    # Diagnostic only: log the thread's file_search resources when present.
    try:
        tool_resources = thread.tool_resources.file_search
        print("Thread tool resources (file_search):", tool_resources)
    except AttributeError:
        print("No tool_resources.file_search found in thread.")

    print("Assistant ID:", st.session_state.assistant_id)
    print("Thread ID:", thread.id)

    try:
        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id, assistant_id=st.session_state.assistant_id
        )
        print("Run created and polled:", run)
    except Exception as e:
        print("Error during run creation and polling:", e)
        return "An error occurred while processing your request.", []

    # Log why a run ended without completing; otherwise the only symptom
    # would be an empty message list below.
    run_status = getattr(run, "status", None)
    if run_status != "completed":
        print(f"Run ended with status {run_status}: {getattr(run, 'last_error', None)}")

    try:
        messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
        print("Retrieved messages:", messages)
    except Exception as e:
        print("Error retrieving messages:", e)
        return "An error occurred while retrieving messages.", []

    # Use the first content part that actually carries text; the leading
    # part is not guaranteed to be a text part.
    text_block = None
    if messages and messages[0].content:
        for part in messages[0].content:
            text_block = getattr(part, "text", None)
            if text_block is not None:
                break

    if text_block is None:
        print("No messages or content found in the retrieved messages.")
        return "No response received from the assistant.", []

    print("Raw message content:", text_block)
    answer, citations = _format_citations(text_block)

    print("Final message content after replacements:", answer)
    print("All citations:", citations)
    print("----- Ending chat_with_assistant -----")
    return answer, citations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stylesheet for the chat transcript markup (chat-container / chat-message /
# message-content classes) emitted by the Home page.
_CHAT_STYLES = """
<style>
    /* Style for the chat container */
    .chat-container {
        display: flex;
        flex-direction: column;
    }

    /* Style for individual chat messages */
    .chat-message {
        margin-bottom: 20px; /* Increased space between messages */
    }

    /* Style for user messages */
    .chat-message.user > div:first-child {
        color: #1E90FF; /* Dodger Blue for "You" */
        font-size: 1.2em;
        margin-bottom: 5px;
    }

    /* Style for assistant messages */
    .chat-message.assistant > div:first-child {
        color: #32CD32; /* Lime Green for "Assistant" */
        font-size: 1.2em;
        margin-bottom: 5px;
    }

    /* Style for the message content */
    .message-content {
        /* Removed the background color to maintain original background */
        padding: 10px;
        border-radius: 5px;
        /* Optionally, you can set a semi-transparent background or match it with your theme */
        /* background-color: rgba(241, 241, 241, 0.8); */
    }

    /* Optional: Add more spacing between messages */
    .chat-message.user, .chat-message.assistant {
        padding-top: 10px;
        padding-bottom: 10px;
    }
</style>
"""

# Inject the stylesheet; unsafe_allow_html is needed for the <style> tag.
st.markdown(_CHAT_STYLES, unsafe_allow_html=True)
|
|
|
# Sidebar page selector; the chain below renders exactly one page per run.
page = st.sidebar.selectbox("Choose a page", ["Documents", "Home", "Admin"])

if page == "Home":
    # Local import: only this page needs it, for escaping chat text.
    import html

    st.title("Building Regulations Chatbot", anchor=False)

    # Sidebar: list the documents chosen on the Documents page.
    with st.sidebar:
        colored_header("Selected Documents", description="Documents for chat")
        if 'selected_pdfs' in st.session_state and not st.session_state.selected_pdfs.empty:
            for _, pdf in st.session_state.selected_pdfs.iterrows():
                st.write(f"- {pdf['Doc Title']}")
        else:
            st.write("No documents selected. Please go to the Documents page.")

    colored_header("Chat", description="Ask questions about building regulations")

    # Render the transcript stored in session state.
    st.markdown('<div class="chat-container" id="chat-container">', unsafe_allow_html=True)
    for chat in st.session_state.chat_history:
        if chat['role'] == 'user':
            css_role, speaker = "user", "You"
        else:
            css_role, speaker = "assistant", "Assistant"
        # The text is user input or model output and is placed inside raw
        # HTML, so escape <, > and & to keep it from being read as markup.
        safe_content = html.escape(str(chat['content']), quote=False)
        with st.container():
            st.markdown(
                f'<div class="chat-message {css_role}">\n'
                f'<div><strong>{speaker}</strong></div>\n'
                f'<div class="message-content">{safe_content}</div>\n'
                '</div>',
                unsafe_allow_html=True,
            )
    st.markdown('</div>', unsafe_allow_html=True)

    # Intended to scroll the transcript to the newest message.
    # NOTE(review): confirm that <script> tags injected through st.markdown
    # are actually executed by the Streamlit version in use.
    st.markdown("""
<script>
const chatContainer = document.getElementById('chat-container');
if (chatContainer) {
    chatContainer.scrollTop = chatContainer.scrollHeight;
}
</script>
""", unsafe_allow_html=True)

    # Question form; the text area is cleared after each submit.
    with st.form("chat_form", clear_on_submit=True):
        user_input = st.text_area("Ask a question about building regulations...", height=100)
        col1, col2 = st.columns([3, 1])
        with col2:
            submit = st.form_submit_button("Send", use_container_width=True)

    if submit and user_input.strip() != "":
        st.session_state.chat_history.append({"role": "user", "content": user_input})
        print("chat history:", st.session_state.chat_history)

        answered = False
        if not st.session_state.file_ids:
            st.error("Please process PDFs first.")
        else:
            with st.spinner("Generating response..."):
                try:
                    response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
                    print("response:", response)
                    print("citations:", citations)
                    st.session_state.chat_history.append({"role": "assistant", "content": response+"\n\n"+"\n".join(citations)})
                    print("chat history:", st.session_state.chat_history)
                    answered = True
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")

        # Rerun only after a successful answer so the transcript above is
        # redrawn. Rerunning after st.error would wipe the error before the
        # user can read it. The call stays outside the try block so the
        # rerun signal is never swallowed by the except clause.
        if answered:
            st.rerun()

    # Footer.
    add_vertical_space(2)
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.caption("Powered by OpenAI GPT-4 and Pinecone")
    with col2:
        st.caption("© 2023 Your Company Name")

elif page == "Documents":
    st.title("Document Selection")

    city_code_input = st.text_input("Enter city code:", key="city_code_input")
    load_documents_button = st.button("Load Documents", key="load_documents_button")

    if load_documents_button and city_code_input:
        with st.spinner("Fetching PDFs..."):
            pdfs = fetch_pdfs(city_code_input)
            if pdfs:
                st.session_state.available_pdfs = pdfs
                st.success(f"Found {len(pdfs)} PDFs")
            else:
                st.error("No PDFs found")

    if 'available_pdfs' in st.session_state:
        st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")

        df = pd.DataFrame(st.session_state.available_pdfs)

        # Keep only the columns shown in the grid and give them display names.
        df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
        df = df.rename(columns={
            "municipality": "Municipality",
            "abbreviation": "Abbreviation",
            "doc_title": "Doc Title",
            "file_title": "File Title",
            "file_href": "File Href",
            "enactment_date": "Enactment Date",
            "prio": "Prio"
        })

        df.insert(0, "Select", False)

        gb = GridOptionsBuilder.from_dataframe(df)
        gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
        gb.configure_column("Select", header_name="Select", cellRenderer='checkboxRenderer')
        gb.configure_column("File Href", cellRenderer='linkRenderer')
        gb.configure_selection(selection_mode="multiple", use_checkbox=True)
        gb.configure_side_bar()
        gridOptions = gb.build()

        grid_response = AgGrid(
            df,
            gridOptions=gridOptions,
            enable_enterprise_modules=True,
            update_mode=GridUpdateMode.MODEL_CHANGED,
            data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
            fit_columns_on_grid_load=False,
        )

        selected_rows = grid_response['selected_rows']

        st.write("Debug - Selected Rows Structure:", selected_rows)

        if st.button("Process Selected PDFs"):
            # NOTE(review): depending on the st_aggrid version the selection
            # may arrive as a list of dicts, a DataFrame, or None when nothing
            # is selected. Normalise to a DataFrame before testing it.
            if selected_rows is None:
                selected_df = pd.DataFrame()
            else:
                selected_df = pd.DataFrame(selected_rows)

            if not selected_df.empty:
                st.session_state.selected_pdfs = selected_df
                # NOTE(review): st.session_state.thread_id is not reset here,
                # so an existing chat thread is reused with the new files.
                # Confirm that is intended.
                st.session_state.assistant_id = create_assistant()
                with st.spinner("Processing PDFs and creating/updating assistant..."):
                    file_ids = []

                    for _, pdf in st.session_state.selected_pdfs.iterrows():
                        st.write("Debug - PDF item:", pdf)

                        file_href = pdf['File Href']
                        doc_title = pdf['Doc Title']

                        file_name = download_pdf(file_href, doc_title)
                        if not file_name:
                            st.warning(f"Failed to download {doc_title}. Skipping this file.")
                            continue

                        file_path = f"./{file_name}"
                        file_id = upload_file_to_openai(file_path)
                        if file_id:
                            file_ids.append(file_id)
                        else:
                            st.warning(f"Failed to upload {doc_title}. Skipping this file.")

                    st.session_state.file_ids = file_ids
                    # Report success only when at least one file is usable.
                    if file_ids:
                        st.success("PDFs processed successfully. You can now chat on the Home page.")
                    else:
                        st.error("None of the selected PDFs could be processed.")
            else:
                st.warning("Select at least one PDF.")

    # NOTE(review): navigation in this script is driven by the sidebar
    # selectbox. Confirm that switch_page can resolve a page named "Home".
    if st.button("Go to Home"):
        switch_page("Home")

elif page == "Admin":
    st.title("Admin Panel")
    st.header("Vector Stores Information")

    vector_stores = get_vector_stores()
    if isinstance(vector_stores, str):
        # get_vector_stores() reports failures as an error string; show it
        # rather than iterating over its characters.
        st.error(vector_stores)
    else:
        json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
        st.write(json_vector_stores)

    if st.button("Back to Home"):
        switch_page("Home")