Jashan1 committed
Commit 5b40240
1 Parent(s): f3174b2

Update app.py

Files changed (1)
  1. app.py +519 -520
app.py CHANGED
@@ -1,520 +1,519 @@
import os
import io
import requests
import streamlit as st
from openai import OpenAI
from PyPDF2 import PdfReader
import urllib.parse
from dotenv import load_dotenv
from openai import OpenAI
from io import BytesIO
from streamlit_extras.colored_header import colored_header
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras.switch_page_button import switch_page
import json
import pandas as pd
from st_aggrid import AgGrid, GridOptionsBuilder, GridUpdateMode, DataReturnMode
import time
import random
import aiohttp
import asyncio
from PyPDF2 import PdfWriter

load_dotenv()

# ---------------------- Configuration ----------------------
st.set_page_config(page_title="Building Regulations Chatbot", layout="wide", initial_sidebar_state="expanded")
# Load environment variables from .env file
load_dotenv()

# Set OpenAI API key
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# ---------------------- Session State Initialization ----------------------

if 'pdf_contents' not in st.session_state:
    st.session_state.pdf_contents = []
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'processed_pdfs' not in st.session_state:
    st.session_state.processed_pdfs = False
if 'id_counter' not in st.session_state:
    st.session_state.id_counter = 0
if 'assistant_id' not in st.session_state:
    st.session_state.assistant_id = None
if 'thread_id' not in st.session_state:
    st.session_state.thread_id = None
if 'file_ids' not in st.session_state:
    st.session_state.file_ids = []


# ---------------------- Helper Functions ----------------------

def get_vector_stores():
    try:
        vector_stores = client.beta.vector_stores.list()
        return vector_stores
    except Exception as e:
        return f"Error retrieving vector stores: {str(e)}"


def fetch_pdfs(city_code):
    url = f"http://91.203.213.50:5000/oereblex/{city_code}"
    response = requests.get(url)
    if response.status_code == 200:
        data = response.json()
        print("First data:", data.get('data', [])[0] if data.get('data') else None)
        return data.get('data', [])
    else:
        st.error(f"Failed to fetch PDFs for city code {city_code}")
        return None


def download_pdf(url, doc_title):
    # Add 'https://' scheme if it's missing
    if not url.startswith(('http://', 'https://')):
        url = 'https://' + url

    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Sanitize doc_title to create a valid filename
        sanitized_title = ''.join(c for c in doc_title if c.isalnum() or c in (' ', '_', '-')).rstrip()
        sanitized_title = sanitized_title.replace(' ', '_')
        filename = f"{sanitized_title}.pdf"

        # Ensure filename is unique by appending the id_counter if necessary
        if os.path.exists(filename):
            filename = f"{sanitized_title}_{st.session_state.id_counter}.pdf"
            st.session_state.id_counter += 1

        # Save the PDF content to a file
        with open(filename, 'wb') as f:
            f.write(response.content)

        return filename
    except requests.RequestException as e:
        st.error(f"Failed to download PDF from {url}. Error: {str(e)}")
        return None


# Helper function to upload file to OpenAI
def upload_file_to_openai(file_path):
    try:
        file = client.files.create(
            file=open(file_path, 'rb'),
            purpose='assistants'
        )
        return file.id
    except Exception as e:
        st.error(f"Failed to upload file {file_path}. Error: {str(e)}")
        return None


def create_assistant():
    assistant = client.beta.assistants.create(
        name="Building Regulations Assistant",
        instructions="You are an expert on building regulations. Use the provided documents to answer questions accurately.",
        model="gpt-4o-mini",
        tools=[{"type": "file_search"}]
    )
    st.session_state.assistant_id = assistant.id
    return assistant.id


def format_response(response, citations):
    """Format the response with proper markdown structure."""
    formatted_text = f"""
### Response
{response}

{"### Citations" if citations else ""}
{"".join([f"- {citation}\n" for citation in citations]) if citations else ""}
-"""
-    return formatted_text.strip()
+"""return formatted_text.strip()


def response_generator(response, citations):
    """Generator for streaming response with structured output."""
    # First yield the response header
    yield "### Response\n\n"
    time.sleep(0.1)

    # Yield the main response word by word
    words = response.split()
    for i, word in enumerate(words):
        yield word + " "
        # Add natural pauses at punctuation
        if word.endswith(('.', '!', '?', ':')):
            time.sleep(0.1)
        else:
            time.sleep(0.05)

    # If there are citations, yield them with proper formatting
    if citations:
        # Add some spacing before citations
        yield "\n\n### Citations\n\n"
        time.sleep(0.1)

        for citation in citations:
            yield f"- {citation}\n"
            time.sleep(0.05)


def chat_with_assistant(file_ids, user_message):
    print("----- Starting chat_with_assistant -----")
    print("Received file_ids:", file_ids)
    print("Received user_message:", user_message)

    # Create attachments for each file_id
    attachments = [{"file_id": file_id, "tools": [{"type": "file_search"}]} for file_id in file_ids]
    print("Attachments created:", attachments)

    if st.session_state.thread_id is None:
        print("No existing thread_id found. Creating a new thread.")
        thread = client.beta.threads.create(
            messages=[
                {
                    "role": "user",
                    "content": user_message,
                    "attachments": attachments,
                }
            ]
        )
        st.session_state.thread_id = thread.id
        print("New thread created with id:", st.session_state.thread_id)
    else:
        print(f"Existing thread_id found: {st.session_state.thread_id}. Adding message to the thread.")
        message = client.beta.threads.messages.create(
            thread_id=st.session_state.thread_id,
            role="user",
            content=user_message,
            attachments=attachments
        )
        print("Message added to thread with id:", message.id)

    try:
        thread = client.beta.threads.retrieve(thread_id=st.session_state.thread_id)
        print("Retrieved thread:", thread)
    except Exception as e:
        print(f"Error retrieving thread with id {st.session_state.thread_id}: {e}")
        return "An error occurred while processing your request.", []

    try:
        run = client.beta.threads.runs.create_and_poll(
            thread_id=thread.id, assistant_id=st.session_state.assistant_id
        )
        print("Run created and polled:", run)
    except Exception as e:
        print("Error during run creation and polling:", e)
        return "An error occurred while processing your request.", []

    try:
        messages = list(client.beta.threads.messages.list(thread_id=thread.id, run_id=run.id))
        print("Retrieved messages:", messages)
    except Exception as e:
        print("Error retrieving messages:", e)
        return "An error occurred while retrieving messages.", []

    # Process the first message content
    if messages and messages[0].content:
        message_content = messages[0].content[0].text
        print("Raw message content:", message_content)

        annotations = message_content.annotations
        citations = []
        seen_citations = set()

        # Process annotations and citations
        for index, annotation in enumerate(annotations):
            message_content.value = message_content.value.replace(annotation.text, f"[{index}]")
            if file_citation := getattr(annotation, "file_citation", None):
                try:
                    cited_file = client.files.retrieve(file_citation.file_id)
                    citation_entry = f"[{index}] {cited_file.filename}"
                    if citation_entry not in seen_citations:
                        citations.append(citation_entry)
                        seen_citations.add(citation_entry)
                except Exception as e:
                    print(f"Error retrieving cited file for annotation {index}: {e}")

        # Create a container for the response with proper styling
        response_container = st.container()
        with response_container:
            message_placeholder = st.empty()
            streaming_content = ""

            # Stream the response with structure
            for chunk in response_generator(message_content.value, citations):
                streaming_content += chunk
                # Use markdown for proper formatting during streaming
                message_placeholder.markdown(streaming_content + "▌")

            # Final formatted response
            final_formatted_response = format_response(message_content.value, citations)
            message_placeholder.markdown(final_formatted_response)

        return final_formatted_response, citations
    else:
        return "No response received from the assistant.", []


# ---------------------- Streamlit App ----------------------

# ---------------------- Custom CSS Injection ----------------------

# Inject custom CSS to style chat messages
st.markdown("""
<style>
/* Style for the chat container */
.chat-container {
    display: flex;
    flex-direction: column;
    gap: 1.5rem;
}

/* Style for individual chat messages */
.chat-message {
    margin-bottom: 1.5rem;
}

/* Style for user messages */
.chat-message.user > div:first-child {
    color: #1E90FF; /* Dodger Blue for "You" */
    font-weight: bold;
    margin-bottom: 0.5rem;
}

/* Style for assistant messages */
.chat-message.assistant > div:first-child {
    color: #32CD32; /* Lime Green for "Assistant" */
    font-weight: bold;
    margin-bottom: 0.5rem;
}

/* Style for the message content */
.message-content {
    padding: 1rem;
    border-radius: 0.5rem;
    line-height: 1.5;
}

.message-content h3 {
    color: #444;
    margin-top: 1rem;
    margin-bottom: 0.5rem;
    font-size: 1.1rem;
}

.message-content ul {
    margin-top: 0.5rem;
    margin-bottom: 0.5rem;
    padding-left: 1.5rem;
}

.message-content li {
    margin-bottom: 0.25rem;
}
</style>
""", unsafe_allow_html=True)

page = st.sidebar.selectbox("Choose a page", ["Documents", "Home", "Admin"])

if page == "Home":
    st.title("Building Regulations Chatbot", anchor=False)

    # Sidebar improvements
    with st.sidebar:
        colored_header("Selected Documents", description="Documents for chat")
        if 'selected_pdfs' in st.session_state and not st.session_state.selected_pdfs.empty:
            for _, pdf in st.session_state.selected_pdfs.iterrows():
                st.write(f"- {pdf['Doc Title']}")
        else:
            st.write("No documents selected. Please go to the Documents page.")

    # Main chat area improvements
    colored_header("Chat", description="Ask questions about building regulations")

    # Chat container with custom CSS class
    st.markdown('<div class="chat-container" id="chat-container">', unsafe_allow_html=True)
    for chat in st.session_state.chat_history:
        with st.container():
            if chat['role'] == 'user':
                st.markdown(f"""
                <div class="chat-message user">
                    <div><strong>You</strong></div>
                    <div class="message-content">{chat['content']}</div>
                </div>
                """, unsafe_allow_html=True)
            else:
                st.markdown(f"""
                <div class="chat-message assistant">
                    <div><strong>Assistant</strong></div>
                    <div class="message-content">{chat['content']}</div>
                </div>
                """, unsafe_allow_html=True)
    st.markdown('</div>', unsafe_allow_html=True)

    # Inject JavaScript to auto-scroll the chat container
    st.markdown("""
    <script>
    const chatContainer = document.getElementById('chat-container');
    if (chatContainer) {
        chatContainer.scrollTop = chatContainer.scrollHeight;
    }
    </script>
    """, unsafe_allow_html=True)

    # Chat input improvements
    with st.form("chat_form", clear_on_submit=True):
        user_input = st.text_area("Ask a question about building regulations...", height=100)
        col1, col2 = st.columns([3, 1])
        with col2:
            submit = st.form_submit_button("Send", use_container_width=True)

    if submit and user_input.strip() != "":
        # Add user message to chat history
        st.session_state.chat_history.append({"role": "user", "content": user_input})

        if not st.session_state.file_ids:
            st.error("Please process PDFs first.")
        else:
            with st.spinner("Generating response..."):
                try:
                    response, citations = chat_with_assistant(st.session_state.file_ids, user_input)
                    # The response is already formatted, so we can add it directly to chat history
                    st.session_state.chat_history.append({
                        "role": "assistant",
                        "content": response
                    })
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")

        # Rerun the app to update the chat display
        st.rerun()

    # Footer improvements
    add_vertical_space(2)
    st.markdown("---")
    col1, col2 = st.columns(2)
    with col1:
        st.caption("Powered by OpenAI GPT-4 and Pinecone")
    with col2:
        st.caption("© 2023 Your Company Name")

elif page == "Documents":
    st.title("Document Selection")

    city_code_input = st.text_input("Enter city code:", key="city_code_input")
    load_documents_button = st.button("Load Documents", key="load_documents_button")

    if load_documents_button and city_code_input:
        with st.spinner("Fetching PDFs..."):
            pdfs = fetch_pdfs(city_code_input)
            if pdfs:
                st.session_state.available_pdfs = pdfs
                st.success(f"Found {len(pdfs)} PDFs")
            else:
                st.error("No PDFs found")

    if 'available_pdfs' in st.session_state:
        st.write(f"Total PDFs: {len(st.session_state.available_pdfs)}")

        # Create a DataFrame from the available PDFs
        df = pd.DataFrame(st.session_state.available_pdfs)

        # Select and rename only the specified columns
        df = df[['municipality', 'abbreviation', 'doc_title', 'file_title', 'file_href', 'enactment_date', 'prio']]
        df = df.rename(columns={
            "municipality": "Municipality",
            "abbreviation": "Abbreviation",
            "doc_title": "Doc Title",
            "file_title": "File Title",
            "file_href": "File Href",
            "enactment_date": "Enactment Date",
            "prio": "Prio"
        })

        # Add a checkbox column to the DataFrame at the beginning
        df.insert(0, "Select", False)

        # Configure grid options
        gb = GridOptionsBuilder.from_dataframe(df)
        gb.configure_default_column(enablePivot=True, enableValue=True, enableRowGroup=True)
        gb.configure_column("Select", header_name="Select", cellRenderer='checkboxRenderer')
        gb.configure_column("File Href", cellRenderer='linkRenderer')
        gb.configure_selection(selection_mode="multiple", use_checkbox=True)
        gb.configure_side_bar()
        gridOptions = gb.build()

        # Display the AgGrid
        grid_response = AgGrid(
            df,
            gridOptions=gridOptions,
            enable_enterprise_modules=True,
            update_mode=GridUpdateMode.MODEL_CHANGED,
            data_return_mode=DataReturnMode.FILTERED_AND_SORTED,
            fit_columns_on_grid_load=False,
        )

        # Get the selected rows
        selected_rows = grid_response['selected_rows']

        # Debug: Print the structure of selected_rows
        st.write("Debug - Selected Rows Structure:", selected_rows)

        if st.button("Process Selected PDFs"):
            if len(selected_rows) > 0:  # Check if there are any selected rows
                # Convert selected_rows to a DataFrame
                st.session_state.selected_pdfs = pd.DataFrame(selected_rows)
                st.session_state.assistant_id = create_assistant()
                with st.spinner("Processing PDFs and creating/updating assistant..."):
                    file_ids = []

                    for _, pdf in st.session_state.selected_pdfs.iterrows():
                        # Debug: Print each pdf item
                        st.write("Debug - PDF item:", pdf)

                        file_href = pdf['File Href']
                        doc_title = pdf['Doc Title']

                        # Pass doc_title to download_pdf
                        file_name = download_pdf(file_href, doc_title)
                        if file_name:
                            file_path = f"./{file_name}"
                            file_id = upload_file_to_openai(file_path)
                            if file_id:
                                file_ids.append(file_id)
                            else:
                                st.warning(f"Failed to upload {doc_title}. Skipping this file.")
                        else:
                            st.warning(f"Failed to download {doc_title}. Skipping this file.")

                    st.session_state.file_ids = file_ids
                    st.success("PDFs processed successfully. You can now chat on the Home page.")
            else:
                st.warning("Select at least one PDF.")


elif page == "Admin":
    st.title("Admin Panel")
    st.header("Vector Stores Information")

    vector_stores = get_vector_stores()
    json_vector_stores = json.dumps([vs.model_dump() for vs in vector_stores])
    st.write(json_vector_stores)

    # Add a button to go back to the main page