akshansh36 committed on
Commit 5de3b29 · verified · 1 Parent(s): d9ac64c

Update app.py

Files changed (1)
  1. app.py +116 -56
app.py CHANGED
@@ -1,8 +1,7 @@
  import streamlit as st
  import streamlit_chat
- import json
  import os
- import datetime
+ from datetime import datetime
  from pymongo import MongoClient
  from bson import ObjectId
  from dotenv import load_dotenv
@@ -11,6 +10,10 @@ from langchain_google_genai import GoogleGenerativeAIEmbeddings
  from langchain_google_genai import ChatGoogleGenerativeAI
  from langchain_core.prompts import ChatPromptTemplate
  import re
+ from pytz import timezone
+ from pytz import timezone, utc
+ from datetime import datetime
+
  st.set_page_config(layout="wide", page_title="IOCL Chatbot", page_icon="📄")
  load_dotenv()
  import logging
@@ -25,7 +28,7 @@ logging.basicConfig(
  pymongo_logger = logging.getLogger('pymongo')
  pymongo_logger.setLevel(logging.WARNING)

- PINECONE_API=os.getenv("PINECONE_API_KEY")
+ PINECONE_API = os.getenv("PINECONE_API_KEY")
  pc = pinecone.Pinecone(
      api_key=PINECONE_API
  )
@@ -35,7 +38,7 @@ index = pc.Index(index_name)
  MONGO_URI = os.getenv("MONGO_URI")
  client = MongoClient(MONGO_URI)
  db = client["chatbot_db"]
- chat_sessions = db["chat_sessions3"]
+ chat_sessions = db["chat_sessions2"]

  # Set LLM models
  FLASH_API = os.getenv("FLASH_API")
@@ -55,8 +58,13 @@ if 'regenerate' not in st.session_state:

  # Function to create a new chat session in MongoDB
  def create_new_chat_session():
+     # Get the current time in IST
+     ind_time = datetime.now(timezone("Asia/Kolkata"))
+     # Convert IST time to UTC for storing in MongoDB
+     utc_time = ind_time.astimezone(utc)
+
      new_session = {
-         "created_at": datetime.datetime.utcnow(),
+         "created_at": utc_time,  # Store in UTC
          "messages": []  # Empty at first
      }
      session_id = chat_sessions.insert_one(new_session).inserted_id
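
A note on the timezone logic above: timestamps are captured in IST but normalized to UTC before storage, and the sidebar hunk further down reverses the conversion for display. Below is a minimal sketch of the full round trip, assuming `pytz` is available and that PyMongo returns stored datetimes as naive UTC (its default):

```python
from datetime import datetime
from pytz import timezone, utc

IST = timezone("Asia/Kolkata")

# Write path: capture "now" in IST, normalize to UTC before storing
ind_time = datetime.now(IST)
utc_time = ind_time.astimezone(utc)

# PyMongo drops tzinfo on read by default, so simulate the stored value
stored = utc_time.replace(tzinfo=None)  # naive UTC, as MongoDB returns it

# Read path: re-attach UTC, then convert back to IST for display
session_date = stored.replace(tzinfo=utc).astimezone(IST).strftime("%Y-%m-%d %H:%M:%S")
print(session_date)
```
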
@@ -74,11 +82,12 @@ def load_chat_session(session_id):

  # Function to update chat session in MongoDB (store last 15 question-answer pairs)
  # Function to update chat session in MongoDB (store entire chat history)
- def update_chat_session(session_id, question, answer,improved_question):
+ def update_chat_session(session_id, question, answer, improved_question):
      # Append the new question-answer pair to the full messages array
      chat_sessions.update_one(
          {"_id": ObjectId(session_id)},
-         {"$push": {"messages": {"$each": [{"question": question,'improved_question':improved_question,"answer": answer}]}}}
+         {"$push": {
+             "messages": {"$each": [{"question": question, 'improved_question': improved_question, "answer": answer}]}}}
      )
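
The `$push` with `$each` above appends a single question-answer pair to the `messages` array in place. A self-contained sketch of the same update (placeholder Mongo URI; the commented `$slice` line shows how the array could later be capped, which this commit does not do):

```python
from pymongo import MongoClient
from bson import ObjectId

client = MongoClient("mongodb://localhost:27017")  # placeholder URI
chat_sessions = client["chatbot_db"]["chat_sessions2"]

def update_chat_session(session_id, question, answer, improved_question):
    # Append one question-answer pair to the messages array in place
    chat_sessions.update_one(
        {"_id": ObjectId(session_id)},
        {"$push": {"messages": {
            "$each": [{"question": question,
                       "improved_question": improved_question,
                       "answer": answer}],
            # "$slice": -15,  # optional: keep only the last 15 pairs
        }}},
    )
```
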
@@ -104,7 +113,6 @@ def regenerate_response():
      with st.spinner("Please wait, regenerating the response!"):
          # Generate a new response for the last question using only the previous history

-
          query = get_context_from_messages(last_question, previous_history)
          if query:
              logging.info(f"Extracted query is :{query}\n")
@@ -115,17 +123,39 @@ def regenerate_response():
                  query = last_question

              query_embedding = embeddings.embed_query(query)
-             search_results = index.query(vector=query_embedding, top_k=5, include_metadata=True)
-             matches = search_results['matches']
+             # Step 1: Fetch the top 10 chunks based on similarity
+             search_results = index.query(vector=query_embedding, top_k=10, include_metadata=True)
+             matches = search_results.get('matches', [])
+
+             # Step 2: Use the tag from the first chunk (if available)
+             first_chunk_tag = matches[0]['metadata'].get('tag', None) if matches else None
+
+             # Decision logic based on the first chunk's tag
+             if first_chunk_tag:
+                 # Use the first chunk's tag to filter and retrieve all relevant chunks
+                 all_chunks_response = index.query(
+                     vector=query_embedding,
+                     filter={"tag": first_chunk_tag},
+                     top_k=15,  # Adjust top_k as needed
+                     include_metadata=True
+                 )
+                 all_chunks = all_chunks_response.get('matches', [])
+             else:
+                 # If the first chunk has no tag, pass the top 10 chunks as they are
+                 all_chunks = matches

-             content = ""
-             for i, match in enumerate(matches):
-                 chunk = match['metadata']['chunk']
-                 url = match['metadata']['url']
-                 content += f"chunk{i}: {chunk}\n" + f"url{i}: {url}\n"
+             # Build the content string only if chunks were found (content starts
+             # empty so generate_summary below never sees an undefined variable)
+             content = ""
+             if all_chunks:
+                 for i, match in enumerate(all_chunks):
+                     chunk = match['metadata']['chunk']
+                     url = match['metadata']['url']
+                     content += f"chunk{i}: {chunk}\n" + f"Reference url{i}: {url}\n"
+             else:
+                 st.warning("No relevant chunks found for this query. Please try a different question.")

-             new_reply= generate_summary(content, query, previous_history)
+             new_reply = generate_summary(content, query, previous_history)

              st.session_state['chat_history'][-1]["answer"] = new_reply
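
The heart of this commit is the two-stage retrieval pattern repeated below in the main flow: a plain similarity query first, then a metadata-filtered re-query keyed on the top match's `tag`. A minimal sketch of that pattern against the Pinecone client, assuming the `tag`, `chunk`, and `url` metadata fields used in this file (the index name is a stand-in, since the diff never shows `index_name`):

```python
import os
import pinecone

pc = pinecone.Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
index = pc.Index("iocl-chatbot")  # stand-in name; the real index_name is set elsewhere

def retrieve_chunks(query_embedding, base_k=10, tag_k=15):
    # Stage 1: plain similarity search
    matches = index.query(vector=query_embedding, top_k=base_k,
                          include_metadata=True).get('matches', [])
    if not matches:
        return []
    # Stage 2: if the best match carries a tag, pull every chunk sharing that tag
    tag = matches[0]['metadata'].get('tag')
    if tag:
        return index.query(vector=query_embedding,
                           filter={"tag": tag},
                           top_k=tag_k,
                           include_metadata=True).get('matches', [])
    return matches
```
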
@@ -140,33 +170,31 @@ def regenerate_response():
          st.error("Error occurred in regenerating the response, please try again later.")

-
-
+ # When generating a response, pass only the latest 5 messages to the LLM
  def generate_summary(chunks, query, chat_history):
      try:
-         # Limit the history sent to the LLM to the latest 3 question-answer pairs
-         limited_history = chat_history[-3:] if len(chat_history) > 3 else chat_history
+         # Limit the history sent to the LLM to the latest 5 question-answer pairs
+         limited_history = chat_history[-5:] if len(chat_history) > 5 else chat_history

          # Create conversation history for the LLM, using only the limited entries
          history_text = "\n".join([f"User: {q['improved_question']}\nLLM: {q['answer']}" for q in limited_history])

          # Define the system and user prompts including the limited history
          prompt = ChatPromptTemplate.from_messages([
-             ("system", """You are a chatbot specializing in answering queries related to Indian Oil Corporation Limited (IOCL). You will be provided with chunks of data from the IOCL website to answer user queries. Each chunk will include associated URLs, You must give the url of the chunks which you are using to answer the query.
+             ("system", """You are a chatbot specializing in answering queries related to Indian Oil Corporation Limited (IOCL). You will be provided with chunks of data from the IOCL website to answer user queries. Each chunk will include associated URLs, which you must reference in your responses to show the source of the information.
              Key Guidelines:

-             1.If the user query is not clear, or you think multiple answers are possbile, you should ask for clarification with proper reasoning.
-             2.Do not mention chunk name in any of your replies.
+             1. If the user query is not clear, or you think multiple answers are possible, you can ask for clarification with proper reasoning. Do not mention chunk names in any of your replies.
              2. Detailed and Clear: Provide thorough, clear, and concise responses without omitting relevant information from the data chunks.
              3. Natural Summarization: When answering, you must not directly quote chunk names or formats. Instead, summarize or interpret the data naturally and conversationally.
              4. Use Conversation History: Refer back to the conversation history to maintain consistency and build on prior responses, if applicable.
-             5.Ignore Unanswered Queries: If the conversation history contains previous responses like "The answer is not available in the context," disregard them when formulating your current response.
+             5. Ignore Unanswered Queries: If the conversation history contains previous responses like "cannot answer" or "information not present", disregard them when formulating your current response.
              6. Graceful Handling of General Queries: If a user sends greetings, introductions, salutations, or unrelated questions, respond appropriately and conversationally.
              7. Include Source URLs: Always include the URLs from the relevant chunks of data that you're using to answer the query.
              8. Thoroughly look for the answer to the query in the provided chunks before replying; if you feel the query is irrelevant or the answer is not present, ask the user to clarify or say that it cannot be answered.
-             9.Sometimes chunks might contain very less data still use it if its relevant.

-             """),
+
+             """),

              ("human", f'''
              "Query":\n {query}\n
@@ -191,12 +219,13 @@ def generate_summary(chunks, query, chat_history):
          st.error(f"Error answering your question: {e}")
          return None

+
  def get_context_from_messages(query, chat_history):
      try:

          logging.info(f"Getting context from original query: {query}")

          # Limit the history sent to the LLM to the latest 3 question-answer pairs
          limited_history = chat_history[-3:] if len(chat_history) > 3 else chat_history

          # Create conversation history for the LLM, using only the limited entries
@@ -204,11 +233,14 @@ def get_context_from_messages(query, chat_history):

          # Define the system and user prompts including the limited history
          prompt = ChatPromptTemplate.from_messages([
-             ("system", """"I will provide you with a user query and up to the last 3 messages from the chat history which includes both questions and answers.Your task is to understand the user query nicely and restructure it if required such that it makes complete sense and is completely self contained.
+             ("system", """I will provide you with a user query and up to the last 3 messages from the chat history, including both questions and answers. Your task is to reformulate the user query so that it makes complete sense and is completely self-contained.
              The provided queries are related to Indian Oil Corporation Limited (IOCL).
              1. If the query is a follow-up, use the provided chat history to reconstruct a well-defined, contextually complete query that can stand alone.
-             2. if the query is self contained, if applicable try to improve it to make is coherent.
+             2. If the query is self-contained, try to improve it to make it coherent where applicable.
              3. If the user query is salutations, greetings, or otherwise not relevant, give the query back as it is.
+             4. Even if the user query is just a one-word query, you must formulate a well-defined query.
+             5. It is not necessary to always use the chat history; sometimes a query is self-sufficient and only needs its language improved.
+
              I have provided an output format below, strictly follow it. Do not give anything else other than just the output.
              expected_output_format: "query: String or None"
              """),
@@ -253,10 +285,13 @@ if st.sidebar.button("New Chat"):
  existing_sessions = chat_sessions.find().sort("created_at", -1)
  for session in existing_sessions:
      session_id = str(session['_id'])
-     session_date = session['created_at'].strftime("%Y-%m-%d %H:%M:%S")
-     col1, col2 = st.sidebar.columns([8, 1])

-     # Display session name
+     # Retrieve the stored UTC time and convert it to IST for display
+     utc_time = session['created_at']
+     ist_time = utc_time.replace(tzinfo=utc).astimezone(timezone("Asia/Kolkata"))
+     session_date = ist_time.strftime("%Y-%m-%d %H:%M:%S")  # Format for display
+
+     col1, col2 = st.sidebar.columns([8, 1])
      with col1:
          if st.button(f"Session {session_date}", key=session_id):
              st.session_state['current_chat_id'] = session_id
@@ -283,41 +318,66 @@ if user_question:

      with st.spinner("Please wait, I am thinking!!"):
          # Store the user's question and get the assistant's response
-         query=get_context_from_messages(user_question,st.session_state['chat_history'])
+         query = get_context_from_messages(user_question, st.session_state['chat_history'])
          if query:
              logging.info(f"Extracted query is :{query}\n")
-             extracted_query=get_query_from_llm_answer(query)
+             extracted_query = get_query_from_llm_answer(query)
              if extracted_query:
-                 query=extracted_query
+                 query = extracted_query
              else:
-                 query=user_question
-
-             query_embedding=embeddings.embed_query(query)
-             search_results = index.query(vector=query_embedding, top_k=15, include_metadata=True)
-             matches=search_results['matches']
-
-             content=""
-             for i,match in enumerate(matches):
-                 chunk=match['metadata']['chunk']
-                 url=match['metadata']['url']
-                 content += f"chunk{i}: {chunk}\n" + f"url{i}: {url}\n"
-
-
-             print(f"content being passed is {content}")
-             reply = generate_summary(content, query, st.session_state['chat_history'])
+                 query = user_question
+
+             query_embedding = embeddings.embed_query(query)
+             # Retrieve the top 10 chunks based on similarity
+             search_results = index.query(vector=query_embedding, top_k=10, include_metadata=True)
+             matches = search_results.get('matches', [])
+
+             # Use the tag from the first chunk if available
+             first_chunk_tag = matches[0]['metadata'].get('tag', None) if matches else None
+
+             # Decision logic based on the first chunk's tag
+             if first_chunk_tag:
+                 # Use the first chunk's tag to filter and retrieve all relevant chunks
+                 all_chunks_response = index.query(
+                     vector=query_embedding,
+                     filter={"tag": first_chunk_tag},
+                     top_k=15,  # Adjust as needed
+                     include_metadata=True
+                 )
+                 all_chunks = all_chunks_response.get('matches', [])
+             else:
+                 # If the first chunk has no tag, pass the top 10 chunks as they are
+                 all_chunks = matches
+
+             # Check if `all_chunks` has any content before iterating
+             if all_chunks:
+                 # Prepare content for the LLM using metadata from each chunk
+                 content = ""
+                 for i, match in enumerate(all_chunks):
+                     chunk = match['metadata'].get('chunk', '')
+                     url = match['metadata'].get('url', '')
+                     content += f"chunk{i}: {chunk}\nReference url{i}: {url}\n"
+
+                 # Generate the summary using the LLM
+                 print(f"content being sent is {content}")
+                 reply = generate_summary(content, query, st.session_state['chat_history'])
+             else:
+                 st.warning("No relevant chunks found for this query. Please try a different question.")
+                 reply = None  # Ensure `reply` is defined even if `all_chunks` is empty

+         # Display or handle the reply if generated successfully
          if reply:
              # Append the new question-answer pair to chat history
-             st.session_state['chat_history'].append({"question": user_question, "answer": reply,"improved_question":query})
+             st.session_state['chat_history'].append(
+                 {"question": user_question, "answer": reply, "improved_question": query}
+             )

              # Update the current chat session in MongoDB
              if st.session_state['current_chat_id']:
-                 update_chat_session(st.session_state['current_chat_id'], user_question, reply,query)
-
+                 update_chat_session(st.session_state['current_chat_id'], user_question, reply, query)
          else:
-             st.error("Error processing your request, Please try again later.")
-         else:
-             st.error("Error processing your request, Please try again later.")
+             st.error("Error processing your request. Please try again later.")
+
      # Display the updated chat history (show last 15 question-answer pairs)
      for i, pair in enumerate(st.session_state['chat_history']):
          question = pair["question"]