Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import streamlit_chat
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import datetime
|
6 |
+
from pymongo import MongoClient
|
7 |
+
from bson import ObjectId
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
10 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
11 |
+
from langchain_core.prompts import ChatPromptTemplate
|
12 |
+
|
13 |
+
# Page configuration must be the first Streamlit call in the script.
st.set_page_config(layout="wide", page_title="IOCL Chatbot", page_icon="📄")
load_dotenv()

# MongoDB connection setup — one document per chat session.
# NOTE(review): assumes MONGO_URI is set in the environment/.env; MongoClient
# will not fail until the first actual operation if it is missing/wrong.
MONGO_URI = os.getenv("MONGO_URI")
client = MongoClient(MONGO_URI)
db = client["chatbot_db"]
chat_sessions = db["chat_sessions"]

# Set LLM models (Google Generative AI via LangChain).
# `embeddings` is instantiated here but not used in this file — presumably
# kept for a retrieval flow elsewhere; verify before removing.
FLASH_API = os.getenv("FLASH_API")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=FLASH_API)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0, max_tokens=None, google_api_key=FLASH_API)

# Load the extracted JSON data scraped from the IOCL website, then re-serialize
# it so a single string can be injected into the LLM prompt.
with open('iocl_extracted_2.json', 'r') as file:
    extracted_data = json.load(file)
reference_data = json.dumps(extracted_data)

# Initialize session state for the current chat session.
if 'current_chat_id' not in st.session_state:
    st.session_state['current_chat_id'] = None  # MongoDB _id (str) of active session
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []  # list of {"question": ..., "answer": ...}
if 'regenerate' not in st.session_state:
    st.session_state['regenerate'] = False  # Track regenerate button state
+
# Function to create a new chat session in MongoDB
def create_new_chat_session():
    """Insert an empty chat-session document into MongoDB.

    Returns:
        str: The stringified ObjectId of the newly created session, suitable
        for storing in Streamlit session state.
    """
    new_session = {
        # Timezone-aware UTC timestamp. datetime.utcnow() is deprecated since
        # Python 3.12 and returned a naive datetime; now(timezone.utc)
        # represents the same instant, explicitly tagged as UTC.
        "created_at": datetime.datetime.now(datetime.timezone.utc),
        "messages": [],  # Empty at first; Q&A pairs appended as the chat runs
    }
    session_id = chat_sessions.insert_one(new_session).inserted_id
    return str(session_id)
|
49 |
+
|
50 |
+
|
51 |
+
# Load a stored chat session by its MongoDB id into Streamlit session state.
def load_chat_session(session_id):
    """Fetch the session document and expose its full message list for display.

    The entire history is loaded here; truncation to the last 15 pairs happens
    only when history is handed to the LLM, not for on-screen rendering.
    """
    document = chat_sessions.find_one({"_id": ObjectId(session_id)})
    if document:
        st.session_state['chat_history'] = document['messages']
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
# Persist one new question/answer exchange onto a session's message array.
def update_chat_session(session_id, question, answer):
    """Append a single Q&A pair to the session document in MongoDB.

    The full history is stored; nothing is trimmed server-side.
    """
    new_entry = {"question": question, "answer": answer}
    filter_doc = {"_id": ObjectId(session_id)}
    # $push with $each on a one-element list is equivalent to a plain $push.
    update_doc = {"$push": {"messages": {"$each": [new_entry]}}}
    chat_sessions.update_one(filter_doc, update_doc)
|
69 |
+
|
70 |
+
|
71 |
+
# Overwrite the answer of the most recent message in MongoDB.
def replace_last_response_in_mongo(session_id, new_answer):
    """Set the `answer` field of the last message in the stored session.

    The index of the last message is derived from the in-memory chat history,
    which mirrors the stored array. No-op when the history is empty.
    """
    history = st.session_state['chat_history']
    if not history:
        return
    last_index = len(history) - 1
    chat_sessions.update_one(
        {"_id": ObjectId(session_id)},
        {"$set": {f"messages.{last_index}.answer": new_answer}},
    )
|
80 |
+
|
81 |
+
# Re-ask the most recent question and replace its stored answer.
def regenerate_response():
    """Regenerate the answer to the last question and update state + MongoDB.

    The last Q&A pair is excluded from the history sent to the LLM so the
    model does not see the answer it is being asked to redo. Finishes with a
    rerun so the UI reflects the replaced answer.
    """
    history = st.session_state['chat_history']
    if history:
        last_question = history[-1]["question"]
        # Context for the model: everything *before* the pair being redone.
        prior_context = history[:-1]

        with st.spinner("Please wait, regenerating the response!"):
            new_reply = generate_summary(str(reference_data), last_question, prior_context)

        # Swap the answer in memory first, then mirror the change in MongoDB.
        history[-1]["answer"] = new_reply
        if st.session_state['current_chat_id']:
            replace_last_response_in_mongo(st.session_state['current_chat_id'], new_reply)

        st.session_state['regenerate'] = False  # Reset regenerate flag
        st.rerun()
|
101 |
+
|
102 |
+
|
103 |
+
# Generate a detailed response to a query using the extracted IOCL data and
# the latest conversation history.
def generate_summary(reference_data, query, chat_history):
    """Ask the LLM to answer *query* grounded in the scraped IOCL data.

    Args:
        reference_data: JSON string of the extracted IOCL website data.
        query: The user's current question.
        chat_history: Full list of {"question": ..., "answer": ...} dicts;
            only the latest 15 pairs are forwarded to the model.

    Returns:
        str: The model's answer, or a fixed error string on failure (the
        underlying exception is surfaced via ``st.error``).
    """
    try:
        # Limit the history sent to the LLM to the latest 15 Q&A pairs.
        limited_history = chat_history[-15:]
        history_text = "\n".join(
            f"User: {pair['question']}\nLLM: {pair['answer']}" for pair in limited_history
        )

        # BUG FIX: the dynamic text (history, data, query) was previously
        # interpolated into the template via an f-string, which made user
        # content part of the *template*. Any '{' or '}' in the query or
        # history was then parsed as a placeholder and formatting raised an
        # exception (only the JSON data was worked around by doubling its
        # braces). Passing them as real template variables avoids both the
        # crash and the manual brace-escaping.
        prompt = ChatPromptTemplate.from_messages([
            ("system", """You are a chatbot who specialises in answering questions related to Indan Oil Corporation Limitied(IOCL).
    This is the extracted data from the Indian Oil Corporation Limited (IOCL) website. The extracted data contains detailed information about the company and its various operations. You will be provided with a query, and you must use this data to answer it comprehensively.
    Additionally, the conversation history may contain relevant context or prior queries and responses. Use this history to ensure your answer is accurate and coherent, building on previous information if necessary.

    Key Guidelines:
    1.Accuracy is paramount: If the extracted data or conversation history does not contain the information required to answer the query, clearly state, "The answer is not available in the context." Do not attempt to provide a speculative or incorrect response.
    2.Be detailed: Provide clear, concise, and thorough answers without omitting any relevant information from the extracted data.
    3.Avoid quoting field names: When responding, avoid directly quoting or referencing field names or formats from the extracted data. Instead, present the information naturally, as if summarizing or interpreting the data. Try to give the answer in points.
    4.Use the conversation history: When applicable, refer to earlier parts of the conversation to ensure consistency and accuracy in your response.
    5.Sometime a query might be a followup question,without proper context in that case try using previous conversation history and try to utilise latest messages to answer it if possible.
    6.Answer the queries in conversational style.
    7.If any links or source PDFs links are available within the extracted data part which you are referring to answer the query, you must include these in your responses, suggesting that users can refer to them for more detailed information.
    """),
            ("human", '''
    Previous Conversation History: \n{history_text}\n
    "Extracted Data": \n{reference_data}\n
    "Query":\n {query}\n
    '''),
        ])

        # Chain the prompt with the LLM; the invoke dict now supplies the
        # actual template variables (previously the keys matched nothing).
        chain = prompt | llm
        result = chain.invoke({
            "history_text": history_text,
            "reference_data": reference_data,
            "query": query,
        })

        return result.content

    except Exception as e:
        st.error(f"Error answering your question: {e}")
        return "Error answering question."
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
# Sidebar for showing chat sessions and creating new sessions
st.sidebar.header("Chat Sessions")

# Button for creating a new chat: makes a fresh Mongo document and clears the
# in-memory history so the main pane starts empty.
if st.sidebar.button("New Chat"):
    new_chat_id = create_new_chat_session()
    st.session_state['current_chat_id'] = new_chat_id
    st.session_state['chat_history'] = []

# List existing chat sessions, newest first, each with a delete (dustbin) button.
existing_sessions = chat_sessions.find().sort("created_at", -1)
for session in existing_sessions:
    session_id = str(session['_id'])
    session_date = session['created_at'].strftime("%Y-%m-%d %H:%M:%S")
    col1, col2 = st.sidebar.columns([8, 1])

    # Display session name; clicking it makes this the active session and
    # loads its full history into session state.
    with col1:
        if st.button(f"Session {session_date}", key=session_id):
            st.session_state['current_chat_id'] = session_id
            load_chat_session(session_id)

    # Display delete icon (dustbin)
    with col2:
        if st.button("🗑️", key=f"delete_{session_id}"):
            chat_sessions.delete_one({"_id": ObjectId(session_id)})
            st.rerun()  # Refresh the app to remove the deleted session from the sidebar
|
181 |
+
|
182 |
+
# Main chat interface
st.markdown('<div class="fixed-header"><h1>Welcome To IOCL Chatbot</h1></div>', unsafe_allow_html=True)
st.markdown("<hr>", unsafe_allow_html=True)

# Input box for the question
user_question = st.chat_input(f"Ask a Question related to IOCL Website")

if user_question:
    # Automatically create a new session if none exists
    if not st.session_state['current_chat_id']:
        new_chat_id = create_new_chat_session()
        st.session_state['current_chat_id'] = new_chat_id

    with st.spinner("Please wait, I am thinking!!"):
        # Ask the LLM; the full in-memory history is passed, trimming to the
        # latest 15 pairs happens inside generate_summary.
        reply = generate_summary(str(reference_data), user_question, st.session_state['chat_history'])

    # Append the new question-answer pair to chat history
    st.session_state['chat_history'].append({"question": user_question, "answer": reply})

    # Update the current chat session in MongoDB
    if st.session_state['current_chat_id']:
        update_chat_session(st.session_state['current_chat_id'], user_question, reply)

# Display the chat history. NOTE(review): the entire history is rendered here,
# not just the last 15 pairs — the 15-pair limit applies only to the LLM context.
for i, pair in enumerate(st.session_state['chat_history']):
    question = pair["question"]
    answer = pair["answer"]
    streamlit_chat.message(question, is_user=True, key=f"chat_message_user_{i}")
    streamlit_chat.message(answer, is_user=False, key=f"chat_message_assistant_{i}")

# Display regenerate button under the last response; the flag prevents the
# button from re-appearing mid-regeneration.
if st.session_state['chat_history'] and not st.session_state['regenerate']:
    if st.button("🔄 Regenerate", key="regenerate_button"):
        st.session_state['regenerate'] = True
        regenerate_response()
|