ak0601 committed on
Commit
263997c
·
verified ·
1 Parent(s): 79bd4f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +258 -124
app.py CHANGED
@@ -1,157 +1,291 @@
1
- import os
2
- import streamlit as st
3
- import google.generativeai as genai
4
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- from langchain_google_genai import ChatGoogleGenerativeAI
6
- from langchain_community.document_loaders import PyPDFLoader
7
- from langchain.text_splitter import RecursiveCharacterTextSplitter
8
- from langchain_community.vectorstores import Chroma
9
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
- from langchain_core.messages import HumanMessage, SystemMessage
11
- from langchain.chains import create_history_aware_retriever, create_retrieval_chain
12
- from langchain.chains.combine_documents import create_stuff_documents_chain
13
- from dotenv import load_dotenv
14
- from langchain.embeddings import HuggingFaceEmbeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- from sentence_transformers import SentenceTransformer
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- import pysqlite3
19
- import sys
20
- sys.modules['sqlite3'] = pysqlite3
21
 
22
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # Retrieve Google API key
25
- GOOGLE_API_KEY = str(os.getenv('GOOGLE_API_KEY'))
26
- HF_TOKEN = str(os.getenv("HF_TOKEN"))
 
 
27
 
28
- if not GOOGLE_API_KEY:
29
- raise ValueError("Gemini API key not found. Please set it in the .env file.")
 
30
 
 
31
  os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
32
- os.environ["HF_TOKEN"] = HF_TOKEN
33
- # Streamlit app configuration
34
- st.set_page_config(page_title="English Chatbot", layout="centered")
35
- st.title("English Tutor Bot")
36
-
37
- # Initialize Google Generative AI LLM
38
- llm = ChatGoogleGenerativeAI(
39
- model="gemini-1.5-pro-latest",
40
- temperature=0.2,
41
- max_tokens=500,
42
- timeout=None,
43
- max_retries=2,
44
- )
45
 
46
  # Initialize embeddings using HuggingFace
47
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
48
 
49
  def load_preprocessed_vectorstore():
 
50
  try:
51
- loader = PyPDFLoader("sound.pdf")
52
  documents = loader.load()
53
-
54
  text_splitter = RecursiveCharacterTextSplitter(
55
  separators=["\n\n", "\n", ". ", " ", ""],
56
- chunk_size=500,
57
- chunk_overlap=150
58
- )
59
-
60
  document_chunks = text_splitter.split_documents(documents)
61
 
62
  vector_store = Chroma.from_documents(
63
  embedding=embeddings,
64
  documents=document_chunks,
65
- persist_directory="./data32"
66
  )
67
  return vector_store
68
  except Exception as e:
69
- st.error(f"Error creating vector store: {e}")
70
  return None
71
 
72
- def get_context_retriever_chain(vector_store):
73
- retriever = vector_store.as_retriever()
74
-
75
- prompt = ChatPromptTemplate.from_messages([
76
- MessagesPlaceholder(variable_name="chat_history"),
77
- ("human", "{input}"),
78
- ("system", """You are an expert english tutor, your task is to help users to learn english. Given the chat history and the latest user question, which might reference context in the chat history, Answer the question
79
- by taking reference from the document.
80
- If the question is directly addressed within the provided document, provide a relevant answer.
81
- If the question is not explicitly addressed in the document, return the following message:
82
- 'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
83
- Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is.""")
84
- ])
85
-
86
- retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
87
- return retriever_chain
88
-
89
- def get_conversational_chain(retriever_chain):
90
- prompt = ChatPromptTemplate.from_messages([
91
- ("system", """Hello! I'm your English Tutor, I am here to help you with learning english and can also take quiz to test your skills.
92
- Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
93
- """
94
- "\n\n"
95
- "{context}"),
96
- MessagesPlaceholder(variable_name="chat_history"),
97
- ("human", "{input}")
98
- ])
99
-
100
- stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
101
- return create_retrieval_chain(retriever_chain, stuff_documents_chain)
102
-
103
- def get_response(user_query):
104
- retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
105
- conversation_rag_chain = get_conversational_chain(retriever_chain)
106
 
 
 
 
 
 
 
 
107
  formatted_chat_history = []
108
- for message in st.session_state.chat_history:
109
- if isinstance(message, HumanMessage):
110
- formatted_chat_history.append({"author": "user", "content": message.content})
111
- elif isinstance(message, SystemMessage):
112
- formatted_chat_history.append({"author": "assistant", "content": message.content})
113
-
114
- response = conversation_rag_chain.invoke({
115
  "chat_history": formatted_chat_history,
116
- "input": user_query
117
  })
118
-
 
 
 
119
  return response['answer']
120
 
121
- # Load the preprocessed vector store from the local directory
122
- st.session_state.vector_store = load_preprocessed_vectorstore()
123
-
124
- # Initialize chat history if not present
125
- if "chat_history" not in st.session_state:
126
- st.session_state.chat_history = [
127
- {"author": "assistant", "content": "Hello, I am a English Tutor Bot. How can I help you?"}
128
- ]
129
-
130
- # Main app logic
131
- if st.session_state.get("vector_store") is None:
132
- st.error("Failed to load preprocessed data. Please ensure the data exists in './data' directory.")
133
- else:
134
- # Display chat history
135
- with st.container():
136
- for message in st.session_state.chat_history:
137
- if message["author"] == "assistant":
138
- with st.chat_message("system"):
139
- st.write(message["content"])
140
- elif message["author"] == "user":
141
- with st.chat_message("human"):
142
- st.write(message["content"])
143
-
144
- # Add user input box below the chat
145
- with st.container():
146
- with st.form(key="chat_form", clear_on_submit=True):
147
- user_query = st.text_input("Type your message here...", key="user_input")
148
- submit_button = st.form_submit_button("Send")
149
-
150
- if submit_button and user_query:
151
- # Get bot response
152
- response = get_response(user_query)
153
- st.session_state.chat_history.append({"author": "user", "content": user_query})
154
- st.session_state.chat_history.append({"author": "assistant", "content": response})
155
-
156
- # Rerun the app to refresh the chat display
157
- st.rerun()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os
2
+ # import streamlit as st
3
+ # import google.generativeai as genai
4
+ # from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ # from langchain_google_genai import ChatGoogleGenerativeAI
6
+ # from langchain_community.document_loaders import PyPDFLoader
7
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ # from langchain_community.vectorstores import Chroma
9
+ # from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
10
+ # from langchain_core.messages import HumanMessage, SystemMessage
11
+ # from langchain.chains import create_history_aware_retriever, create_retrieval_chain
12
+ # from langchain.chains.combine_documents import create_stuff_documents_chain
13
+ # from dotenv import load_dotenv
14
+ # from langchain.embeddings import HuggingFaceEmbeddings
15
+
16
+ # from sentence_transformers import SentenceTransformer
17
+
18
+ # import pysqlite3
19
+ # import sys
20
+ # sys.modules['sqlite3'] = pysqlite3
21
+
22
+ # import os
23
+
24
+ # # Retrieve Google API key
25
+ # GOOGLE_API_KEY = str(os.getenv('GOOGLE_API_KEY'))
26
+ # HF_TOKEN = str(os.getenv("HF_TOKEN"))
27
+
28
+ # if not GOOGLE_API_KEY:
29
+ # raise ValueError("Gemini API key not found. Please set it in the .env file.")
30
+
31
+ # os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
32
+ # os.environ["HF_TOKEN"] = HF_TOKEN
33
+ # # Streamlit app configuration
34
+ # st.set_page_config(page_title="English Chatbot", layout="centered")
35
+ # st.title("English Tutor Bot")
36
+
37
+ # # Initialize Google Generative AI LLM
38
+ # llm = ChatGoogleGenerativeAI(
39
+ # model="gemini-1.5-pro-latest",
40
+ # temperature=0.2,
41
+ # max_tokens=500,
42
+ # timeout=None,
43
+ # max_retries=2,
44
+ # )
45
+
46
+ # # Initialize embeddings using HuggingFace
47
+ # embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
48
+
49
+ # def load_preprocessed_vectorstore():
50
+ # try:
51
+ # loader = PyPDFLoader("sound.pdf")
52
+ # documents = loader.load()
53
+
54
+ # text_splitter = RecursiveCharacterTextSplitter(
55
+ # separators=["\n\n", "\n", ". ", " ", ""],
56
+ # chunk_size=500,
57
+ # chunk_overlap=150
58
+ # )
59
+
60
+ # document_chunks = text_splitter.split_documents(documents)
61
+
62
+ # vector_store = Chroma.from_documents(
63
+ # embedding=embeddings,
64
+ # documents=document_chunks,
65
+ # persist_directory="./data32"
66
+ # )
67
+ # return vector_store
68
+ # except Exception as e:
69
+ # st.error(f"Error creating vector store: {e}")
70
+ # return None
71
+
72
+ # def get_context_retriever_chain(vector_store):
73
+ # retriever = vector_store.as_retriever()
74
+
75
+ # prompt = ChatPromptTemplate.from_messages([
76
+ # MessagesPlaceholder(variable_name="chat_history"),
77
+ # ("human", "{input}"),
78
+ # ("system", """You are an expert english tutor, your task is to help users to learn english. Given the chat history and the latest user question, which might reference context in the chat history, Answer the question
79
+ # by taking reference from the document.
80
+ # If the question is directly addressed within the provided document, provide a relevant answer.
81
+ # If the question is not explicitly addressed in the document, return the following message:
82
+ # 'This question is beyond the scope of the available information. Please contact your mentor for further assistance.'
83
+ # Do NOT answer the question directly, just reformulate it if needed and otherwise return it as is.""")
84
+ # ])
85
+
86
+ # retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
87
+ # return retriever_chain
88
+
89
+ # def get_conversational_chain(retriever_chain):
90
+ # prompt = ChatPromptTemplate.from_messages([
91
+ # ("system", """Hello! I'm your English Tutor, I am here to help you with learning english and can also take quiz to test your skills.
92
+ # Note: I will only provide information that is available within our database to ensure accuracy. Let's get started!
93
+ # """
94
+ # "\n\n"
95
+ # "{context}"),
96
+ # MessagesPlaceholder(variable_name="chat_history"),
97
+ # ("human", "{input}")
98
+ # ])
99
+
100
+ # stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
101
+ # return create_retrieval_chain(retriever_chain, stuff_documents_chain)
102
+
103
+ # def get_response(user_query):
104
+ # retriever_chain = get_context_retriever_chain(st.session_state.vector_store)
105
+ # conversation_rag_chain = get_conversational_chain(retriever_chain)
106
+
107
+ # formatted_chat_history = []
108
+ # for message in st.session_state.chat_history:
109
+ # if isinstance(message, HumanMessage):
110
+ # formatted_chat_history.append({"author": "user", "content": message.content})
111
+ # elif isinstance(message, SystemMessage):
112
+ # formatted_chat_history.append({"author": "assistant", "content": message.content})
113
+
114
+ # response = conversation_rag_chain.invoke({
115
+ # "chat_history": formatted_chat_history,
116
+ # "input": user_query
117
+ # })
118
+
119
+ # return response['answer']
120
+
121
+ # # Load the preprocessed vector store from the local directory
122
+ # st.session_state.vector_store = load_preprocessed_vectorstore()
123
+
124
+ # # Initialize chat history if not present
125
+ # if "chat_history" not in st.session_state:
126
+ # st.session_state.chat_history = [
127
+ # {"author": "assistant", "content": "Hello, I am a English Tutor Bot. How can I help you?"}
128
+ # ]
129
+
130
+ # # Main app logic
131
+ # if st.session_state.get("vector_store") is None:
132
+ # st.error("Failed to load preprocessed data. Please ensure the data exists in './data' directory.")
133
+ # else:
134
+ # # Display chat history
135
+ # with st.container():
136
+ # for message in st.session_state.chat_history:
137
+ # if message["author"] == "assistant":
138
+ # with st.chat_message("system"):
139
+ # st.write(message["content"])
140
+ # elif message["author"] == "user":
141
+ # with st.chat_message("human"):
142
+ # st.write(message["content"])
143
 
144
+ # # Add user input box below the chat
145
+ # with st.container():
146
+ # with st.form(key="chat_form", clear_on_submit=True):
147
+ # user_query = st.text_input("Type your message here...", key="user_input")
148
+ # submit_button = st.form_submit_button("Send")
149
+
150
+ # if submit_button and user_query:
151
+ # # Get bot response
152
+ # response = get_response(user_query)
153
+ # st.session_state.chat_history.append({"author": "user", "content": user_query})
154
+ # st.session_state.chat_history.append({"author": "assistant", "content": response})
155
+
156
+ # # Rerun the app to refresh the chat display
157
+ # st.rerun()
158
 
 
 
 
159
 
160
  import os
161
+ import logging
162
+ import pathlib
163
+ from telegram import Update
164
+ from telegram.ext import Updater, CommandHandler, MessageHandler, CallbackContext, Filters
165
+ from langchain.prompts import PromptTemplate
166
+ from langchain.memory import ConversationBufferMemory
167
+ from langchain.chains import LLMChain, create_retrieval_chain
168
+ from langchain_google_genai import GoogleGenerativeAI
169
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
170
+ from langchain_community.document_loaders import Docx2txtLoader
171
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
172
+ from langchain_community.vectorstores import Chroma
173
+ from langchain.embeddings import HuggingFaceEmbeddings
174
 
175
# Enable logging
# Configure the root logger once at import time; INFO level surfaces the
# bot's request/response flow without debug noise.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.INFO
)
# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
180
 
181
# Credentials are read from the environment — never commit live keys to the
# repository. NOTE(review): the previous revision hard-coded a Google API key,
# an OpenAI key, and a Telegram bot token in source; those secrets are burned
# and must be revoked/rotated immediately.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
TOKEN = os.getenv("TELEGRAM_BOT_TOKEN")

# Fail fast with a clear message instead of crashing later inside a handler.
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY is not set. Export it before starting the bot.")
if not TOKEN:
    raise ValueError("TELEGRAM_BOT_TOKEN is not set. Export it before starting the bot.")

# Propagate the key so google-generativeai / langchain pick it up implicitly.
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
# Initialize embeddings using HuggingFace
# all-mpnet-base-v2 runs locally via sentence-transformers; no API key is
# needed for embedding, only for the LLM calls.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
190
 
191
def load_preprocessed_vectorstore():
    """Build a Chroma vector store from the bundled Pre.docx document.

    Splits the document into large overlapping chunks, embeds them with the
    module-level HuggingFace embeddings, and persists the store to ./data.
    Returns the populated store, or None when loading/embedding fails.
    """
    try:
        docs = Docx2txtLoader("./Pre.docx").load()

        splitter = RecursiveCharacterTextSplitter(
            separators=["\n\n", "\n", ". ", " ", ""],
            chunk_size=3000,
            chunk_overlap=1000,
        )
        chunks = splitter.split_documents(docs)

        return Chroma.from_documents(
            embedding=embeddings,
            documents=chunks,
            persist_directory="./data",
        )
    except Exception as e:
        logger.error(f"Error creating vector store: {e}")
        return None
213
 
214
# Initialize the vector store
# Built once at import time. A None value means document loading failed;
# get_response would then raise on .as_retriever(), which handle_message
# catches and reports to the user.
vector_store = load_preprocessed_vectorstore()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
# Define the Langchain chain with a retrieval mechanism
def get_response(user_message, context):
    """Answer *user_message* with retrieval-augmented generation.

    Pulls relevant chunks from the global ``vector_store``, sends them plus
    the per-chat history to the Gemini LLM, appends the exchange to
    ``context.user_data['chat_history']``, and returns the reply text.

    BUG FIX: the previous version passed a bare PromptTemplate as the second
    argument of create_retrieval_chain (which expects a combine-documents
    Runnable), so no LLM was ever invoked and ``response['answer']`` was a
    prompt value rather than a model response.
    """
    retriever = vector_store.as_retriever()
    relevant_docs = retriever.get_relevant_documents(user_message)
    doc_context = "\n\n".join(doc.page_content for doc in relevant_docs)

    formatted_chat_history = []
    if 'chat_history' in context.user_data:
        formatted_chat_history.extend(context.user_data['chat_history'])
    history_text = "\n".join(
        f"{m['author']}: {m['content']}" for m in formatted_chat_history
    )

    prompt_template = PromptTemplate(
        input_variables=["context", "chat_history", "input"],
        template=(
            "Use the following document context to answer the question.\n"
            "{context}\n\n{chat_history}\nUser: {input}\nAssistant:"
        ),
    )
    llm = GoogleGenerativeAI(model="gemini-1.5-pro-latest", temperature=0.2)
    chain = LLMChain(llm=llm, prompt=prompt_template)
    answer = chain.run(
        context=doc_context, chat_history=history_text, input=user_message
    )

    # Update chat history in user data
    context.user_data['chat_history'] = formatted_chat_history + [
        {"author": "user", "content": user_message},
        {"author": "assistant", "content": answer},
    ]

    return answer
237
 
238
# Start the bot
def start(update: Update, context: CallbackContext) -> None:
    """Reply with a greeting when the /start command is received."""
    first_name = update.effective_user.first_name
    update.message.reply_text(f"Hi {first_name}! I'm a bot powered by OpenAI. Ask me anything.")
243
+
244
# Help command
def help_command(update: Update, context: CallbackContext) -> None:
    """Reply with usage guidance when the /help command is issued."""
    guidance = "Ask me any question, and I'll try to answer using my knowledge!"
    update.message.reply_text(guidance)
248
+
249
# Handle messages
def handle_message(update: Update, context: CallbackContext) -> None:
    """Route an incoming text message through the RAG chain and reply.

    Any failure (generation or delivery) is logged and answered with a
    generic apology instead of crashing the handler.
    """
    incoming_text = update.message.text

    try:
        # Generate a response using Langchain and reply in the same chat.
        answer = get_response(incoming_text, context)
        update.message.reply_text(answer)
    except Exception as e:
        update.message.reply_text("Sorry, I couldn't process your request at the moment.")
        logger.error(f"Error: {e}")
261
+
262
# Error handler
def error_handler(update: Update, context: CallbackContext) -> None:
    """Log any error raised while processing a Telegram update."""
    warning_text = f'Update "{update}" caused error "{context.error}"'
    logger.warning(warning_text)
266
+
267
def main() -> None:
    """Wire up the Telegram handlers and run the bot until interrupted."""
    updater = Updater(TOKEN)
    dispatcher = updater.dispatcher

    # Command handlers, registered in order: /start, then /help.
    for command_name, callback in (("start", start), ("help", help_command)):
        dispatcher.add_handler(CommandHandler(command_name, callback))

    # Plain text (non-command) messages go to the RAG pipeline.
    dispatcher.add_handler(
        MessageHandler(Filters.text & ~Filters.command, handle_message)
    )

    # Centralized error logging.
    dispatcher.add_error_handler(error_handler)

    # Start polling and block until Ctrl-C / SIGINT / SIGTERM / SIGABRT.
    updater.start_polling()
    updater.idle()
289
+
290
# Script entry point: start polling only when executed directly, not on import.
if __name__ == '__main__':
    main()