ishworrsubedii committed on
Commit
3fa5f95
·
1 Parent(s): ba4a6fd

Renamed username to user_id

Browse files
Files changed (2) hide show
  1. app.py +32 -22
  2. functions.py +63 -55
app.py CHANGED
@@ -20,15 +20,16 @@ app.add_middleware(
20
  allow_headers=["*"],
21
  )
22
 
23
- app.include_router(speech_translator_router, prefix="/speech")
24
 
25
 
26
  @app.post("/signup")
27
- async def sign_up(email, password):
28
  try:
29
  res, _ = supabase.auth.sign_up(
30
  {"email": email, "password": password, "role": "user"}
31
  )
 
32
  response = {
33
  "status": "success",
34
  "code": 200,
@@ -56,6 +57,8 @@ async def sign_in(email, password):
56
  user_id = res.user.id
57
  access_token = res.session.access_token
58
  refresh_token = res.session.refresh_token
 
 
59
  store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
60
  try:
61
  store_id = store_session_check[1][0]["StoreID"]
@@ -113,10 +116,17 @@ async def set_session_data(access_token, refresh_token):
113
 
114
 
115
  @app.post("/logout")
116
- async def sign_out():
117
- res = supabase.auth.sign_out()
 
 
 
 
 
118
 
119
- return res
 
 
120
 
121
 
122
  @app.post("/oauth")
@@ -129,13 +139,13 @@ async def oauth(provider):
129
  @app.post("/newChatbot")
130
  async def newChatbot(chatbotName: str, username: str):
131
  currentBotCount = len(listTables(username=username)["output"])
132
- limit = client.table("ConversAI_UserConfig").select("chatbotLimit").eq("username", username).execute().data[0][
133
  "chatbotLimit"]
134
  if currentBotCount >= int(limit):
135
  return {
136
  "output": "CHATBOT LIMIT EXCEEDED"
137
  }
138
- client.table("ConversAI_ChatbotInfo").insert({"username": username, "chatbotname": chatbotName}).execute()
139
  chatbotName = f"convai-{username}-{chatbotName}"
140
  return createTable(tablename=chatbotName)
141
 
@@ -149,12 +159,12 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
149
  text += page.extract_text()
150
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
151
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
152
- currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
153
- limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
154
  "tokenLimit"]
155
  newCount = currentCount + len(text)
156
  if newCount < int(limit):
157
- client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
158
  "chatbotname", chatbotname).execute()
159
  return addDocuments(text=text, vectorstore=vectorstore)
160
  else:
@@ -174,12 +184,12 @@ async def returnText(pdf: UploadFile = File(...)):
174
  async def addText(vectorstore: str, text: str):
175
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
176
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
177
- currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
178
  newCount = currentCount + len(text)
179
- limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
180
  "tokenLimit"]
181
  if newCount < int(limit):
182
- client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
183
  "chatbotname", chatbotname).execute()
184
  return addDocuments(text=text, vectorstore=vectorstore)
185
  else:
@@ -198,13 +208,13 @@ class AddQAPair(BaseModel):
198
  async def addText(addQaPair: AddQAPair):
199
  username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
200
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
201
- currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
202
  qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
203
  newCount = currentCount + len(qa)
204
- limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
205
  "tokenLimit"]
206
  if newCount < int(limit):
207
- client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
208
  "chatbotname", chatbotname).execute()
209
  return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
210
  else:
@@ -222,12 +232,12 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
222
  [f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
223
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
224
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
225
- currentCount = df[(df["username"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
226
  newCount = currentCount + len(text)
227
- limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("username", username).execute().data[0][
228
  "tokenLimit"]
229
  if newCount < int(limit):
230
- client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("username", username).eq(
231
  "chatbotname", chatbotname).execute()
232
  return addDocuments(text=text, vectorstore=vectorstore)
233
  else:
@@ -244,7 +254,7 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
244
  @app.post("/deleteChatbot")
245
  async def delete(chatbotName: str):
246
  username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
247
- client.table('ConversAI_ChatbotInfo').delete().eq('username', username).eq('chatbotname', chatbotName).execute()
248
  return deleteTable(tableName=chatbotName)
249
 
250
 
@@ -265,7 +275,7 @@ async def getCount(vectorstore: str):
265
  username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
266
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
267
  return {
268
- "currentCount": df[(df['username'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
269
  }
270
 
271
 
@@ -294,4 +304,4 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
294
  except:
295
  return {
296
  "output": "UNABLE TO ANSWER QUERY"
297
- }
 
20
  allow_headers=["*"],
21
  )
22
 
23
+ # app.include_router(speech_translator_router, prefix="/speech")
24
 
25
 
26
  @app.post("/signup")
27
+ async def sign_up(email, username, password):
28
  try:
29
  res, _ = supabase.auth.sign_up(
30
  {"email": email, "password": password, "role": "user"}
31
  )
32
+ createUser(username=username)
33
  response = {
34
  "status": "success",
35
  "code": 200,
 
57
  user_id = res.user.id
58
  access_token = res.session.access_token
59
  refresh_token = res.session.refresh_token
60
+ createUser(username=user_id)
61
+
62
  store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
63
  try:
64
  store_id = store_session_check[1][0]["StoreID"]
 
116
 
117
 
118
@app.post("/logout")
async def sign_out(store_id):
    """Delete the matching "Stores" row, then call ``supabase.auth.sign_out()``.

    Args:
        store_id: Value matched against the "StoreID" column of "Stores".

    Returns:
        ``{"message": "success"}`` when both calls complete without raising.

    Raises:
        HTTPException: status 400 whose detail is the text of any exception
            raised by the table delete or by the sign-out call.
    """
    try:
        supabase.table("Stores").delete().eq("StoreID", store_id).execute()
        # The sign-out result was never read, so it is no longer bound to a name.
        supabase.auth.sign_out()
    except Exception as e:
        # Endpoint boundary: every failure is surfaced to the client as a 400.
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"message": "success"}
130
 
131
 
132
  @app.post("/oauth")
 
139
@app.post("/newChatbot")
async def newChatbot(chatbotName: str, username: str):
    """Create a chatbot collection for a user unless the user's limit is reached.

    Args:
        chatbotName: Short name of the chatbot to create.
        username: Value matched against the "user_id" column of
            "ConversAI_UserConfig" and stored in "ConversAI_ChatbotInfo".

    Returns:
        ``{"output": "CHATBOT LIMIT EXCEEDED"}`` when the user already owns
        ``chatbotLimit`` or more collections; otherwise whatever
        ``createTable`` returns for ``convai-{username}-{chatbotName}``.

    Raises:
        HTTPException: 500 when ``listTables`` reports an error instead of a
            listing; 404 when the user has no "ConversAI_UserConfig" row.
    """
    tables = listTables(username=username)
    if "output" not in tables:
        # listTables returns {"error": ...} on failure; indexing ["output"]
        # would otherwise surface as an opaque KeyError.
        raise HTTPException(
            status_code=500,
            detail=str(tables.get("error", "UNABLE TO LIST CHATBOTS")),
        )
    currentBotCount = len(tables["output"])

    configRows = (
        client.table("ConversAI_UserConfig")
        .select("chatbotLimit")
        .eq("user_id", username)
        .execute()
        .data
    )
    if not configRows:
        # No config row means there is no limit to compare against.
        raise HTTPException(status_code=404, detail="USER CONFIG NOT FOUND")
    limit = configRows[0]["chatbotLimit"]

    if currentBotCount >= int(limit):
        return {
            "output": "CHATBOT LIMIT EXCEEDED"
        }
    client.table("ConversAI_ChatbotInfo").insert({"user_id": username, "chatbotname": chatbotName}).execute()
    return createTable(tablename=f"convai-{username}-{chatbotName}")
151
 
 
159
  text += page.extract_text()
160
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
161
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
162
+ currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
163
+ limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
164
  "tokenLimit"]
165
  newCount = currentCount + len(text)
166
  if newCount < int(limit):
167
+ client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
168
  "chatbotname", chatbotname).execute()
169
  return addDocuments(text=text, vectorstore=vectorstore)
170
  else:
 
184
  async def addText(vectorstore: str, text: str):
185
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
186
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
187
+ currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
188
  newCount = currentCount + len(text)
189
+ limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
190
  "tokenLimit"]
191
  if newCount < int(limit):
192
+ client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
193
  "chatbotname", chatbotname).execute()
194
  return addDocuments(text=text, vectorstore=vectorstore)
195
  else:
 
208
  async def addText(addQaPair: AddQAPair):
209
  username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
210
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
211
+ currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
212
  qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
213
  newCount = currentCount + len(qa)
214
+ limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
215
  "tokenLimit"]
216
  if newCount < int(limit):
217
+ client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
218
  "chatbotname", chatbotname).execute()
219
  return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
220
  else:
 
232
  [f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
233
  username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
234
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
235
+ currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
236
  newCount = currentCount + len(text)
237
+ limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
238
  "tokenLimit"]
239
  if newCount < int(limit):
240
+ client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
241
  "chatbotname", chatbotname).execute()
242
  return addDocuments(text=text, vectorstore=vectorstore)
243
  else:
 
254
@app.post("/deleteChatbot")
async def delete(chatbotName: str):
    """Delete a chatbot's "ConversAI_ChatbotInfo" row and its collection.

    Args:
        chatbotName: Full collection name in the ``convai-{user_id}-{chatbot}``
            form produced by ``newChatbot``.

    Returns:
        Whatever ``deleteTable`` returns for the full collection name.

    Raises:
        HTTPException: 400 when ``chatbotName`` does not contain both a user
            id and a chatbot part.
    """
    collectionName = chatbotName
    # Drop the leading prefix, then split the remainder on its LAST "-" so a
    # user id that itself contains "-" (e.g. a UUID-style auth id) stays intact.
    # NOTE(review): this assumes the chatbot part contains no "-"; confirm
    # against how clients name chatbots.
    _, _, ownerAndBot = collectionName.partition("-")
    username, _, botName = ownerAndBot.rpartition("-")
    if not username or not botName:
        raise HTTPException(status_code=400, detail="INVALID CHATBOT NAME")

    client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname', botName).execute()
    # Collections are created under the full "convai-..." name, so the full
    # name (not just the short chatbot part) must be handed to deleteTable.
    return deleteTable(tableName=collectionName)
259
 
260
 
 
275
  username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
276
  df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
277
  return {
278
+ "currentCount": df[(df['user_id'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
279
  }
280
 
281
 
 
304
  except:
305
  return {
306
  "output": "UNABLE TO ANSWER QUERY"
307
+ }
functions.py CHANGED
@@ -32,19 +32,18 @@ import base64
32
  import time
33
  import requests
34
 
35
-
36
  load_dotenv("secrets.env")
37
  client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
38
  qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
39
  model_kwargs = {"device": "cuda"}
40
  encode_kwargs = {"normalize_embeddings": True}
41
  vectorEmbeddings = HuggingFaceEmbeddings(
42
- model_name = "BAAI/bge-m3",
43
- model_kwargs = model_kwargs,
44
- encode_kwargs = encode_kwargs
45
  )
46
- reader = easyocr.Reader(['en'], gpu = True, model_storage_directory = "/app/EasyOCRModels")
47
- sparseEmbeddings = FastEmbedSparse(model = "Qdrant/BM25")
48
  prompt = """
49
  INSTRUCTIONS:
50
  =====================================
@@ -81,46 +80,48 @@ store = InMemoryStore()
81
  chatHistoryStore = dict()
82
 
83
 
84
- def createUser(username: str, password: str) -> None:
85
  try:
86
  userData = client.table("ConversAI_UserInfo").select("*").execute().data
87
- if username not in [userData[x]["username"] for x in range(len(userData))]:
88
- client.table("ConversAI_UserInfo").insert({"username": username, "password": password}).execute()
89
- client.table("ConversAI_UserConfig").insert({"username": username}).execute()
90
  return {
91
  "output": "SUCCESS"
92
  }
93
- else:
94
  return {
95
  "output": "USER ALREADY EXISTS"
96
  }
97
  except Exception as e:
98
  return {
99
  "error": e
100
- }
101
 
102
 
103
- def matchPassword(username: str, password: str) -> str:
104
- response = (
105
- client.table("ConversAI_UserInfo")
106
- .select("*")
107
- .eq("username", username)
108
- .execute()
109
- )
110
- try: return {
111
- "output": password == response.data[0]["password"]
112
- }
113
- except: return {
114
- "output": "USER DOESN'T EXIST"
115
- }
 
 
116
 
117
 
118
  def createTable(tablename: str):
119
  global vectorEmbeddings
120
  global sparseEmbeddings
121
  qdrant = QdrantVectorStore.from_documents(
122
- documents = [],
123
- embedding = vectorEmbeddings,
124
  sparse_embedding=sparseEmbeddings,
125
  url=os.environ["QDRANT_URL"],
126
  prefer_grpc=True,
@@ -132,21 +133,22 @@ def createTable(tablename: str):
132
  "output": "SUCCESS"
133
  }
134
 
 
135
  def addDocuments(text: str, vectorstore: str):
136
  global vectorEmbeddings
137
  global sparseEmbeddings
138
  global store
139
  parentSplitter = RecursiveCharacterTextSplitter(
140
- chunk_size = 2100,
141
- add_start_index = True
142
  )
143
  childSplitter = RecursiveCharacterTextSplitter(
144
- chunk_size = 300,
145
- add_start_index = True
146
  )
147
- texts = [Document(page_content = text)]
148
  vectorstore = QdrantVectorStore.from_existing_collection(
149
- embedding = vectorEmbeddings,
150
  sparse_embedding=sparseEmbeddings,
151
  collection_name=vectorstore,
152
  url=os.environ["QDRANT_URL"],
@@ -159,7 +161,7 @@ def addDocuments(text: str, vectorstore: str):
159
  child_splitter=childSplitter,
160
  parent_splitter=parentSplitter
161
  )
162
- retriever.add_documents(documents = texts)
163
  return {
164
  "output": "SUCCESS"
165
  }
@@ -169,7 +171,8 @@ def format_docs(docs: str):
169
  context = "\n\n".join(doc.page_content for doc in docs)
170
  if context == "":
171
  context = "No context found"
172
- else: pass
 
173
  return context
174
 
175
 
@@ -186,19 +189,19 @@ def trimMessages(chain_input):
186
  pass
187
  else:
188
  chatHistoryStore[storeName].clear()
189
- for message in messages[-1: ]:
190
  chatHistoryStore[storeName].add_message(message)
191
  return True
192
 
193
 
194
  def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
195
- global prompt
196
  global client
197
  global vectorEmbeddings
198
  global sparseEmbeddings
199
  vectorStoreName = vectorstore
200
  vectorstore = QdrantVectorStore.from_existing_collection(
201
- embedding = vectorEmbeddings,
202
  sparse_embedding=sparseEmbeddings,
203
  collection_name=vectorstore,
204
  url=os.environ["QDRANT_URL"],
@@ -216,25 +219,25 @@ def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192")
216
  base_compressor=compressor, base_retriever=retriever
217
  )
218
  baseChain = (
219
- {"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs), "question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
220
- | prompt
221
- | ChatGroq(model = llmModel, temperature = 0.75, max_tokens = 512)
222
- | StrOutputParser()
223
- )
 
224
  messageChain = RunnableWithMessageHistory(
225
  baseChain,
226
  get_session_history,
227
- input_messages_key = "question",
228
- history_messages_key = "chatHistory"
229
  )
230
- chain = RunnablePassthrough.assign(messages_trimmed = trimMessages) | messageChain
231
  return {
232
  "output": chain.invoke(
233
  {"question": query},
234
  {"configurable": {"session_id": vectorStoreName}}
235
  )
236
  }
237
-
238
 
239
 
240
  def deleteTable(tableName: str):
@@ -249,21 +252,24 @@ def deleteTable(tableName: str):
249
  "error": e
250
  }
251
 
 
252
  def listTables(username: str):
253
  try:
254
  global qdrantClient
255
  qdrantCollections = qdrantClient.get_collections()
256
  return {
257
- "output": list(filter(lambda x: True if x.split("-")[1] == username else False, [x.name for x in qdrantCollections.collections]))
 
258
  }
259
  except Exception as e:
260
  return {
261
  "error": e
262
  }
263
-
264
 
265
- def getLinks(url: str, timeout = 30):
 
266
  start = time.time()
 
267
  def getLinksFromPage(url: str) -> list:
268
  response = requests.get(url)
269
  soup = BeautifulSoup(response.content, "lxml")
@@ -281,6 +287,7 @@ def getLinks(url: str, timeout = 30):
281
  else:
282
  continue
283
  return links
 
284
  links = getLinksFromPage(url)
285
  uniqueLinks = set()
286
  for link in links:
@@ -292,22 +299,23 @@ def getLinks(url: str, timeout = 30):
292
  return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
293
 
294
 
295
-
296
  def getTextFromImagePDF(pdfBytes):
297
  def getText(image):
298
  global reader
299
  return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
 
300
  allImages = convert_from_bytes(pdfBytes)
301
  texts = [getText(image) for image in allImages]
302
  return "\n\n\n".join(texts)
303
 
 
304
  def getTranscript(urls: str):
305
  urls = urls.split(",")
306
  texts = []
307
  for url in urls:
308
  try:
309
  loader = YoutubeLoader.from_youtube_url(
310
- url, add_video_info = False
311
  )
312
  doc = " ".join([x.page_content for x in loader.load()])
313
  texts.append(doc)
@@ -318,12 +326,12 @@ def getTranscript(urls: str):
318
 
319
 
320
  def analyzeData(query, dataframe):
321
- llm = ChatGroq(name = "llama-3.1-8b-instant")
322
- df = SmartDataframe(dataframe, config = {"llm": llm, "verbose": False})
323
  response = df.chat(query)
324
  if os.path.isfile(response):
325
  with open(response, "rb") as file:
326
  b64string = base64.b64encode(file.read()).decode("utf-8")
327
  return f"data:image/png;base64,{b64string}"
328
  else:
329
- return response
 
32
  import time
33
  import requests
34
 
 
35
  load_dotenv("secrets.env")
36
  client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
37
  qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
38
  model_kwargs = {"device": "cuda"}
39
  encode_kwargs = {"normalize_embeddings": True}
40
  vectorEmbeddings = HuggingFaceEmbeddings(
41
+ model_name="BAAI/bge-m3",
42
+ model_kwargs=model_kwargs,
43
+ encode_kwargs=encode_kwargs
44
  )
45
+ reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
46
+ sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25")
47
  prompt = """
48
  INSTRUCTIONS:
49
  =====================================
 
80
  chatHistoryStore = dict()
81
 
82
 
83
def createUser(username: str) -> dict:
    """Register a user in "ConversAI_UserInfo" and "ConversAI_UserConfig" once.

    Args:
        username: Value stored in the "user_id" column of both tables.

    Returns:
        ``{"output": "SUCCESS"}`` after inserting both rows,
        ``{"output": "USER ALREADY EXISTS"}`` when a "ConversAI_UserInfo" row
        with this "user_id" is already present, or ``{"error": <exception>}``
        when any table call raises.
    """
    try:
        # Ask the table for this one user instead of downloading every row
        # and scanning the result in Python.
        existing = (
            client.table("ConversAI_UserInfo")
            .select("user_id")
            .eq("user_id", username)
            .execute()
            .data
        )
        if existing:
            return {
                "output": "USER ALREADY EXISTS"
            }
        client.table("ConversAI_UserInfo").insert({"user_id": username}).execute()
        client.table("ConversAI_UserConfig").insert({"user_id": username}).execute()
        return {
            "output": "SUCCESS"
        }
    except Exception as e:
        return {
            "error": e
        }
100
 
101
 
102
+ # def matchPassword(username: str, password: str) -> str:
103
+ # response = (
104
+ # client.table("ConversAI_UserInfo")
105
+ # .select("*")
106
+ # .eq("username", username)
107
+ # .execute()
108
+ # )
109
+ # try:
110
+ # return {
111
+ # "output": password == response.data[0]["password"]
112
+ # }
113
+ # except:
114
+ # return {
115
+ # "output": "USER DOESN'T EXIST"
116
+ # }
117
 
118
 
119
  def createTable(tablename: str):
120
  global vectorEmbeddings
121
  global sparseEmbeddings
122
  qdrant = QdrantVectorStore.from_documents(
123
+ documents=[],
124
+ embedding=vectorEmbeddings,
125
  sparse_embedding=sparseEmbeddings,
126
  url=os.environ["QDRANT_URL"],
127
  prefer_grpc=True,
 
133
  "output": "SUCCESS"
134
  }
135
 
136
+
137
  def addDocuments(text: str, vectorstore: str):
138
  global vectorEmbeddings
139
  global sparseEmbeddings
140
  global store
141
  parentSplitter = RecursiveCharacterTextSplitter(
142
+ chunk_size=2100,
143
+ add_start_index=True
144
  )
145
  childSplitter = RecursiveCharacterTextSplitter(
146
+ chunk_size=300,
147
+ add_start_index=True
148
  )
149
+ texts = [Document(page_content=text)]
150
  vectorstore = QdrantVectorStore.from_existing_collection(
151
+ embedding=vectorEmbeddings,
152
  sparse_embedding=sparseEmbeddings,
153
  collection_name=vectorstore,
154
  url=os.environ["QDRANT_URL"],
 
161
  child_splitter=childSplitter,
162
  parent_splitter=parentSplitter
163
  )
164
+ retriever.add_documents(documents=texts)
165
  return {
166
  "output": "SUCCESS"
167
  }
 
171
  context = "\n\n".join(doc.page_content for doc in docs)
172
  if context == "":
173
  context = "No context found"
174
+ else:
175
+ pass
176
  return context
177
 
178
 
 
189
  pass
190
  else:
191
  chatHistoryStore[storeName].clear()
192
+ for message in messages[-1:]:
193
  chatHistoryStore[storeName].add_message(message)
194
  return True
195
 
196
 
197
  def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
198
+ global prompt
199
  global client
200
  global vectorEmbeddings
201
  global sparseEmbeddings
202
  vectorStoreName = vectorstore
203
  vectorstore = QdrantVectorStore.from_existing_collection(
204
+ embedding=vectorEmbeddings,
205
  sparse_embedding=sparseEmbeddings,
206
  collection_name=vectorstore,
207
  url=os.environ["QDRANT_URL"],
 
219
  base_compressor=compressor, base_retriever=retriever
220
  )
221
  baseChain = (
222
+ {"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
223
+ "question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
224
+ | prompt
225
+ | ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
226
+ | StrOutputParser()
227
+ )
228
  messageChain = RunnableWithMessageHistory(
229
  baseChain,
230
  get_session_history,
231
+ input_messages_key="question",
232
+ history_messages_key="chatHistory"
233
  )
234
+ chain = RunnablePassthrough.assign(messages_trimmed=trimMessages) | messageChain
235
  return {
236
  "output": chain.invoke(
237
  {"question": query},
238
  {"configurable": {"session_id": vectorStoreName}}
239
  )
240
  }
 
241
 
242
 
243
  def deleteTable(tableName: str):
 
252
  "error": e
253
  }
254
 
255
+
256
def listTables(username: str):
    """List the Qdrant collections whose name carries ``username`` as owner.

    A collection matches when the text after the first "-" in its name is
    exactly ``username`` or starts with ``username`` followed by "-". Names
    without any "-" are skipped rather than failing the whole listing, and
    user ids that themselves contain "-" can match.

    Args:
        username: Owner id to look for in collection names.

    Returns:
        ``{"output": [<collection name>, ...]}`` on success, or
        ``{"error": <exception>}`` when the Qdrant call raises.
    """

    def ownedBy(collectionName: str) -> bool:
        # partition never raises, unlike indexing split("-")[1] on a name
        # that has no "-" at all.
        _, separator, ownerAndBot = collectionName.partition("-")
        if not separator:
            return False
        return ownerAndBot == username or ownerAndBot.startswith(f"{username}-")

    try:
        collectionNames = [c.name for c in qdrantClient.get_collections().collections]
        return {
            "output": [name for name in collectionNames if ownedBy(name)]
        }
    except Exception as e:
        return {
            "error": e
        }
 
268
 
269
+
270
+ def getLinks(url: str, timeout=30):
271
  start = time.time()
272
+
273
  def getLinksFromPage(url: str) -> list:
274
  response = requests.get(url)
275
  soup = BeautifulSoup(response.content, "lxml")
 
287
  else:
288
  continue
289
  return links
290
+
291
  links = getLinksFromPage(url)
292
  uniqueLinks = set()
293
  for link in links:
 
299
  return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
300
 
301
 
 
302
def getTextFromImagePDF(pdfBytes):
    """Run OCR over every page image of a PDF and return the combined text.

    Args:
        pdfBytes: PDF content passed straight to ``convert_from_bytes``.

    Returns:
        One string: the text entries of each page joined by "\n", and the
        pages joined by three newlines.
    """
    pageImages = convert_from_bytes(pdfBytes)
    pageTexts = []
    for pageImage in pageImages:
        detections = reader.readtext(np.array(pageImage), paragraph=True)
        # Index 1 of each detection is the element the original code joined.
        pageTexts.append("\n".join([detection[1] for detection in detections]))
    return "\n\n\n".join(pageTexts)
310
 
311
+
312
  def getTranscript(urls: str):
313
  urls = urls.split(",")
314
  texts = []
315
  for url in urls:
316
  try:
317
  loader = YoutubeLoader.from_youtube_url(
318
+ url, add_video_info=False
319
  )
320
  doc = " ".join([x.page_content for x in loader.load()])
321
  texts.append(doc)
 
326
 
327
 
328
def analyzeData(query, dataframe):
    """Answer ``query`` about ``dataframe`` through a ``SmartDataframe`` chat.

    Args:
        query: Question passed to ``SmartDataframe.chat``.
        dataframe: Data wrapped by ``SmartDataframe``.

    Returns:
        When the chat response is a string naming an existing file, a
        ``data:image/png;base64,...`` URI built from that file's bytes;
        otherwise the chat response unchanged.
    """
    # Select the model with ``model=`` as answerQuery does; ``name=`` does not
    # choose the model.
    llm = ChatGroq(model="llama-3.1-8b-instant")
    df = SmartDataframe(dataframe, config={"llm": llm, "verbose": False})
    response = df.chat(query)
    # Only string responses can be file paths; passing other result types to
    # os.path.isfile can raise or be misread as a file descriptor.
    if isinstance(response, str) and os.path.isfile(response):
        with open(response, "rb") as file:
            b64string = base64.b64encode(file.read()).decode("utf-8")
        return f"data:image/png;base64,{b64string}"
    return response