Rauhan committed on
Commit
e3475f1
2 Parent(s): d22b8e9 29dd018

DEBUG: FlashRank

Browse files
Files changed (3) hide show
  1. app.py +77 -67
  2. functions.py +8 -11
  3. requirements.txt +2 -1
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import io
2
  import tempfile
 
 
 
3
  from starlette import status
4
  from functions import *
5
  import pandas as pd
@@ -13,7 +16,6 @@ import nltk
13
  import time
14
  import uuid
15
 
16
-
17
  nltk.download('punkt_tab')
18
 
19
  app = FastAPI(title="ConversAI", root_path="/api/v1")
@@ -48,11 +50,24 @@ async def sign_up(email, username, password):
48
 
49
 
50
  @app.post("/session-check")
51
- async def check_session():
52
  res = supabase.auth.get_session()
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  return res
55
 
 
56
  @app.post("/get-user")
57
  async def get_user(access_token):
58
  res = supabase.auth.get_user(jwt=access_token)
@@ -65,7 +80,6 @@ async def refresh_token(refresh_token):
65
  return res
66
 
67
 
68
-
69
  @app.post("/login")
70
  async def sign_in(email, password):
71
  try:
@@ -127,68 +141,55 @@ async def sign_in(email, password):
127
  )
128
 
129
 
130
- @app.post('login_with_token')
131
- async def login_with_token(token):
132
  try:
133
- res = supabase.auth.sign_in_with_id_token(token)
134
- print(res)
135
- user_id = res.user.id
136
- access_token = res.session.access_token
137
- refresh_token = res.session.refresh_token
138
 
139
- store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
140
- store_id = None
141
-
142
- if store_session_check and store_session_check.data:
143
- store_id = store_session_check.data[0].get("StoreID")
144
-
145
- if not store_id:
146
- response = (
147
- supabase.table("Stores").insert(
148
- {
149
- "AccessToken": access_token,
150
- "StoreID": user_id,
151
- "RefreshToken": refresh_token,
152
- }
153
- ).execute()
154
- )
155
 
156
- message = {
157
- "message": "Success",
158
- "code": status.HTTP_200_OK,
159
- "user_id": user_id,
160
- "access_token": access_token,
161
- "refresh_token": refresh_token
162
- }
163
- return message
164
 
165
- elif store_id == user_id:
166
- raise HTTPException(
167
- status_code=status.HTTP_400_BAD_REQUEST,
168
- detail="You are already signed in. Please sign out first to sign in again."
169
- )
170
 
171
- else:
172
- raise HTTPException(
173
- status_code=status.HTTP_400_BAD_REQUEST,
174
- detail="Failed to sign in. Please check your credentials."
175
- )
176
-
177
- except HTTPException as http_exc:
178
- raise http_exc
179
-
180
- except Exception as e:
181
- raise HTTPException(
182
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
183
- detail=f"An unexpected error occurred during sign-in: {str(e)}"
184
- )
185
 
 
 
 
 
186
 
187
 
188
  @app.post("/set-session-data")
189
- async def set_session_data(access_token, refresh_token):
190
  res = supabase.auth.set_session(access_token, refresh_token)
191
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  return res
193
 
194
 
@@ -207,8 +208,9 @@ async def sign_out(user_id):
207
 
208
 
209
  @app.post("/oauth")
210
- async def oauth(provider):
211
- res = supabase.auth.sign_in_with_oauth({"provider": provider})
 
212
  return res
213
 
214
 
@@ -259,7 +261,8 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
259
  with open(f"{fileId}.txt", "w") as file:
260
  file.write(newText)
261
  with open(f"{fileId}.txt", "rb") as f:
262
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
 
263
  os.remove(f"{fileId}.txt")
264
  output["supabaseFileName"] = f"{fileId}.txt"
265
  return output
@@ -280,7 +283,7 @@ async def returnText(pdf: UploadFile = File(...)):
280
  return {
281
  "source": source,
282
  "extractionTime": timeTaken,
283
- "output": text
284
  }
285
 
286
 
@@ -307,7 +310,8 @@ async def addText(vectorstore: str, text: str, source: str | None = None):
307
  with open(f"{fileId}.txt", "w") as file:
308
  file.write(newText)
309
  with open(f"{fileId}.txt", "rb") as f:
310
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
 
311
  os.remove(f"{fileId}.txt")
312
  output["supabaseFileName"] = f"{fileId}.txt"
313
  return output
@@ -345,13 +349,14 @@ async def addQAPairData(addQaPair: AddQAPair):
345
  @app.post("/addWebsite")
346
  async def addWebsite(vectorstore: str, websiteUrls: list[str]):
347
  start = time.time()
348
- text = extractTextFromUrlList(urls = websiteUrls)
349
  textExtraction = time.time()
350
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
351
  df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
352
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
353
  newCount = currentCount + len(text)
354
- limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0]["tokenLimit"]
 
355
  if newCount < int(limit):
356
  supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
357
  "chatbotname", chatbotname).execute()
@@ -364,12 +369,14 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
364
  tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
365
  wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
366
  links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
367
- newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
 
368
  fileId = str(uuid.uuid4())
369
  with open(f"{fileId}.txt", "w") as file:
370
  file.write(newText)
371
  with open(f"{fileId}.txt", "rb") as f:
372
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
 
373
  os.remove(f"{fileId}.txt")
374
  output["supabaseFileName"] = f"{fileId}.txt"
375
  return output
@@ -385,7 +392,8 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
385
  output = answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
386
  response = (
387
  supabase.table("ConversAI_ChatHistory")
388
- .insert({"username": username, "chatbotName": chatbotName, "llmModel": llmModel, "question": query, "response": output["output"]})
 
389
  .execute()
390
  )
391
  return output
@@ -450,5 +458,7 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
450
  @app.post("/getChatHistory")
451
  async def chatHistory(vectorstore: str):
452
  username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
453
- response = supabase.table("ConversAI_ChatHistory").select("timestamp", "question", "response").eq("username", username).eq("chatbotName", chatbotName).execute().data
454
- return response
 
 
 
1
  import io
2
  import tempfile
3
+ import jwt
4
+ from click import option
5
+ from jwt import ExpiredSignatureError, InvalidTokenError
6
  from starlette import status
7
  from functions import *
8
  import pandas as pd
 
16
  import time
17
  import uuid
18
 
 
19
  nltk.download('punkt_tab')
20
 
21
  app = FastAPI(title="ConversAI", root_path="/api/v1")
 
50
 
51
 
52
  @app.post("/session-check")
53
+ async def check_session(user_id: str):
54
  res = supabase.auth.get_session()
55
+ if res == None:
56
+ try:
57
+ supabase.table("Stores").delete().eq(
58
+ "StoreID", user_id
59
+ ).execute()
60
+ resp = supabase.auth.sign_out()
61
+
62
+ response = {"message": "success", "code": 200, "Session": res}
63
+
64
+ return response
65
+ except Exception as e:
66
+ raise HTTPException(status_code=400, detail=str(e))
67
 
68
  return res
69
 
70
+
71
  @app.post("/get-user")
72
  async def get_user(access_token):
73
  res = supabase.auth.get_user(jwt=access_token)
 
80
  return res
81
 
82
 
 
83
  @app.post("/login")
84
  async def sign_in(email, password):
85
  try:
 
141
  )
142
 
143
 
144
+ @app.post("/login_with_token")
145
+ async def login_with_token(access_token: str, refresh_token: str):
146
  try:
147
+ decoded_token = jwt.decode(access_token, options={"verify_signature": False})
 
 
 
 
148
 
149
+ json = {
150
+ "code": status.HTTP_200_OK,
151
+ "user_id": decoded_token.get("sub"),
152
+ "email": decoded_token.get("email"),
153
+ "access_token": access_token,
154
+ "refresh_token": refresh_token,
155
+ "issued_at": decoded_token.get("iat"),
156
+ "expires_at": decoded_token.get("exp")
 
 
 
 
 
 
 
 
157
 
158
+ }
159
+ return json
 
 
 
 
 
 
160
 
161
+ except (ExpiredSignatureError, InvalidTokenError) as e:
162
+ raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=str(e))
 
 
 
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ @app.post("/user_name")
166
+ async def user_name_(username: str, user_id: str):
167
+ r_ = createUser(user_id=user_id, username=username)
168
+ return r_
169
 
170
 
171
  @app.post("/set-session-data")
172
+ async def set_session_data(access_token, refresh_token, user_id):
173
  res = supabase.auth.set_session(access_token, refresh_token)
174
+ store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
175
+ store_id = None
176
+ if store_session_check and store_session_check.data:
177
+ store_id = store_session_check.data[0].get("StoreID")
178
+ if not store_id:
179
+ response = (
180
+ supabase.table("Stores").insert(
181
+ {
182
+ "AccessToken": access_token,
183
+ "StoreID": user_id,
184
+ "RefreshToken": refresh_token,
185
+ }
186
+ ).execute()
187
+ )
188
+ res = {
189
+ "message": "success",
190
+ "code": 200,
191
+ "session_data": res,
192
+ }
193
  return res
194
 
195
 
 
208
 
209
 
210
  @app.post("/oauth")
211
+ async def oauth():
212
+ res = supabase.auth.sign_in_with_oauth(
213
+ {"provider": "google", "options": {"redirect_to": "https://convers-ai-lac.vercel.app/"}})
214
  return res
215
 
216
 
 
261
  with open(f"{fileId}.txt", "w") as file:
262
  file.write(newText)
263
  with open(f"{fileId}.txt", "rb") as f:
264
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
265
+ file_options={"content-type": "text/plain"})
266
  os.remove(f"{fileId}.txt")
267
  output["supabaseFileName"] = f"{fileId}.txt"
268
  return output
 
283
  return {
284
  "source": source,
285
  "extractionTime": timeTaken,
286
+ "output": text
287
  }
288
 
289
 
 
310
  with open(f"{fileId}.txt", "w") as file:
311
  file.write(newText)
312
  with open(f"{fileId}.txt", "rb") as f:
313
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
314
+ file_options={"content-type": "text/plain"})
315
  os.remove(f"{fileId}.txt")
316
  output["supabaseFileName"] = f"{fileId}.txt"
317
  return output
 
349
  @app.post("/addWebsite")
350
  async def addWebsite(vectorstore: str, websiteUrls: list[str]):
351
  start = time.time()
352
+ text = extractTextFromUrlList(urls=websiteUrls)
353
  textExtraction = time.time()
354
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
355
  df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
356
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
357
  newCount = currentCount + len(text)
358
+ limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
359
+ "tokenLimit"]
360
  if newCount < int(limit):
361
  supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
362
  "chatbotname", chatbotname).execute()
 
369
  tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
370
  wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
371
  links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
372
+ newText = ("=" * 75 + "\n").join(
373
+ [timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
374
  fileId = str(uuid.uuid4())
375
  with open(f"{fileId}.txt", "w") as file:
376
  file.write(newText)
377
  with open(f"{fileId}.txt", "rb") as f:
378
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
379
+ file_options={"content-type": "text/plain"})
380
  os.remove(f"{fileId}.txt")
381
  output["supabaseFileName"] = f"{fileId}.txt"
382
  return output
 
392
  output = answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
393
  response = (
394
  supabase.table("ConversAI_ChatHistory")
395
+ .insert({"username": username, "chatbotName": chatbotName, "llmModel": llmModel, "question": query,
396
+ "response": output["output"]})
397
  .execute()
398
  )
399
  return output
 
458
  @app.post("/getChatHistory")
459
  async def chatHistory(vectorstore: str):
460
  username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
461
+ response = supabase.table("ConversAI_ChatHistory").select("timestamp", "question", "response").eq("username",
462
+ username).eq(
463
+ "chatbotName", chatbotName).execute().data
464
+ return response
functions.py CHANGED
@@ -1,4 +1,4 @@
1
- import pymupdf
2
  from concurrent.futures import ThreadPoolExecutor
3
  from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
4
  from langchain_core.runnables import RunnablePassthrough, RunnableLambda
@@ -45,21 +45,18 @@ vectorEmbeddings = HuggingFaceEmbeddings(
45
  encode_kwargs=encode_kwargs
46
  )
47
  reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
48
- sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25", threads = 20 , parallel = 0)
49
  prompt = """
50
  INSTRUCTIONS:
51
  =====================================
52
  ### Role
53
  **Primary Function**: You are an AI chatbot designed to provide accurate and efficient assistance to users based on provided context data. Your responses must be reliable, friendly, and directly address user inquiries or issues. Always clarify any unclear questions, and conclude responses positively.
54
-
55
  ### Constraints
56
  1. **No Data Disclosure**: Never reveal access to training data or any context explicitly.
57
  2. **Maintaining Focus**: Politely redirect any off-topic conversations back to relevant issues without breaking character.
58
  3. **Exclusive Reliance on Context Data**: Base all answers strictly on the provided context data. If the context doesn’t cover the query, use a fallback response. Always maintain a third-person perspective.
59
  4. **Restrictive Role Focus**: Do not engage in tasks or answer questions unrelated to your role or context data.
60
-
61
  Ensure all instructions are strictly followed. Responses must be meaningful and concise, within 512 words. Include sources to support your answers when possible.
62
-
63
  CONTEXT:
64
  =====================================
65
  {context}
@@ -67,11 +64,9 @@ CONTEXT:
67
  QUESTION:
68
  =====================================
69
  {question}
70
-
71
  CHAT HISTORY:
72
  =====================================
73
  {chatHistory}
74
-
75
  NOTE: Generate responses directly without using phrases like "Response:" or "Answer:". Do not mention the use of extracted context or provide unnecessary details.
76
  """
77
  prompt = ChatPromptTemplate.from_template(prompt)
@@ -266,7 +261,7 @@ def getLinks(url: str, timeout=30):
266
  else:
267
  pass
268
  links = [link for link in links if "#" not in link]
269
- links = list(set(links))
270
  else:
271
  continue
272
  return links
@@ -319,18 +314,19 @@ def analyzeData(query, dataframe):
319
  return response
320
 
321
 
322
-
323
  def extractTextFromPage(page):
324
  return page.get_text()
325
 
 
326
  def extractTextFromPdf(pdf_path):
327
  doc = pymupdf.open(pdf_path)
328
  pages = [doc.load_page(i) for i in range(len(doc))]
329
  with ThreadPoolExecutor() as executor:
330
  texts = list(executor.map(extractTextFromPage, pages))
331
- doc.close()
332
  return '.'.join(texts)
333
 
 
334
  def extractTextFromUrl(url):
335
  response = requests.get(url)
336
  response.raise_for_status()
@@ -338,7 +334,8 @@ def extractTextFromUrl(url):
338
  soup = BeautifulSoup(html, 'lxml')
339
  return soup.get_text(separator=' ', strip=True)
340
 
 
341
  def extractTextFromUrlList(urls):
342
  with ThreadPoolExecutor() as executor:
343
  texts = list(executor.map(extractTextFromUrl, urls))
344
- return '.'.join(texts)
 
1
+ import pymupdf
2
  from concurrent.futures import ThreadPoolExecutor
3
  from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
4
  from langchain_core.runnables import RunnablePassthrough, RunnableLambda
 
45
  encode_kwargs=encode_kwargs
46
  )
47
  reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
48
+ sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)
49
  prompt = """
50
  INSTRUCTIONS:
51
  =====================================
52
  ### Role
53
  **Primary Function**: You are an AI chatbot designed to provide accurate and efficient assistance to users based on provided context data. Your responses must be reliable, friendly, and directly address user inquiries or issues. Always clarify any unclear questions, and conclude responses positively.
 
54
  ### Constraints
55
  1. **No Data Disclosure**: Never reveal access to training data or any context explicitly.
56
  2. **Maintaining Focus**: Politely redirect any off-topic conversations back to relevant issues without breaking character.
57
  3. **Exclusive Reliance on Context Data**: Base all answers strictly on the provided context data. If the context doesn’t cover the query, use a fallback response. Always maintain a third-person perspective.
58
  4. **Restrictive Role Focus**: Do not engage in tasks or answer questions unrelated to your role or context data.
 
59
  Ensure all instructions are strictly followed. Responses must be meaningful and concise, within 512 words. Include sources to support your answers when possible.
 
60
  CONTEXT:
61
  =====================================
62
  {context}
 
64
  QUESTION:
65
  =====================================
66
  {question}
 
67
  CHAT HISTORY:
68
  =====================================
69
  {chatHistory}
 
70
  NOTE: Generate responses directly without using phrases like "Response:" or "Answer:". Do not mention the use of extracted context or provide unnecessary details.
71
  """
72
  prompt = ChatPromptTemplate.from_template(prompt)
 
261
  else:
262
  pass
263
  links = [link for link in links if "#" not in link]
264
+ links = list(set(links))
265
  else:
266
  continue
267
  return links
 
314
  return response
315
 
316
 
 
317
  def extractTextFromPage(page):
318
  return page.get_text()
319
 
320
+
321
  def extractTextFromPdf(pdf_path):
322
  doc = pymupdf.open(pdf_path)
323
  pages = [doc.load_page(i) for i in range(len(doc))]
324
  with ThreadPoolExecutor() as executor:
325
  texts = list(executor.map(extractTextFromPage, pages))
326
+ doc.close()
327
  return '.'.join(texts)
328
 
329
+
330
  def extractTextFromUrl(url):
331
  response = requests.get(url)
332
  response.raise_for_status()
 
334
  soup = BeautifulSoup(html, 'lxml')
335
  return soup.get_text(separator=' ', strip=True)
336
 
337
+
338
  def extractTextFromUrlList(urls):
339
  with ThreadPoolExecutor() as executor:
340
  texts = list(executor.map(extractTextFromUrl, urls))
341
+ return '.'.join(texts)
requirements.txt CHANGED
@@ -94,4 +94,5 @@ pandasai
94
  easyocr
95
  youtube-transcript-api
96
  pdf2image
97
- PyPDF2
 
 
94
  easyocr
95
  youtube-transcript-api
96
  pdf2image
97
+ PyPDF2
98
+ PyJWT