Spaces:

techconspartners
/

ConversAI

Sleeping

App Files Files Community

Rauhan committed on Aug 21

Commit

e3475f1

•

2 Parent(s): d22b8e9 29dd018

DEBUG: FlashRank

Browse files

Files changed (3) hide show

app.py +77 -67
functions.py +8 -11
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -1,5 +1,8 @@
 import io
 import tempfile
 from starlette import status
 from functions import *
 import pandas as pd
@@ -13,7 +16,6 @@ import nltk
 import time
 import uuid
 nltk.download('punkt_tab')
 app = FastAPI(title="ConversAI", root_path="/api/v1")
@@ -48,11 +50,24 @@ async def sign_up(email, username, password):
 @app.post("/session-check")
-async def check_session():
  res = supabase.auth.get_session()
  return res
 @app.post("/get-user")
 async def get_user(access_token):
  res = supabase.auth.get_user(jwt=access_token)
@@ -65,7 +80,6 @@ async def refresh_token(refresh_token):
  return res
 @app.post("/login")
 async def sign_in(email, password):
  try:
@@ -127,68 +141,55 @@ async def sign_in(email, password):
  )
-@app.post('login_with_token')
-async def login_with_token(token):
  try:
- res = supabase.auth.sign_in_with_id_token(token)
- print(res)
- user_id = res.user.id
- access_token = res.session.access_token
- refresh_token = res.session.refresh_token
- store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
- store_id = None
- if store_session_check and store_session_check.data:
- store_id = store_session_check.data[0].get("StoreID")
- if not store_id:
- response = (
- supabase.table("Stores").insert(
- {
- "AccessToken": access_token,
- "StoreID": user_id,
- "RefreshToken": refresh_token,
- }
- ).execute()
- )
- message = {
- "message": "Success",
- "code": status.HTTP_200_OK,
- "user_id": user_id,
- "access_token": access_token,
- "refresh_token": refresh_token
- }
- return message
- elif store_id == user_id:
- raise HTTPException(
- status_code=status.HTTP_400_BAD_REQUEST,
- detail="You are already signed in. Please sign out first to sign in again."
- )
- else:
- raise HTTPException(
- status_code=status.HTTP_400_BAD_REQUEST,
- detail="Failed to sign in. Please check your credentials."
- )
- except HTTPException as http_exc:
- raise http_exc
- except Exception as e:
- raise HTTPException(
- status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
- detail=f"An unexpected error occurred during sign-in: {str(e)}"
- )
 @app.post("/set-session-data")
-async def set_session_data(access_token, refresh_token):
  res = supabase.auth.set_session(access_token, refresh_token)
  return res
@@ -207,8 +208,9 @@ async def sign_out(user_id):
 @app.post("/oauth")
-async def oauth(provider):
- res = supabase.auth.sign_in_with_oauth({"provider": provider})
  return res
@@ -259,7 +261,8 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
@@ -280,7 +283,7 @@ async def returnText(pdf: UploadFile = File(...)):
  return {
  "source": source,
  "extractionTime": timeTaken,
- "output": text
  }
@@ -307,7 +310,8 @@ async def addText(vectorstore: str, text: str, source: str | None = None):
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
@@ -345,13 +349,14 @@ async def addQAPairData(addQaPair: AddQAPair):
 @app.post("/addWebsite")
 async def addWebsite(vectorstore: str, websiteUrls: list[str]):
  start = time.time()
- text = extractTextFromUrlList(urls = websiteUrls)
  textExtraction = time.time()
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
  df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
  newCount = currentCount + len(text)
- limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0]["tokenLimit"]
  if newCount < int(limit):
  supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
  "chatbotname", chatbotname).execute()
@@ -364,12 +369,14 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
  tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
  wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
  links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
- newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
  fileId = str(uuid.uuid4())
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
- supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
@@ -385,7 +392,8 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
  output = answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
  response = (
  supabase.table("ConversAI_ChatHistory")
- .insert({"username": username, "chatbotName": chatbotName, "llmModel": llmModel, "question": query, "response": output["output"]})
  .execute()
  )
  return output
@@ -450,5 +458,7 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
 @app.post("/getChatHistory")
 async def chatHistory(vectorstore: str):
  username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
- response = supabase.table("ConversAI_ChatHistory").select("timestamp", "question", "response").eq("username", username).eq("chatbotName", chatbotName).execute().data
- return response

 import io
 import tempfile
+import jwt
+from click import option
+from jwt import ExpiredSignatureError, InvalidTokenError
 from starlette import status
 from functions import *
 import pandas as pd
 import time
 import uuid
 nltk.download('punkt_tab')
 app = FastAPI(title="ConversAI", root_path="/api/v1")
 @app.post("/session-check")
async def check_session(user_id: str):
    """Return the current Supabase auth session, cleaning up when there is none.

    When the Supabase client reports no session, the row in the ``Stores``
    table whose ``StoreID`` equals ``user_id`` is deleted, the client is
    signed out, and a small status payload is returned instead.

    Args:
        user_id: Value matched against ``Stores.StoreID`` during cleanup.

    Returns:
        The session object from ``supabase.auth.get_session()`` when one
        exists; otherwise ``{"message": "success", "code": 200, "Session": None}``.

    Raises:
        HTTPException: 400 with the underlying error text if the cleanup
            (row deletion or sign-out) fails.
    """
    res = supabase.auth.get_session()
    # Identity check: ``None`` is a singleton and ``==`` can be overridden.
    if res is not None:
        return res

    try:
        supabase.table("Stores").delete().eq("StoreID", user_id).execute()
        supabase.auth.sign_out()
    except Exception as e:
        # Keep the original cause attached for server-side debugging.
        raise HTTPException(status_code=400, detail=str(e)) from e

    # ``res`` is None on this path; the key is kept for response compatibility.
    return {"message": "success", "code": 200, "Session": res}
 @app.post("/get-user")
 async def get_user(access_token):
  res = supabase.auth.get_user(jwt=access_token)
  return res
 @app.post("/login")
 async def sign_in(email, password):
  try:
  )
@app.post("/login_with_token")
async def login_with_token(access_token: str, refresh_token: str):
    """Decode a JWT access token and echo its main claims back to the caller.

    Args:
        access_token: JWT whose ``sub``, ``email``, ``iat`` and ``exp`` claims
            are read.
        refresh_token: Returned unchanged in the response payload.

    Returns:
        A dict with the HTTP 200 code, the decoded claims and both tokens.

    Raises:
        HTTPException: 401 when the token is malformed or already expired.
    """
    try:
        # SECURITY NOTE(review): the signature is NOT verified here, so the
        # claims are attacker-controllable. Verify against the issuer's key
        # before trusting ``user_id``/``email`` for authorization.
        #
        # Disabling signature verification also disables expiry checking by
        # default, which made the ExpiredSignatureError handler unreachable.
        # Re-enable it explicitly so expired tokens are rejected.
        decoded_token = jwt.decode(
            access_token,
            options={"verify_signature": False, "verify_exp": True},
        )
    except (ExpiredSignatureError, InvalidTokenError) as e:
        raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail=str(e)) from e

    # Named ``payload`` rather than ``json`` to avoid shadowing the stdlib module.
    payload = {
        "code": status.HTTP_200_OK,
        "user_id": decoded_token.get("sub"),
        "email": decoded_token.get("email"),
        "access_token": access_token,
        "refresh_token": refresh_token,
        "issued_at": decoded_token.get("iat"),
        "expires_at": decoded_token.get("exp"),
    }
    return payload
@app.post("/user_name")
async def user_name_(username: str, user_id: str):
    """Forward ``user_id`` and ``username`` to ``createUser`` and return its result unchanged."""
    return createUser(user_id=user_id, username=username)
 @app.post("/set-session-data")
# Sets the Supabase auth session from the given tokens, then makes sure the
# "Stores" table has a row for user_id (inserting one with both tokens when no
# StoreID is found), and returns `res`.
+async def set_session_data(access_token, refresh_token, user_id):
  res = supabase.auth.set_session(access_token, refresh_token)
# Look up existing "Stores" rows whose StoreID equals user_id.
+ store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
+ store_id = None
+ if store_session_check and store_session_check.data:
+ store_id = store_session_check.data[0].get("StoreID")
# No stored row (or an empty StoreID): persist the tokens for this user.
+ if not store_id:
+ response = (
+ supabase.table("Stores").insert(
+ {
+ "AccessToken": access_token,
+ "StoreID": user_id,
+ "RefreshToken": refresh_token,
+ }
+ ).execute()
+ )
# NOTE(review): indentation is lost in this diff view, so it cannot be told
# here whether the wrapper dict below is built on every call or only when a
# new row was inserted - confirm against the real file. `response` above is
# never read in the visible code.
+ res = {
+ "message": "success",
+ "code": 200,
+ "session_data": res,
+ }
  return res
 @app.post("/oauth")
async def oauth():
    """Ask Supabase to start a Google OAuth sign-in and return the client's response.

    The provider and the post-login redirect URL are fixed in this handler.
    """
    credentials = {
        "provider": "google",
        "options": {"redirect_to": "https://convers-ai-lac.vercel.app/"},
    }
    return supabase.auth.sign_in_with_oauth(credentials)
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
+ file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
  return {
  "source": source,
  "extractionTime": timeTaken,
+ "output": text
  }
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
+ file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
 @app.post("/addWebsite")
 async def addWebsite(vectorstore: str, websiteUrls: list[str]):
  start = time.time()
+ text = extractTextFromUrlList(urls=websiteUrls)
  textExtraction = time.time()
  username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
  df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
  currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
  newCount = currentCount + len(text)
+ limit = supabase.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
+ "tokenLimit"]
  if newCount < int(limit):
  supabase.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
  "chatbotname", chatbotname).execute()
  tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
  wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
  links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
+ newText = ("=" * 75 + "\n").join(
+ [timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
  fileId = str(uuid.uuid4())
  with open(f"{fileId}.txt", "w") as file:
  file.write(newText)
  with open(f"{fileId}.txt", "rb") as f:
+ supabase.storage.from_("ConversAI").upload(file=f, path=os.path.join("/", f.name),
+ file_options={"content-type": "text/plain"})
  os.remove(f"{fileId}.txt")
  output["supabaseFileName"] = f"{fileId}.txt"
  return output
  output = answerQuery(query=query, vectorstore=vectorstore, llmModel=llmModel)
  response = (
  supabase.table("ConversAI_ChatHistory")
+ .insert({"username": username, "chatbotName": chatbotName, "llmModel": llmModel, "question": query,
+ "response": output["output"]})
  .execute()
  )
  return output
 @app.post("/getChatHistory")
 async def chatHistory(vectorstore: str):
     """Return stored chat rows for the chatbot identified by ``vectorstore``.

     ``vectorstore`` is split on ``$``; the second and third fields are used
     as the username and chatbot name. An ``IndexError`` propagates when
     fewer than three fields are present.

     Returns:
         The ``data`` of the query result: ``timestamp``, ``question`` and
         ``response`` columns from ``ConversAI_ChatHistory`` for that pair.
     """
     fields = vectorstore.split("$")
     username, chatbotName = fields[1], fields[2]
     history_query = supabase.table("ConversAI_ChatHistory").select("timestamp", "question", "response")
     history_query = history_query.eq("username", username).eq("chatbotName", chatbotName)
     return history_query.execute().data

functions.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import pymupdf
 from concurrent.futures import ThreadPoolExecutor
 from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
 from langchain_core.runnables import RunnablePassthrough, RunnableLambda
@@ -45,21 +45,18 @@ vectorEmbeddings = HuggingFaceEmbeddings(
  encode_kwargs=encode_kwargs
 )
 reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
-sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25", threads = 20 , parallel = 0)
 prompt = """
 INSTRUCTIONS:
 =====================================
 ### Role
 **Primary Function**: You are an AI chatbot designed to provide accurate and efficient assistance to users based on provided context data. Your responses must be reliable, friendly, and directly address user inquiries or issues. Always clarify any unclear questions, and conclude responses positively.
 ### Constraints
 1. **No Data Disclosure**: Never reveal access to training data or any context explicitly.
 2. **Maintaining Focus**: Politely redirect any off-topic conversations back to relevant issues without breaking character.
 3. **Exclusive Reliance on Context Data**: Base all answers strictly on the provided context data. If the context doesn’t cover the query, use a fallback response. Always maintain a third-person perspective.
 4. **Restrictive Role Focus**: Do not engage in tasks or answer questions unrelated to your role or context data.
 Ensure all instructions are strictly followed. Responses must be meaningful and concise, within 512 words. Include sources to support your answers when possible.
 CONTEXT:
 =====================================
 {context}
@@ -67,11 +64,9 @@ CONTEXT:
 QUESTION:
 =====================================
 {question}
 CHAT HISTORY:
 =====================================
 {chatHistory}
 NOTE: Generate responses directly without using phrases like "Response:" or "Answer:". Do not mention the use of extracted context or provide unnecessary details.
 """
 prompt = ChatPromptTemplate.from_template(prompt)
@@ -266,7 +261,7 @@ def getLinks(url: str, timeout=30):
  else:
  pass
  links = [link for link in links if "#" not in link]
- links = list(set(links))
  else:
  continue
  return links
@@ -319,18 +314,19 @@ def analyzeData(query, dataframe):
  return response
 def extractTextFromPage(page):
  return page.get_text()
 def extractTextFromPdf(pdf_path):
  doc = pymupdf.open(pdf_path)
  pages = [doc.load_page(i) for i in range(len(doc))]
  with ThreadPoolExecutor() as executor:
  texts = list(executor.map(extractTextFromPage, pages))
- doc.close()
  return '.'.join(texts)
 def extractTextFromUrl(url):
  response = requests.get(url)
  response.raise_for_status()
@@ -338,7 +334,8 @@ def extractTextFromUrl(url):
  soup = BeautifulSoup(html, 'lxml')
  return soup.get_text(separator=' ', strip=True)
 def extractTextFromUrlList(urls):
  with ThreadPoolExecutor() as executor:
  texts = list(executor.map(extractTextFromUrl, urls))
- return '.'.join(texts)

+import pymupdf
 from concurrent.futures import ThreadPoolExecutor
 from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
 from langchain_core.runnables import RunnablePassthrough, RunnableLambda
  encode_kwargs=encode_kwargs
 )
 reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
+sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25", threads=20, parallel=0)
 prompt = """
 INSTRUCTIONS:
 =====================================
 ### Role
 **Primary Function**: You are an AI chatbot designed to provide accurate and efficient assistance to users based on provided context data. Your responses must be reliable, friendly, and directly address user inquiries or issues. Always clarify any unclear questions, and conclude responses positively.
 ### Constraints
 1. **No Data Disclosure**: Never reveal access to training data or any context explicitly.
 2. **Maintaining Focus**: Politely redirect any off-topic conversations back to relevant issues without breaking character.
 3. **Exclusive Reliance on Context Data**: Base all answers strictly on the provided context data. If the context doesn’t cover the query, use a fallback response. Always maintain a third-person perspective.
 4. **Restrictive Role Focus**: Do not engage in tasks or answer questions unrelated to your role or context data.
 Ensure all instructions are strictly followed. Responses must be meaningful and concise, within 512 words. Include sources to support your answers when possible.
 CONTEXT:
 =====================================
 {context}
 QUESTION:
 =====================================
 {question}
 CHAT HISTORY:
 =====================================
 {chatHistory}
 NOTE: Generate responses directly without using phrases like "Response:" or "Answer:". Do not mention the use of extracted context or provide unnecessary details.
 """
 prompt = ChatPromptTemplate.from_template(prompt)
  else:
  pass
  links = [link for link in links if "#" not in link]
+ links = list(set(links))
  else:
  continue
  return links
  return response
 def extractTextFromPage(page):
     """Return whatever ``page.get_text()`` yields for the given page object."""
     pageText = page.get_text()
     return pageText
 def extractTextFromPdf(pdf_path):
     """Extract the text of every page of a PDF and join the pages with '.'.

     Args:
         pdf_path: Path passed straight to ``pymupdf.open``.

     Returns:
         A single string: the per-page text in page order, separated by '.'.
         An empty document yields an empty string.
     """
     # Pages are read sequentially on purpose: PyMuPDF documents are not safe
     # to use from several threads at once, so the previous thread-pool fan-out
     # risked corrupted output or a crash.
     # The context manager closes the document even if extraction raises.
     with pymupdf.open(pdf_path) as doc:
         return '.'.join(extractTextFromPage(page) for page in doc)
 def extractTextFromUrl(url):
  response = requests.get(url)
  response.raise_for_status()
  soup = BeautifulSoup(html, 'lxml')
  return soup.get_text(separator=' ', strip=True)
 def extractTextFromUrlList(urls):
     """Run ``extractTextFromUrl`` over every URL in a thread pool.

     Returns:
         The per-URL results joined with '.', in the same order as ``urls``.
         An exception raised for any URL propagates to the caller.
     """
     with ThreadPoolExecutor() as pool:
         fetched = pool.map(extractTextFromUrl, urls)
         # Consume the lazy map inside the pool's lifetime.
         return '.'.join(fetched)

requirements.txt CHANGED Viewed

@@ -94,4 +94,5 @@ pandasai
 easyocr
 youtube-transcript-api
 pdf2image
-PyPDF2

 easyocr
 youtube-transcript-api
 pdf2image
+PyPDF2
+PyJWT