Spaces:

techconspartners
/

ConversAI

Sleeping

App Files Files Community

Rauhan committed on Aug 19, 2024

Commit

2145a2a

1 Parent(s): 058f9cb

UPDATE: supabase

Browse files

Files changed (1) hide show

app.py +15 -9

app.py CHANGED Viewed

@@ -306,15 +306,6 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
     start = time.time()
     text = extractTextFromUrlList(urls = websiteUrls)
     textExtraction = time.time()
-    timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
-    links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
-    newText = timeTaken + links + "TEXT: \n" + text
-    fileId = str(uuid.uuid4())
-    with open(f"{fileId}.txt", "w") as file:
-        file.write(newText)
-    with open(f"{fileId}.txt", "rb") as f:
-        supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
-    os.remove(f"{fileId}.txt")
     username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
     currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
@@ -323,7 +314,22 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
     if newCount < int(limit):
         client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
         output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
         output["supabaseFileName"] = f"{fileId}.txt"
         return output
     else:

     start = time.time()
     text = extractTextFromUrlList(urls = websiteUrls)
     textExtraction = time.time()
     username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
     df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
     currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
     if newCount < int(limit):
         client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
             "chatbotname", chatbotname).execute()
+        uploadStart = time.time()
         output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
+        uploadEnd = time.time()
+        uploadTime = f"VECTOR UPLOAD TIME: {uploadEnd - uploadStart}s" + "\n"
+        timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
+        tokenCount = f"TOKEN COUNT: {len(text)}" + "\n"
+        tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
+        wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
+        links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
+        newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
+        fileId = str(uuid.uuid4())
+        with open(f"{fileId}.txt", "w") as file:
+            file.write(newText)
+        with open(f"{fileId}.txt", "rb") as f:
+            supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
+        os.remove(f"{fileId}.txt")
         output["supabaseFileName"] = f"{fileId}.txt"
         return output
     else: