Spaces:
Sleeping
Sleeping
UPDATE: supabase
Browse files
app.py
CHANGED
@@ -306,15 +306,6 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
306 |
start = time.time()
|
307 |
text = extractTextFromUrlList(urls = websiteUrls)
|
308 |
textExtraction = time.time()
|
309 |
-
timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
|
310 |
-
links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
|
311 |
-
newText = timeTaken + links + "TEXT: \n" + text
|
312 |
-
fileId = str(uuid.uuid4())
|
313 |
-
with open(f"{fileId}.txt", "w") as file:
|
314 |
-
file.write(newText)
|
315 |
-
with open(f"{fileId}.txt", "rb") as f:
|
316 |
-
supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
|
317 |
-
os.remove(f"{fileId}.txt")
|
318 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
319 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
320 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
@@ -323,7 +314,22 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
323 |
if newCount < int(limit):
|
324 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
325 |
"chatbotname", chatbotname).execute()
|
|
|
326 |
output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
output["supabaseFileName"] = f"{fileId}.txt"
|
328 |
return output
|
329 |
else:
|
|
|
306 |
start = time.time()
|
307 |
text = extractTextFromUrlList(urls = websiteUrls)
|
308 |
textExtraction = time.time()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
310 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
311 |
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
|
|
314 |
if newCount < int(limit):
|
315 |
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
316 |
"chatbotname", chatbotname).execute()
|
317 |
+
uploadStart = time.time()
|
318 |
output = addDocuments(text=text, source=urlparse(websiteUrls[0]).netloc, vectorstore=vectorstore)
|
319 |
+
uploadEnd = time.time()
|
320 |
+
uploadTime = f"VECTOR UPLOAD TIME: {uploadEnd - uploadStart}s" + "\n"
|
321 |
+
timeTaken = f"TEXT EXTRACTION TIME: {textExtraction - start}s" + "\n"
|
322 |
+
tokenCount = f"TOKEN COUNT: {len(text)}" + "\n"
|
323 |
+
tokenizer = nltk.tokenize.RegexpTokenizer(r"\w+")
|
324 |
+
wordCount = f"WORD COUNT: {len(tokenizer.tokenize(text))}" + "\n"
|
325 |
+
links = "LINKS:\n" + "\n".join(websiteUrls) + "\n"
|
326 |
+
newText = ("=" * 75 + "\n").join([timeTaken, uploadTime, wordCount, tokenCount, links, "TEXT: \n" + text + "\n"])
|
327 |
+
fileId = str(uuid.uuid4())
|
328 |
+
with open(f"{fileId}.txt", "w") as file:
|
329 |
+
file.write(newText)
|
330 |
+
with open(f"{fileId}.txt", "rb") as f:
|
331 |
+
supabase.storage.from_("ConversAI").upload(file = f, path = os.path.join("/", f.name), file_options={"content-type": "text/plain"})
|
332 |
+
os.remove(f"{fileId}.txt")
|
333 |
output["supabaseFileName"] = f"{fileId}.txt"
|
334 |
return output
|
335 |
else:
|