Spaces:
Sleeping
Sleeping
DEBUG: WebURLs
Browse files- app.py +8 -8
- functions.py +4 -4
app.py
CHANGED
@@ -294,7 +294,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
294 |
"output": text,
|
295 |
"source": source
|
296 |
}
|
297 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
298 |
fileName = createDataSourceName(sourceName=source)
|
299 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
@@ -329,7 +329,7 @@ async def loadText(addTextConfig: AddText):
|
|
329 |
"source": "Text"
|
330 |
}
|
331 |
numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
|
332 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
333 |
fileName = createDataSourceName(sourceName="Text")
|
334 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
335 |
response = (
|
@@ -389,7 +389,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
|
|
389 |
"source": source
|
390 |
}
|
391 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
392 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
393 |
fileName = createDataSourceName(sourceName=source)
|
394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
395 |
response = (
|
@@ -472,7 +472,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
|
|
472 |
"source": "www.youtube.com"
|
473 |
}
|
474 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
475 |
-
dct = json.dumps(dct, indent=1).encode("utf-8")
|
476 |
fileName = createDataSourceName(sourceName="youtube")
|
477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
478 |
response = (
|
@@ -534,7 +534,7 @@ async def listChatbotSources(vectorstore: str):
|
|
534 |
async def getDataSource(vectorstore: str, sourceUrl: str):
|
535 |
trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
|
536 |
r = requests.get(sourceUrl)
|
537 |
-
return encodeToBase64(eval(r.content.decode("utf-8")))
|
538 |
|
539 |
|
540 |
@app.post("/deleteChatbotSource")
|
@@ -624,7 +624,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
624 |
for source, fileType in zip(UrlSources, fileTypes):
|
625 |
if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
|
626 |
r = requests.get(source)
|
627 |
-
file = eval(r.content.decode("utf-8"))
|
628 |
content = file["output"]
|
629 |
fileSource = file["source"]
|
630 |
texts.append(".".join(
|
@@ -633,14 +633,14 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
633 |
sources.append(fileSource)
|
634 |
elif fileType == "/loadText":
|
635 |
r = requests.get(source)
|
636 |
-
file = eval(r.content.decode("utf-8"))
|
637 |
content = file["output"]
|
638 |
fileSource = file["source"]
|
639 |
texts.append(content.replace("\n", " "))
|
640 |
sources.append(fileSource)
|
641 |
elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
|
642 |
r = requests.get(source)
|
643 |
-
file = eval(r.content.decode("utf-8"))
|
644 |
content = file["output"]
|
645 |
fileSource = file["source"]
|
646 |
texts.append(".".join(
|
|
|
294 |
"output": text,
|
295 |
"source": source
|
296 |
}
|
297 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
298 |
fileName = createDataSourceName(sourceName=source)
|
299 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
329 |
"source": "Text"
|
330 |
}
|
331 |
numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
|
332 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
333 |
fileName = createDataSourceName(sourceName="Text")
|
334 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
335 |
response = (
|
|
|
389 |
"source": source
|
390 |
}
|
391 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
392 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
393 |
fileName = createDataSourceName(sourceName=source)
|
394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
395 |
response = (
|
|
|
472 |
"source": "www.youtube.com"
|
473 |
}
|
474 |
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
475 |
+
dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
|
476 |
fileName = createDataSourceName(sourceName="youtube")
|
477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
478 |
response = (
|
|
|
534 |
async def getDataSource(vectorstore: str, sourceUrl: str):
|
535 |
trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
|
536 |
r = requests.get(sourceUrl)
|
537 |
+
return encodeToBase64(eval(r.content.decode("utf-8", errors = "replace")))
|
538 |
|
539 |
|
540 |
@app.post("/deleteChatbotSource")
|
|
|
624 |
for source, fileType in zip(UrlSources, fileTypes):
|
625 |
if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
|
626 |
r = requests.get(source)
|
627 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
628 |
content = file["output"]
|
629 |
fileSource = file["source"]
|
630 |
texts.append(".".join(
|
|
|
633 |
sources.append(fileSource)
|
634 |
elif fileType == "/loadText":
|
635 |
r = requests.get(source)
|
636 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
637 |
content = file["output"]
|
638 |
fileSource = file["source"]
|
639 |
texts.append(content.replace("\n", " "))
|
640 |
sources.append(fileSource)
|
641 |
elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
|
642 |
r = requests.get(source)
|
643 |
+
file = eval(r.content.decode("utf-8", errors = "replace"))
|
644 |
content = file["output"]
|
645 |
fileSource = file["source"]
|
646 |
texts.append(".".join(
|
functions.py
CHANGED
@@ -348,7 +348,7 @@ def analyzeData(query, dataframe):
|
|
348 |
response = df.chat(query)
|
349 |
if os.path.isfile(response):
|
350 |
with open(response, "rb") as file:
|
351 |
-
b64string = base64.b64encode(file.read()).decode("utf-8")
|
352 |
return f"data:image/png;base64,{b64string}"
|
353 |
else:
|
354 |
return response
|
@@ -384,7 +384,7 @@ def extractTextFromUrlList(urls):
|
|
384 |
def encodeToBase64(dct: dict):
|
385 |
for key in dct:
|
386 |
if type(dct[key]) == str:
|
387 |
-
dct[key] = base64.b64encode(dct[key].encode("utf-8")).decode("utf-8", errors = "replace")
|
388 |
elif type(dct[key]) == dict:
|
389 |
dct[key] = encodeToBase64(dct[key])
|
390 |
return dct
|
@@ -392,10 +392,10 @@ def encodeToBase64(dct: dict):
|
|
392 |
|
393 |
def decodeBase64(dct: dict):
|
394 |
if type(dct["output"]) == str:
|
395 |
-
dct["output"] = base64.b64decode(dct["output"].encode("utf-8")).decode("utf-8", errors = "replace")
|
396 |
else:
|
397 |
for key in dct["output"]:
|
398 |
-
dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8")).decode("utf-8", errors = "replace")
|
399 |
return dct
|
400 |
|
401 |
|
|
|
348 |
response = df.chat(query)
|
349 |
if os.path.isfile(response):
|
350 |
with open(response, "rb") as file:
|
351 |
+
b64string = base64.b64encode(file.read()).decode("utf-8", errors = "replace")
|
352 |
return f"data:image/png;base64,{b64string}"
|
353 |
else:
|
354 |
return response
|
|
|
384 |
def encodeToBase64(dct: dict):
|
385 |
for key in dct:
|
386 |
if type(dct[key]) == str:
|
387 |
+
dct[key] = base64.b64encode(dct[key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
388 |
elif type(dct[key]) == dict:
|
389 |
dct[key] = encodeToBase64(dct[key])
|
390 |
return dct
|
|
|
392 |
|
393 |
def decodeBase64(dct: dict):
|
394 |
if type(dct["output"]) == str:
|
395 |
+
dct["output"] = base64.b64decode(dct["output"].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
396 |
else:
|
397 |
for key in dct["output"]:
|
398 |
+
dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
|
399 |
return dct
|
400 |
|
401 |
|