Rauhan committed on
Commit
ed7063b
1 Parent(s): 4f74893

DEBUG: WebURLs

Browse files
Files changed (2) hide show
  1. app.py +8 -8
  2. functions.py +4 -4
app.py CHANGED
@@ -294,7 +294,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
294
  "output": text,
295
  "source": source
296
  }
297
- dct = json.dumps(dct, indent=1).encode("utf-8")
298
  fileName = createDataSourceName(sourceName=source)
299
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
300
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
@@ -329,7 +329,7 @@ async def loadText(addTextConfig: AddText):
329
  "source": "Text"
330
  }
331
  numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
332
- dct = json.dumps(dct, indent=1).encode("utf-8")
333
  fileName = createDataSourceName(sourceName="Text")
334
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
335
  response = (
@@ -389,7 +389,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
389
  "source": source
390
  }
391
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
392
- dct = json.dumps(dct, indent=1).encode("utf-8")
393
  fileName = createDataSourceName(sourceName=source)
394
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
395
  response = (
@@ -472,7 +472,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
472
  "source": "www.youtube.com"
473
  }
474
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
475
- dct = json.dumps(dct, indent=1).encode("utf-8")
476
  fileName = createDataSourceName(sourceName="youtube")
477
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
478
  response = (
@@ -534,7 +534,7 @@ async def listChatbotSources(vectorstore: str):
534
  async def getDataSource(vectorstore: str, sourceUrl: str):
535
  trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
536
  r = requests.get(sourceUrl)
537
- return encodeToBase64(eval(r.content.decode("utf-8")))
538
 
539
 
540
  @app.post("/deleteChatbotSource")
@@ -624,7 +624,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
624
  for source, fileType in zip(UrlSources, fileTypes):
625
  if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
626
  r = requests.get(source)
627
- file = eval(r.content.decode("utf-8"))
628
  content = file["output"]
629
  fileSource = file["source"]
630
  texts.append(".".join(
@@ -633,14 +633,14 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
633
  sources.append(fileSource)
634
  elif fileType == "/loadText":
635
  r = requests.get(source)
636
- file = eval(r.content.decode("utf-8"))
637
  content = file["output"]
638
  fileSource = file["source"]
639
  texts.append(content.replace("\n", " "))
640
  sources.append(fileSource)
641
  elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
642
  r = requests.get(source)
643
- file = eval(r.content.decode("utf-8"))
644
  content = file["output"]
645
  fileSource = file["source"]
646
  texts.append(".".join(
 
294
  "output": text,
295
  "source": source
296
  }
297
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
298
  fileName = createDataSourceName(sourceName=source)
299
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
300
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
 
329
  "source": "Text"
330
  }
331
  numTokens = len(text.translate(str.maketrans('', '', string.punctuation)).split(" "))
332
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
333
  fileName = createDataSourceName(sourceName="Text")
334
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
335
  response = (
 
389
  "source": source
390
  }
391
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
392
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
393
  fileName = createDataSourceName(sourceName=source)
394
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
395
  response = (
 
472
  "source": "www.youtube.com"
473
  }
474
  numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
475
+ dct = json.dumps(dct, indent=1).encode("utf-8", errors = "replace")
476
  fileName = createDataSourceName(sourceName="youtube")
477
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
478
  response = (
 
534
  async def getDataSource(vectorstore: str, sourceUrl: str):
535
  trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
536
  r = requests.get(sourceUrl)
537
+ return encodeToBase64(eval(r.content.decode("utf-8", errors = "replace")))
538
 
539
 
540
  @app.post("/deleteChatbotSource")
 
624
  for source, fileType in zip(UrlSources, fileTypes):
625
  if ((fileType == "/loadPDF") | (fileType == "/loadImagePDF")):
626
  r = requests.get(source)
627
+ file = eval(r.content.decode("utf-8", errors = "replace"))
628
  content = file["output"]
629
  fileSource = file["source"]
630
  texts.append(".".join(
 
633
  sources.append(fileSource)
634
  elif fileType == "/loadText":
635
  r = requests.get(source)
636
+ file = eval(r.content.decode("utf-8", errors = "replace"))
637
  content = file["output"]
638
  fileSource = file["source"]
639
  texts.append(content.replace("\n", " "))
640
  sources.append(fileSource)
641
  elif ((fileType == "/loadWebURLs") | (fileType == "/loadYoutubeTranscript")):
642
  r = requests.get(source)
643
+ file = eval(r.content.decode("utf-8", errors = "replace"))
644
  content = file["output"]
645
  fileSource = file["source"]
646
  texts.append(".".join(
functions.py CHANGED
@@ -348,7 +348,7 @@ def analyzeData(query, dataframe):
348
  response = df.chat(query)
349
  if os.path.isfile(response):
350
  with open(response, "rb") as file:
351
- b64string = base64.b64encode(file.read()).decode("utf-8")
352
  return f"data:image/png;base64,{b64string}"
353
  else:
354
  return response
@@ -384,7 +384,7 @@ def extractTextFromUrlList(urls):
384
  def encodeToBase64(dct: dict):
385
  for key in dct:
386
  if type(dct[key]) == str:
387
- dct[key] = base64.b64encode(dct[key].encode("utf-8")).decode("utf-8", errors = "replace")
388
  elif type(dct[key]) == dict:
389
  dct[key] = encodeToBase64(dct[key])
390
  return dct
@@ -392,10 +392,10 @@ def encodeToBase64(dct: dict):
392
 
393
  def decodeBase64(dct: dict):
394
  if type(dct["output"]) == str:
395
- dct["output"] = base64.b64decode(dct["output"].encode("utf-8")).decode("utf-8", errors = "replace")
396
  else:
397
  for key in dct["output"]:
398
- dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8")).decode("utf-8", errors = "replace")
399
  return dct
400
 
401
 
 
348
  response = df.chat(query)
349
  if os.path.isfile(response):
350
  with open(response, "rb") as file:
351
+ b64string = base64.b64encode(file.read()).decode("utf-8", errors = "replace")
352
  return f"data:image/png;base64,{b64string}"
353
  else:
354
  return response
 
384
  def encodeToBase64(dct: dict):
385
  for key in dct:
386
  if type(dct[key]) == str:
387
+ dct[key] = base64.b64encode(dct[key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
388
  elif type(dct[key]) == dict:
389
  dct[key] = encodeToBase64(dct[key])
390
  return dct
 
392
 
393
  def decodeBase64(dct: dict):
394
  if type(dct["output"]) == str:
395
+ dct["output"] = base64.b64decode(dct["output"].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
396
  else:
397
  for key in dct["output"]:
398
+ dct["output"][key] = base64.b64decode(dct["output"][key].encode("utf-8", errors = "replace")).decode("utf-8", errors = "replace")
399
  return dct
400
 
401