Spaces:
Sleeping
Sleeping
UPDATE: trainChatbot
Browse files- app.py +16 -2
- functions.py +12 -5
app.py
CHANGED
@@ -320,7 +320,7 @@ async def loadText(addTextConfig: AddText):
|
|
320 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
321 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
322 |
dct = {
|
323 |
-
"output": text,
|
324 |
"source": "Text"
|
325 |
}
|
326 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
@@ -544,13 +544,27 @@ async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
|
544 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
545 |
.execute()
|
546 |
)
|
547 |
-
|
548 |
return {
|
549 |
"output": "SUCCESS"
|
550 |
}
|
551 |
|
552 |
|
553 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
|
555 |
class TrainChatbot(BaseModel):
|
556 |
vectorstore: str
|
|
|
320 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
321 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
322 |
dct = {
|
323 |
+
"output": cleanText(text = text),
|
324 |
"source": "Text"
|
325 |
}
|
326 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
|
|
544 |
"sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
|
545 |
.execute()
|
546 |
)
|
|
|
547 |
return {
|
548 |
"output": "SUCCESS"
|
549 |
}
|
550 |
|
551 |
|
552 |
|
553 |
+
@app.post("/publicOrPrivate")
async def publicOrPrivate(vectorstore: str, mode: str = "public"):
    """Set the "public/private" column for one chatbot row.

    ``vectorstore`` is split on "$"; the segment at index 1 is used as the
    ``user_id`` filter and the segment at index 2 as the ``chatbotname``
    filter of the "ConversAI_ChatbotInfo" table. ``mode`` is written to the
    column as given, without validation.

    Raises:
        IndexError: if ``vectorstore`` has fewer than three "$"-separated
            segments.

    Returns:
        ``{"output": "SUCCESS"}``; the result of the update query is not
        inspected.
    """
    segments = vectorstore.split("$")
    username, chatbotName = segments[1], segments[2]

    updateQuery = supabase.table("ConversAI_ChatbotInfo").update({"public/private": mode})
    updateQuery = updateQuery.eq("user_id", username).eq("chatbotname", chatbotName)
    updateQuery.execute()

    return {"output": "SUCCESS"}
|
566 |
+
|
567 |
+
|
568 |
|
569 |
class TrainChatbot(BaseModel):
|
570 |
vectorstore: str
|
functions.py
CHANGED
@@ -7,6 +7,7 @@ from langchain_qdrant import QdrantVectorStore
|
|
7 |
from langchain_qdrant import RetrievalMode
|
8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
9 |
from uuid import uuid4
|
|
|
10 |
from langchain_core.output_parsers import StrOutputParser
|
11 |
from langchain.retrievers import ParentDocumentRetriever
|
12 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
@@ -120,6 +121,10 @@ def createTable(tablename: str):
|
|
120 |
"output": "SUCCESS"
|
121 |
}
|
122 |
|
|
|
|
|
|
|
|
|
123 |
|
124 |
def addDocuments(texts: list[tuple[str]], vectorstore: str):
|
125 |
global vectorEmbeddings
|
@@ -288,7 +293,8 @@ def getLinks(url: str, timeout=30):
|
|
288 |
def getTextFromImagePDF(pdfBytes):
|
289 |
def getText(image):
|
290 |
global reader
|
291 |
-
|
|
|
292 |
|
293 |
allImages = convert_from_bytes(pdfBytes)
|
294 |
texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
|
@@ -303,7 +309,7 @@ def getTranscript(urls: str):
|
|
303 |
url, add_video_info=False
|
304 |
)
|
305 |
doc = " ".join([x.page_content for x in loader.load()])
|
306 |
-
texts.append(doc)
|
307 |
except:
|
308 |
doc = ""
|
309 |
texts.append(doc)
|
@@ -325,7 +331,7 @@ def analyzeData(query, dataframe):
|
|
325 |
|
326 |
|
327 |
def extractTextFromPage(page):
|
328 |
-
text = page.get_text()
|
329 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
330 |
|
331 |
|
@@ -343,7 +349,7 @@ def extractTextFromUrl(url):
|
|
343 |
response.raise_for_status()
|
344 |
html = response.text
|
345 |
soup = BeautifulSoup(html, 'lxml')
|
346 |
-
text = soup.get_text(separator=' ', strip=True)
|
347 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
348 |
|
349 |
|
@@ -361,4 +367,5 @@ def createDataSourceName(sourceName):
|
|
361 |
i = 1
|
362 |
while True:
|
363 |
sourceName = sourceName + "-" + str(i)
|
364 |
-
return createDataSourceName(sourceName)
|
|
|
|
7 |
from langchain_qdrant import RetrievalMode
|
8 |
from langchain_core.prompts.chat import ChatPromptTemplate
|
9 |
from uuid import uuid4
|
10 |
+
import nltk
|
11 |
from langchain_core.output_parsers import StrOutputParser
|
12 |
from langchain.retrievers import ParentDocumentRetriever
|
13 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
|
|
121 |
"output": "SUCCESS"
|
122 |
}
|
123 |
|
124 |
+
def cleanText(text: str) -> str:
    """Flatten newlines to spaces and strip ASCII punctuation except periods.

    Args:
        text: The raw text to normalise.

    Returns:
        ``text`` with every "\\n" replaced by a single space and every
        character of ``string.punctuation`` other than "." removed.
    """
    # Imported locally so the helper does not depend on a module-level
    # ``import string`` being present in the file.
    import string

    removable = string.punctuation.replace(".", "")
    # One translate pass: map "\n" -> " " and delete the punctuation set.
    return text.translate(str.maketrans("\n", " ", removable))
|
128 |
|
129 |
def addDocuments(texts: list[tuple[str]], vectorstore: str):
|
130 |
global vectorEmbeddings
|
|
|
293 |
def getTextFromImagePDF(pdfBytes):
|
294 |
def getText(image):
|
295 |
global reader
|
296 |
+
text = "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
|
297 |
+
return cleanText(text = text)
|
298 |
|
299 |
allImages = convert_from_bytes(pdfBytes)
|
300 |
texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
|
|
|
309 |
url, add_video_info=False
|
310 |
)
|
311 |
doc = " ".join([x.page_content for x in loader.load()])
|
312 |
+
texts.append(cleanText(text = doc))
|
313 |
except:
|
314 |
doc = ""
|
315 |
texts.append(doc)
|
|
|
331 |
|
332 |
|
333 |
def extractTextFromPage(page):
    """Return the cleaned text of ``page`` as a base64-encoded string.

    The text comes from ``page.get_text()``, is passed through ``cleanText``,
    encoded as UTF-8, base64-encoded, and returned as a ``str``.
    """
    rawText = page.get_text()
    cleaned = cleanText(text=rawText)
    encodedBytes = base64.b64encode(cleaned.encode("utf-8"))
    return encodedBytes.decode("utf-8")
|
336 |
|
337 |
|
|
|
349 |
response.raise_for_status()
|
350 |
html = response.text
|
351 |
soup = BeautifulSoup(html, 'lxml')
|
352 |
+
text = cleanText(text = soup.get_text(separator=' ', strip=True))
|
353 |
return base64.b64encode(text.encode("utf-8")).decode("utf-8")
|
354 |
|
355 |
|
|
|
367 |
i = 1
|
368 |
while True:
|
369 |
sourceName = sourceName + "-" + str(i)
|
370 |
+
return createDataSourceName(sourceName)
|
371 |
+
|