UPDATE: base64 encodings
- app.py +10 -1
- functions.py +6 -3
app.py
CHANGED
@@ -1,6 +1,7 @@
 import io
 import tempfile
 import jwt
+import base64
 from click import option
 from jwt import ExpiredSignatureError, InvalidTokenError
 from starlette import status
@@ -273,8 +274,16 @@ async def returnText(pdf: UploadFile = File(...)):
     }


+class AddText(BaseModel):
+    vectorstore: str
+    text: str
+    source: str = "Text"
+
+
 @app.post("/addText")
-async def addText(
+async def addText(addTextConfig: AddText):
+    vectorstore, text, source = addTextConfig.vectorstore, addTextConfig.text, addTextConfig.source
+    text = base64.b64decode(text.encode("utf-8")).decode("utf-8")
     username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
     df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
     currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
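With this change, the /addText route reads its parameters from an AddText request body and expects the text field to arrive base64-encoded, since the handler passes it through base64.b64decode before updating the character count. A minimal client sketch of the new contract, assuming a hypothetical local base URL (the endpoint path and field names come from the diff; everything else is illustrative):

import base64
import requests

BASE_URL = "http://localhost:7860"  # hypothetical; use the actual Space URL

payload = {
    # addText splits on "$": index 1 is the username, index 2 the chatbot name
    "vectorstore": "store$demoUser$demoBot",
    # the server base64-decodes this field, so the client must encode it first
    "text": base64.b64encode("Hello, world!".encode("utf-8")).decode("utf-8"),
    "source": "Text",
}

response = requests.post(f"{BASE_URL}/addText", json=payload)
print(response.status_code, response.json())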
functions.py
CHANGED
@@ -288,7 +288,7 @@ def getTextFromImagePDF(pdfBytes):
         return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])

     allImages = convert_from_bytes(pdfBytes)
-    texts = [getText(image) for image in allImages]
+    texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
     return {x + 1: y for x, y in enumerate(texts)}


@@ -304,6 +304,7 @@ def getTranscript(urls: str):
         except:
             doc = ""
         texts.append(doc)
+    texts = [base64.b64encode(text.encode("utf-8")).decode("utf-8") for text in texts]
     return {x: y for x, y in zip(urls, texts)}


@@ -321,7 +322,8 @@ def analyzeData(query, dataframe):


 def extractTextFromPage(page):
-
+    text = page.get_text()
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")


 def extractTextFromPdf(pdf_path):
@@ -338,7 +340,8 @@ def extractTextFromUrl(url):
     response.raise_for_status()
     html = response.text
     soup = BeautifulSoup(html, 'lxml')
-
+    text = soup.get_text(separator=' ', strip=True)
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")


 def extractTextFromUrlList(urls):
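After this commit, the extraction helpers (getTextFromImagePDF, getTranscript, extractTextFromPage, extractTextFromUrl) return base64-encoded strings rather than plain text, so any consumer has to reverse the encoding before displaying the text or counting characters. A small round-trip sketch, assuming the same UTF-8/base64 convention used in the diff (the sample data is made up):

import base64

def decodeText(encoded: str) -> str:
    # reverse of base64.b64encode(text.encode("utf-8")).decode("utf-8")
    return base64.b64decode(encoded.encode("utf-8")).decode("utf-8")

# e.g. getTextFromImagePDF now returns {page_number: base64_text, ...}
pages = {1: base64.b64encode("page one text".encode("utf-8")).decode("utf-8")}
decoded = {page: decodeText(text) for page, text in pages.items()}
print(decoded)  # {1: 'page one text'}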