Spaces:
Sleeping
Sleeping
Commit
·
3fa5f95
1
Parent(s):
ba4a6fd
Changed `username` to `user_id`
Browse files- app.py +32 -22
- functions.py +63 -55
app.py
CHANGED
@@ -20,15 +20,16 @@ app.add_middleware(
|
|
20 |
allow_headers=["*"],
|
21 |
)
|
22 |
|
23 |
-
app.include_router(speech_translator_router, prefix="/speech")
|
24 |
|
25 |
|
26 |
@app.post("/signup")
|
27 |
-
async def sign_up(email, password):
|
28 |
try:
|
29 |
res, _ = supabase.auth.sign_up(
|
30 |
{"email": email, "password": password, "role": "user"}
|
31 |
)
|
|
|
32 |
response = {
|
33 |
"status": "success",
|
34 |
"code": 200,
|
@@ -56,6 +57,8 @@ async def sign_in(email, password):
|
|
56 |
user_id = res.user.id
|
57 |
access_token = res.session.access_token
|
58 |
refresh_token = res.session.refresh_token
|
|
|
|
|
59 |
store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
|
60 |
try:
|
61 |
store_id = store_session_check[1][0]["StoreID"]
|
@@ -113,10 +116,17 @@ async def set_session_data(access_token, refresh_token):
|
|
113 |
|
114 |
|
115 |
@app.post("/logout")
|
116 |
-
async def sign_out():
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
-
|
|
|
|
|
120 |
|
121 |
|
122 |
@app.post("/oauth")
|
@@ -129,13 +139,13 @@ async def oauth(provider):
|
|
129 |
@app.post("/newChatbot")
|
130 |
async def newChatbot(chatbotName: str, username: str):
|
131 |
currentBotCount = len(listTables(username=username)["output"])
|
132 |
-
limit = client.table("ConversAI_UserConfig").select("chatbotLimit").eq("
|
133 |
"chatbotLimit"]
|
134 |
if currentBotCount >= int(limit):
|
135 |
return {
|
136 |
"output": "CHATBOT LIMIT EXCEEDED"
|
137 |
}
|
138 |
-
client.table("ConversAI_ChatbotInfo").insert({"
|
139 |
chatbotName = f"convai-{username}-{chatbotName}"
|
140 |
return createTable(tablename=chatbotName)
|
141 |
|
@@ -149,12 +159,12 @@ async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
|
|
149 |
text += page.extract_text()
|
150 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
151 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
152 |
-
currentCount = df[(df["
|
153 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
154 |
"tokenLimit"]
|
155 |
newCount = currentCount + len(text)
|
156 |
if newCount < int(limit):
|
157 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
158 |
"chatbotname", chatbotname).execute()
|
159 |
return addDocuments(text=text, vectorstore=vectorstore)
|
160 |
else:
|
@@ -174,12 +184,12 @@ async def returnText(pdf: UploadFile = File(...)):
|
|
174 |
async def addText(vectorstore: str, text: str):
|
175 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
176 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
177 |
-
currentCount = df[(df["
|
178 |
newCount = currentCount + len(text)
|
179 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
180 |
"tokenLimit"]
|
181 |
if newCount < int(limit):
|
182 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
183 |
"chatbotname", chatbotname).execute()
|
184 |
return addDocuments(text=text, vectorstore=vectorstore)
|
185 |
else:
|
@@ -198,13 +208,13 @@ class AddQAPair(BaseModel):
|
|
198 |
async def addText(addQaPair: AddQAPair):
|
199 |
username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
|
200 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
201 |
-
currentCount = df[(df["
|
202 |
qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
|
203 |
newCount = currentCount + len(qa)
|
204 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
205 |
"tokenLimit"]
|
206 |
if newCount < int(limit):
|
207 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
208 |
"chatbotname", chatbotname).execute()
|
209 |
return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
|
210 |
else:
|
@@ -222,12 +232,12 @@ async def addWebsite(vectorstore: str, websiteUrls: list[str]):
|
|
222 |
[f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
223 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
224 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
225 |
-
currentCount = df[(df["
|
226 |
newCount = currentCount + len(text)
|
227 |
-
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("
|
228 |
"tokenLimit"]
|
229 |
if newCount < int(limit):
|
230 |
-
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("
|
231 |
"chatbotname", chatbotname).execute()
|
232 |
return addDocuments(text=text, vectorstore=vectorstore)
|
233 |
else:
|
@@ -244,7 +254,7 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
|
|
244 |
@app.post("/deleteChatbot")
|
245 |
async def delete(chatbotName: str):
|
246 |
username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
|
247 |
-
client.table('ConversAI_ChatbotInfo').delete().eq('
|
248 |
return deleteTable(tableName=chatbotName)
|
249 |
|
250 |
|
@@ -265,7 +275,7 @@ async def getCount(vectorstore: str):
|
|
265 |
username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
266 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
267 |
return {
|
268 |
-
"currentCount": df[(df['
|
269 |
}
|
270 |
|
271 |
|
@@ -294,4 +304,4 @@ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
|
|
294 |
except:
|
295 |
return {
|
296 |
"output": "UNABLE TO ANSWER QUERY"
|
297 |
-
}
|
|
|
20 |
allow_headers=["*"],
|
21 |
)
|
22 |
|
23 |
+
# app.include_router(speech_translator_router, prefix="/speech")
|
24 |
|
25 |
|
26 |
@app.post("/signup")
|
27 |
+
async def sign_up(email, username, password):
|
28 |
try:
|
29 |
res, _ = supabase.auth.sign_up(
|
30 |
{"email": email, "password": password, "role": "user"}
|
31 |
)
|
32 |
+
createUser(username=username)
|
33 |
response = {
|
34 |
"status": "success",
|
35 |
"code": 200,
|
|
|
57 |
user_id = res.user.id
|
58 |
access_token = res.session.access_token
|
59 |
refresh_token = res.session.refresh_token
|
60 |
+
createUser(username=user_id)
|
61 |
+
|
62 |
store_session_check = supabase.table("Stores").select("*").filter("StoreID", "eq", user_id).execute()
|
63 |
try:
|
64 |
store_id = store_session_check[1][0]["StoreID"]
|
|
|
116 |
|
117 |
|
118 |
@app.post("/logout")
|
119 |
+
async def sign_out(store_id):
    """Delete the matching ``Stores`` rows and sign out of Supabase auth.

    Args:
        store_id: Value compared against the ``StoreID`` column of the
            ``Stores`` table; every matching row is deleted.

    Returns:
        ``{"message": "success"}`` when both Supabase calls complete.

    Raises:
        HTTPException: With status 400 and the text of whatever exception
            either Supabase call raised.
    """
    try:
        supabase.table("Stores").delete().eq("StoreID", store_id).execute()
        # The sign-out return value is not used; only a failure matters here.
        supabase.auth.sign_out()
        return {"message": "success"}
    except Exception as e:
        # Chain the cause so the original traceback is preserved.
        raise HTTPException(status_code=400, detail=str(e)) from e
|
130 |
|
131 |
|
132 |
@app.post("/oauth")
|
|
|
139 |
@app.post("/newChatbot")
|
140 |
async def newChatbot(chatbotName: str, username: str):
    """Register a chatbot for a user and create its vector collection.

    Counts the user's existing collections via ``listTables``, compares the
    count with the ``chatbotLimit`` stored in ``ConversAI_UserConfig``, and,
    when under the limit, inserts a ``ConversAI_ChatbotInfo`` row and creates
    a collection named ``convai-{username}-{chatbotName}``.

    Args:
        chatbotName: Short chatbot name (without the ``convai-`` prefix).
        username: Value matched against the ``user_id`` column.

    Returns:
        ``{"output": "CHATBOT LIMIT EXCEEDED"}`` when the limit is reached,
        ``{"error": <message>}`` when the existing collections could not be
        listed, otherwise whatever ``createTable`` returns.
    """
    existing = listTables(username=username)
    if "error" in existing:
        # listTables reports failures as {"error": exc} instead of raising;
        # indexing ["output"] on that dict would raise KeyError.
        return {"error": str(existing["error"])}
    currentBotCount = len(existing["output"])

    # NOTE(review): .data[0] raises IndexError if the user has no config row.
    configRows = (
        client.table("ConversAI_UserConfig")
        .select("chatbotLimit")
        .eq("user_id", username)
        .execute()
        .data
    )
    limit = configRows[0]["chatbotLimit"]
    if currentBotCount >= int(limit):
        return {
            "output": "CHATBOT LIMIT EXCEEDED"
        }

    client.table("ConversAI_ChatbotInfo").insert(
        {"user_id": username, "chatbotname": chatbotName}
    ).execute()
    chatbotName = f"convai-{username}-{chatbotName}"
    return createTable(tablename=chatbotName)
|
151 |
|
|
|
159 |
text += page.extract_text()
|
160 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
161 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
162 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
163 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
164 |
"tokenLimit"]
|
165 |
newCount = currentCount + len(text)
|
166 |
if newCount < int(limit):
|
167 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
168 |
"chatbotname", chatbotname).execute()
|
169 |
return addDocuments(text=text, vectorstore=vectorstore)
|
170 |
else:
|
|
|
184 |
async def addText(vectorstore: str, text: str):
|
185 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
186 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
187 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
188 |
newCount = currentCount + len(text)
|
189 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
190 |
"tokenLimit"]
|
191 |
if newCount < int(limit):
|
192 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
193 |
"chatbotname", chatbotname).execute()
|
194 |
return addDocuments(text=text, vectorstore=vectorstore)
|
195 |
else:
|
|
|
208 |
async def addText(addQaPair: AddQAPair):
|
209 |
username, chatbotname = addQaPair.vectorstore.split("-")[1], addQaPair.vectorstore.split("-")[2]
|
210 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
211 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
212 |
qa = f"QUESTION: {addQaPair.question}\tANSWER: {addQaPair.answer}"
|
213 |
newCount = currentCount + len(qa)
|
214 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
215 |
"tokenLimit"]
|
216 |
if newCount < int(limit):
|
217 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
218 |
"chatbotname", chatbotname).execute()
|
219 |
return addDocuments(text=qa, vectorstore=addQaPair.vectorstore)
|
220 |
else:
|
|
|
232 |
[f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
|
233 |
username, chatbotname = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
234 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
235 |
+
currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
|
236 |
newCount = currentCount + len(text)
|
237 |
+
limit = client.table("ConversAI_UserConfig").select("tokenLimit").eq("user_id", username).execute().data[0][
|
238 |
"tokenLimit"]
|
239 |
if newCount < int(limit):
|
240 |
+
client.table("ConversAI_ChatbotInfo").update({"charactercount": str(newCount)}).eq("user_id", username).eq(
|
241 |
"chatbotname", chatbotname).execute()
|
242 |
return addDocuments(text=text, vectorstore=vectorstore)
|
243 |
else:
|
|
|
254 |
@app.post("/deleteChatbot")
|
255 |
async def delete(chatbotName: str):
    """Delete a chatbot's info row and its vector collection.

    Args:
        chatbotName: Full collection name; the second and third
            ``-``-separated fields are read as the user id and the short
            chatbot name.

    Returns:
        Whatever ``deleteTable`` returns for the full collection name.
    """
    # Keep the full name: collections are created as
    # "convai-{username}-{chatbotName}", so deleteTable needs the whole string,
    # not just the short bot name.
    collectionName = chatbotName
    # NOTE(review): a user id or bot name that itself contains "-" would be
    # cut short by this split — confirm the id format used for user_id.
    parts = collectionName.split("-")
    username, botName = parts[1], parts[2]
    client.table('ConversAI_ChatbotInfo').delete().eq('user_id', username).eq('chatbotname', botName).execute()
    return deleteTable(tableName=collectionName)
|
259 |
|
260 |
|
|
|
275 |
username, chatbotName = vectorstore.split("-")[1], vectorstore.split("-")[2]
|
276 |
df = pd.DataFrame(client.table("ConversAI_ChatbotInfo").select("*").execute().data)
|
277 |
return {
|
278 |
+
"currentCount": df[(df['user_id'] == username) & (df['chatbotname'] == chatbotName)]['charactercount'].iloc[0]
|
279 |
}
|
280 |
|
281 |
|
|
|
304 |
except:
|
305 |
return {
|
306 |
"output": "UNABLE TO ANSWER QUERY"
|
307 |
+
}
|
functions.py
CHANGED
@@ -32,19 +32,18 @@ import base64
|
|
32 |
import time
|
33 |
import requests
|
34 |
|
35 |
-
|
36 |
load_dotenv("secrets.env")
|
37 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|
38 |
qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
|
39 |
model_kwargs = {"device": "cuda"}
|
40 |
encode_kwargs = {"normalize_embeddings": True}
|
41 |
vectorEmbeddings = HuggingFaceEmbeddings(
|
42 |
-
model_name
|
43 |
-
model_kwargs
|
44 |
-
encode_kwargs
|
45 |
)
|
46 |
-
reader = easyocr.Reader(['en'], gpu
|
47 |
-
sparseEmbeddings = FastEmbedSparse(model
|
48 |
prompt = """
|
49 |
INSTRUCTIONS:
|
50 |
=====================================
|
@@ -81,46 +80,48 @@ store = InMemoryStore()
|
|
81 |
chatHistoryStore = dict()
|
82 |
|
83 |
|
84 |
-
def createUser(username: str
|
85 |
try:
|
86 |
userData = client.table("ConversAI_UserInfo").select("*").execute().data
|
87 |
-
if username not in [userData[x]["
|
88 |
-
client.table("ConversAI_UserInfo").insert({"
|
89 |
-
client.table("ConversAI_UserConfig").insert({"
|
90 |
return {
|
91 |
"output": "SUCCESS"
|
92 |
}
|
93 |
-
else:
|
94 |
return {
|
95 |
"output": "USER ALREADY EXISTS"
|
96 |
}
|
97 |
except Exception as e:
|
98 |
return {
|
99 |
"error": e
|
100 |
-
}
|
101 |
|
102 |
|
103 |
-
def matchPassword(username: str, password: str) -> str:
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
116 |
|
117 |
|
118 |
def createTable(tablename: str):
|
119 |
global vectorEmbeddings
|
120 |
global sparseEmbeddings
|
121 |
qdrant = QdrantVectorStore.from_documents(
|
122 |
-
documents
|
123 |
-
embedding
|
124 |
sparse_embedding=sparseEmbeddings,
|
125 |
url=os.environ["QDRANT_URL"],
|
126 |
prefer_grpc=True,
|
@@ -132,21 +133,22 @@ def createTable(tablename: str):
|
|
132 |
"output": "SUCCESS"
|
133 |
}
|
134 |
|
|
|
135 |
def addDocuments(text: str, vectorstore: str):
|
136 |
global vectorEmbeddings
|
137 |
global sparseEmbeddings
|
138 |
global store
|
139 |
parentSplitter = RecursiveCharacterTextSplitter(
|
140 |
-
chunk_size
|
141 |
-
add_start_index
|
142 |
)
|
143 |
childSplitter = RecursiveCharacterTextSplitter(
|
144 |
-
chunk_size
|
145 |
-
add_start_index
|
146 |
)
|
147 |
-
texts = [Document(page_content
|
148 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
149 |
-
embedding
|
150 |
sparse_embedding=sparseEmbeddings,
|
151 |
collection_name=vectorstore,
|
152 |
url=os.environ["QDRANT_URL"],
|
@@ -159,7 +161,7 @@ def addDocuments(text: str, vectorstore: str):
|
|
159 |
child_splitter=childSplitter,
|
160 |
parent_splitter=parentSplitter
|
161 |
)
|
162 |
-
retriever.add_documents(documents
|
163 |
return {
|
164 |
"output": "SUCCESS"
|
165 |
}
|
@@ -169,7 +171,8 @@ def format_docs(docs: str):
|
|
169 |
context = "\n\n".join(doc.page_content for doc in docs)
|
170 |
if context == "":
|
171 |
context = "No context found"
|
172 |
-
else:
|
|
|
173 |
return context
|
174 |
|
175 |
|
@@ -186,19 +189,19 @@ def trimMessages(chain_input):
|
|
186 |
pass
|
187 |
else:
|
188 |
chatHistoryStore[storeName].clear()
|
189 |
-
for message in messages[-1:
|
190 |
chatHistoryStore[storeName].add_message(message)
|
191 |
return True
|
192 |
|
193 |
|
194 |
def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
|
195 |
-
global prompt
|
196 |
global client
|
197 |
global vectorEmbeddings
|
198 |
global sparseEmbeddings
|
199 |
vectorStoreName = vectorstore
|
200 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
201 |
-
embedding
|
202 |
sparse_embedding=sparseEmbeddings,
|
203 |
collection_name=vectorstore,
|
204 |
url=os.environ["QDRANT_URL"],
|
@@ -216,25 +219,25 @@ def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192")
|
|
216 |
base_compressor=compressor, base_retriever=retriever
|
217 |
)
|
218 |
baseChain = (
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
|
|
224 |
messageChain = RunnableWithMessageHistory(
|
225 |
baseChain,
|
226 |
get_session_history,
|
227 |
-
input_messages_key
|
228 |
-
history_messages_key
|
229 |
)
|
230 |
-
chain = RunnablePassthrough.assign(messages_trimmed
|
231 |
return {
|
232 |
"output": chain.invoke(
|
233 |
{"question": query},
|
234 |
{"configurable": {"session_id": vectorStoreName}}
|
235 |
)
|
236 |
}
|
237 |
-
|
238 |
|
239 |
|
240 |
def deleteTable(tableName: str):
|
@@ -249,21 +252,24 @@ def deleteTable(tableName: str):
|
|
249 |
"error": e
|
250 |
}
|
251 |
|
|
|
252 |
def listTables(username: str):
|
253 |
try:
|
254 |
global qdrantClient
|
255 |
qdrantCollections = qdrantClient.get_collections()
|
256 |
return {
|
257 |
-
"output": list(filter(lambda x: True if x.split("-")[1] == username else False,
|
|
|
258 |
}
|
259 |
except Exception as e:
|
260 |
return {
|
261 |
"error": e
|
262 |
}
|
263 |
-
|
264 |
|
265 |
-
|
|
|
266 |
start = time.time()
|
|
|
267 |
def getLinksFromPage(url: str) -> list:
|
268 |
response = requests.get(url)
|
269 |
soup = BeautifulSoup(response.content, "lxml")
|
@@ -281,6 +287,7 @@ def getLinks(url: str, timeout = 30):
|
|
281 |
else:
|
282 |
continue
|
283 |
return links
|
|
|
284 |
links = getLinksFromPage(url)
|
285 |
uniqueLinks = set()
|
286 |
for link in links:
|
@@ -292,22 +299,23 @@ def getLinks(url: str, timeout = 30):
|
|
292 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
293 |
|
294 |
|
295 |
-
|
296 |
def getTextFromImagePDF(pdfBytes):
    """Run OCR over every page image of a PDF and return the combined text.

    Args:
        pdfBytes: Raw PDF content handed to ``convert_from_bytes``.

    Returns:
        One string: the text entries of each page joined by a newline, and
        the pages joined by three newlines.
    """
    pageImages = convert_from_bytes(pdfBytes)
    pageTexts = []
    for pageImage in pageImages:
        detections = reader.readtext(np.array(pageImage), paragraph=True)
        # Index 1 of each detection is the entry that gets joined as text.
        pageTexts.append("\n".join([detection[1] for detection in detections]))
    return "\n\n\n".join(pageTexts)
|
303 |
|
|
|
304 |
def getTranscript(urls: str):
|
305 |
urls = urls.split(",")
|
306 |
texts = []
|
307 |
for url in urls:
|
308 |
try:
|
309 |
loader = YoutubeLoader.from_youtube_url(
|
310 |
-
url, add_video_info
|
311 |
)
|
312 |
doc = " ".join([x.page_content for x in loader.load()])
|
313 |
texts.append(doc)
|
@@ -318,12 +326,12 @@ def getTranscript(urls: str):
|
|
318 |
|
319 |
|
320 |
def analyzeData(query, dataframe):
|
321 |
-
llm = ChatGroq(name
|
322 |
-
df = SmartDataframe(dataframe, config
|
323 |
response = df.chat(query)
|
324 |
if os.path.isfile(response):
|
325 |
with open(response, "rb") as file:
|
326 |
b64string = base64.b64encode(file.read()).decode("utf-8")
|
327 |
return f"data:image/png;base64,{b64string}"
|
328 |
else:
|
329 |
-
return response
|
|
|
32 |
import time
|
33 |
import requests
|
34 |
|
|
|
35 |
load_dotenv("secrets.env")
|
36 |
client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
|
37 |
qdrantClient = QdrantClient(url=os.environ["QDRANT_URL"], api_key=os.environ["QDRANT_API_KEY"])
|
38 |
model_kwargs = {"device": "cuda"}
|
39 |
encode_kwargs = {"normalize_embeddings": True}
|
40 |
vectorEmbeddings = HuggingFaceEmbeddings(
|
41 |
+
model_name="BAAI/bge-m3",
|
42 |
+
model_kwargs=model_kwargs,
|
43 |
+
encode_kwargs=encode_kwargs
|
44 |
)
|
45 |
+
reader = easyocr.Reader(['en'], gpu=True, model_storage_directory="/app/EasyOCRModels")
|
46 |
+
sparseEmbeddings = FastEmbedSparse(model="Qdrant/BM25")
|
47 |
prompt = """
|
48 |
INSTRUCTIONS:
|
49 |
=====================================
|
|
|
80 |
chatHistoryStore = dict()
|
81 |
|
82 |
|
83 |
+
def createUser(username: str) -> dict:
    """Create the info and config rows for a user if they do not exist yet.

    Args:
        username: Value stored in / matched against the ``user_id`` column of
            ``ConversAI_UserInfo`` and ``ConversAI_UserConfig``.

    Returns:
        ``{"output": "SUCCESS"}`` after inserting both rows,
        ``{"output": "USER ALREADY EXISTS"}`` when a matching
        ``ConversAI_UserInfo`` row is found, or ``{"error": exc}`` holding the
        exception raised by any of the database calls.
    """
    try:
        # Ask the database for the matching row only, instead of downloading
        # the whole table and scanning it client-side.
        existing = (
            client.table("ConversAI_UserInfo")
            .select("user_id")
            .eq("user_id", username)
            .execute()
            .data
        )
        if existing:
            return {
                "output": "USER ALREADY EXISTS"
            }
        client.table("ConversAI_UserInfo").insert({"user_id": username}).execute()
        client.table("ConversAI_UserConfig").insert({"user_id": username}).execute()
        return {
            "output": "SUCCESS"
        }
    except Exception as e:
        # Failures are reported in the return value rather than raised.
        return {
            "error": e
        }
|
100 |
|
101 |
|
102 |
+
# def matchPassword(username: str, password: str) -> str:
|
103 |
+
# response = (
|
104 |
+
# client.table("ConversAI_UserInfo")
|
105 |
+
# .select("*")
|
106 |
+
# .eq("username", username)
|
107 |
+
# .execute()
|
108 |
+
# )
|
109 |
+
# try:
|
110 |
+
# return {
|
111 |
+
# "output": password == response.data[0]["password"]
|
112 |
+
# }
|
113 |
+
# except:
|
114 |
+
# return {
|
115 |
+
# "output": "USER DOESN'T EXIST"
|
116 |
+
# }
|
117 |
|
118 |
|
119 |
def createTable(tablename: str):
|
120 |
global vectorEmbeddings
|
121 |
global sparseEmbeddings
|
122 |
qdrant = QdrantVectorStore.from_documents(
|
123 |
+
documents=[],
|
124 |
+
embedding=vectorEmbeddings,
|
125 |
sparse_embedding=sparseEmbeddings,
|
126 |
url=os.environ["QDRANT_URL"],
|
127 |
prefer_grpc=True,
|
|
|
133 |
"output": "SUCCESS"
|
134 |
}
|
135 |
|
136 |
+
|
137 |
def addDocuments(text: str, vectorstore: str):
|
138 |
global vectorEmbeddings
|
139 |
global sparseEmbeddings
|
140 |
global store
|
141 |
parentSplitter = RecursiveCharacterTextSplitter(
|
142 |
+
chunk_size=2100,
|
143 |
+
add_start_index=True
|
144 |
)
|
145 |
childSplitter = RecursiveCharacterTextSplitter(
|
146 |
+
chunk_size=300,
|
147 |
+
add_start_index=True
|
148 |
)
|
149 |
+
texts = [Document(page_content=text)]
|
150 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
151 |
+
embedding=vectorEmbeddings,
|
152 |
sparse_embedding=sparseEmbeddings,
|
153 |
collection_name=vectorstore,
|
154 |
url=os.environ["QDRANT_URL"],
|
|
|
161 |
child_splitter=childSplitter,
|
162 |
parent_splitter=parentSplitter
|
163 |
)
|
164 |
+
retriever.add_documents(documents=texts)
|
165 |
return {
|
166 |
"output": "SUCCESS"
|
167 |
}
|
|
|
171 |
context = "\n\n".join(doc.page_content for doc in docs)
|
172 |
if context == "":
|
173 |
context = "No context found"
|
174 |
+
else:
|
175 |
+
pass
|
176 |
return context
|
177 |
|
178 |
|
|
|
189 |
pass
|
190 |
else:
|
191 |
chatHistoryStore[storeName].clear()
|
192 |
+
for message in messages[-1:]:
|
193 |
chatHistoryStore[storeName].add_message(message)
|
194 |
return True
|
195 |
|
196 |
|
197 |
def answerQuery(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192") -> str:
|
198 |
+
global prompt
|
199 |
global client
|
200 |
global vectorEmbeddings
|
201 |
global sparseEmbeddings
|
202 |
vectorStoreName = vectorstore
|
203 |
vectorstore = QdrantVectorStore.from_existing_collection(
|
204 |
+
embedding=vectorEmbeddings,
|
205 |
sparse_embedding=sparseEmbeddings,
|
206 |
collection_name=vectorstore,
|
207 |
url=os.environ["QDRANT_URL"],
|
|
|
219 |
base_compressor=compressor, base_retriever=retriever
|
220 |
)
|
221 |
baseChain = (
|
222 |
+
{"context": RunnableLambda(lambda x: x["question"]) | retriever | RunnableLambda(format_docs),
|
223 |
+
"question": RunnablePassthrough(), "chatHistory": RunnablePassthrough()}
|
224 |
+
| prompt
|
225 |
+
| ChatGroq(model=llmModel, temperature=0.75, max_tokens=512)
|
226 |
+
| StrOutputParser()
|
227 |
+
)
|
228 |
messageChain = RunnableWithMessageHistory(
|
229 |
baseChain,
|
230 |
get_session_history,
|
231 |
+
input_messages_key="question",
|
232 |
+
history_messages_key="chatHistory"
|
233 |
)
|
234 |
+
chain = RunnablePassthrough.assign(messages_trimmed=trimMessages) | messageChain
|
235 |
return {
|
236 |
"output": chain.invoke(
|
237 |
{"question": query},
|
238 |
{"configurable": {"session_id": vectorStoreName}}
|
239 |
)
|
240 |
}
|
|
|
241 |
|
242 |
|
243 |
def deleteTable(tableName: str):
|
|
|
252 |
"error": e
|
253 |
}
|
254 |
|
255 |
+
|
256 |
def listTables(username: str):
    """List the Qdrant collections whose name belongs to a user.

    A collection is considered the user's when the second ``-``-separated
    field of its name equals ``username``.

    Args:
        username: Value compared with the second field of each collection name.

    Returns:
        ``{"output": [names...]}`` on success, or ``{"error": exc}`` holding
        the exception raised while querying Qdrant.
    """
    try:
        collections = qdrantClient.get_collections().collections
        owned = []
        for collection in collections:
            fields = collection.name.split("-")
            # Names without a second field cannot belong to any user; skip
            # them rather than letting an IndexError fail the whole listing.
            if len(fields) > 1 and fields[1] == username:
                owned.append(collection.name)
        return {
            "output": owned
        }
    except Exception as e:
        return {
            "error": e
        }
|
|
|
268 |
|
269 |
+
|
270 |
+
def getLinks(url: str, timeout=30):
|
271 |
start = time.time()
|
272 |
+
|
273 |
def getLinksFromPage(url: str) -> list:
|
274 |
response = requests.get(url)
|
275 |
soup = BeautifulSoup(response.content, "lxml")
|
|
|
287 |
else:
|
288 |
continue
|
289 |
return links
|
290 |
+
|
291 |
links = getLinksFromPage(url)
|
292 |
uniqueLinks = set()
|
293 |
for link in links:
|
|
|
299 |
return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
|
300 |
|
301 |
|
|
|
302 |
def getTextFromImagePDF(pdfBytes):
    """Run OCR over every page image of a PDF and return the combined text.

    Args:
        pdfBytes: Raw PDF content handed to ``convert_from_bytes``.

    Returns:
        One string: the text entries of each page joined by a newline, and
        the pages joined by three newlines.
    """
    pageImages = convert_from_bytes(pdfBytes)
    pageTexts = []
    for pageImage in pageImages:
        detections = reader.readtext(np.array(pageImage), paragraph=True)
        # Index 1 of each detection is the entry that gets joined as text.
        pageTexts.append("\n".join([detection[1] for detection in detections]))
    return "\n\n\n".join(pageTexts)
|
310 |
|
311 |
+
|
312 |
def getTranscript(urls: str):
|
313 |
urls = urls.split(",")
|
314 |
texts = []
|
315 |
for url in urls:
|
316 |
try:
|
317 |
loader = YoutubeLoader.from_youtube_url(
|
318 |
+
url, add_video_info=False
|
319 |
)
|
320 |
doc = " ".join([x.page_content for x in loader.load()])
|
321 |
texts.append(doc)
|
|
|
326 |
|
327 |
|
328 |
def analyzeData(query, dataframe):
    """Answer a natural-language query about a dataframe via an LLM.

    Args:
        query: Question passed to ``SmartDataframe.chat``.
        dataframe: Data wrapped in a ``SmartDataframe``.

    Returns:
        A ``data:image/png;base64,...`` URI when the chat response is a path
        to an existing file, otherwise the chat response unchanged.
    """
    # `model=` selects the Groq model, matching the ChatGroq call used for
    # question answering elsewhere in this file; `name=` does not set the model.
    llm = ChatGroq(model="llama-3.1-8b-instant")
    df = SmartDataframe(dataframe, config={"llm": llm, "verbose": False})
    response = df.chat(query)
    # Only strings can be file paths; other response types (numbers, frames)
    # must not be handed to os.path.isfile.
    if isinstance(response, str) and os.path.isfile(response):
        with open(response, "rb") as file:
            b64string = base64.b64encode(file.read()).decode("utf-8")
        return f"data:image/png;base64,{b64string}"
    return response
|