enhance performance
app.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import
+from typing import Optional, Tuple
 from queue import Empty, Queue
 from threading import Thread
 from bot.web_scrapping.crawler_and_indexer import content_crawler_and_index
@@ -16,12 +16,10 @@ human_message_prompt_template = HumanMessagePromptTemplate.from_template("{text}
 
 
 def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
-    index = content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
     if chat_mode:
-        return
+        return content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
     else:
-
-        return fb
+        return 'Training Completed'
 
 
 def chat_start(
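For context, the sketch below distills the new control flow of bot_learning: indexing now happens only on the chat_mode path, while the training path returns a plain status string. The crawler stub and the argument values are hypothetical stand-ins so the snippet runs on its own; they are not part of the Space's code.

def fake_content_crawler_and_index(url, llm, prompt, file_format):
    # Hypothetical stand-in for bot.web_scrapping.crawler_and_indexer.content_crawler_and_index
    return f"index built from {url} ({file_format})"

def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
    if chat_mode:
        # Chat mode: build/load the index and hand it straight back to the caller
        return fake_content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
    else:
        # Training-only mode: the caller only needs a confirmation message
        return 'Training Completed'

print(bot_learning('https://example.com', 'html', llm=None, prompt=None, chat_mode=True))
print(bot_learning('https://example.com', 'html', llm=None, prompt=None))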
bot/web_scrapping/searchable_index.py
CHANGED
@@ -13,6 +13,7 @@ import pandas as pd
 import threading
 import glob
 import os
+import asyncio
 import queue
 
 
@@ -72,41 +73,52 @@ class SearchableIndex:
         if os.path.exists(index_store):
             local_db = FAISS.load_local(index_store, embeddings)
             local_db.merge_from(faiss_db)
-
-            logger.info("Merge index completed")
+            operation_info = "Merge"
         else:
-            faiss_db
-
-
+            local_db = faiss_db  # Use the provided faiss_db directly for a new store
+            operation_info = "New store creation"
+
+        local_db.save_local(index_store)
+        logger.info(f"{operation_info} index completed")
         return local_db
 
     @classmethod
-    def
+    def load_index(cls, index_files, embeddings, logger):
         if index_files:
-
-
-
-
+            return FAISS.load_local(index_files[0], embeddings)
+        logger.warning("Index store does not exist")
+        return None
+
+    @classmethod
+    def check_and_load_index(cls, index_files, embeddings, logger, result_queue):
+        local_db = cls.load_index(index_files, embeddings, logger)
+        result_queue.put(local_db)
+
+    @classmethod
+    def load_index_asynchronously(cls, index_files, embeddings, logger):
+        result_queue = queue.Queue()
+        thread = threading.Thread(
+            target=cls.check_and_load_index,
+            args=(index_files, embeddings, logger, result_queue)
+        )
+        thread.start()
+        thread.join()  # Wait for the thread to finish
+        return result_queue.get()
 
     @classmethod
     def embed_index(cls, url, path, llm, prompt, target_col=None, sheet_name=None):
         embeddings = OpenAIEmbeddings()
 
-        if
-
-
-
-
-
-
-        index_files = glob.glob(os.path.join(path, '*_index'))
-
-        result_queue = queue.Queue()  # Create a queue to store the result
+        if path:
+            if url != 'NO_URL':
+                doc_list = cls.get_splits(path, target_col, sheet_name)
+                faiss_db = FAISS.from_texts(doc_list, embeddings)
+                index_store = os.path.splitext(path)[0] + "_index"
+                local_db = cls.merge_or_create_index(index_store, faiss_db, embeddings, logger)
+                return Query(prompt, llm, local_db)
 
-
-        thread.start()
-        local_db = result_queue.get()  # Retrieve the result from the queue
+            index_files = glob.glob(os.path.join(path, '*_index'))
+            local_db = cls.load_index_asynchronously(index_files, embeddings, logger)
         return Query(prompt, llm, local_db)
 
 
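The new load_index_asynchronously helper is built on a small thread-plus-queue pattern: a worker thread loads the index, drops the result into a queue, and the caller joins the thread before reading the queue, so the helper still returns a plain value. Below is a minimal, self-contained sketch of that pattern, assuming a hypothetical load_index stand-in in place of FAISS.load_local so the example runs without an index on disk.

import queue
import threading

def load_index(index_files):
    # Stand-in loader: pretend to open the first matching index directory
    return f"loaded {index_files[0]}" if index_files else None

def check_and_load_index(index_files, result_queue):
    # Worker: load the index and hand the result back through the queue
    result_queue.put(load_index(index_files))

def load_index_asynchronously(index_files):
    result_queue = queue.Queue()
    thread = threading.Thread(target=check_and_load_index, args=(index_files, result_queue))
    thread.start()
    thread.join()  # Block until the worker finishes, then read its result
    return result_queue.get()

print(load_index_asynchronously(['combined_content_index']))  # -> loaded combined_content_index
print(load_index_asynchronously([]))                          # -> None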
learning_documents/combined_content_index/index.faiss
CHANGED
Binary files a/learning_documents/combined_content_index/index.faiss and b/learning_documents/combined_content_index/index.faiss differ
learning_documents/combined_content_index/index.pkl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:b0d19bc7afb8227f67225b52afd8c746bc67aceca43fb5e5c84a19e94cda0e9d
+size 3959
requirements.txt
CHANGED
@@ -154,6 +154,7 @@ typer==0.9.0
 typing-inspect==0.9.0
 typing_extensions==4.8.0
 tzdata==2023.3
+twine
 unstructured==0.10.29
 urllib3==1.26.18
 uvicorn==0.24.0.post1