myn0908 committed on
Commit
e44f2dc
·
1 Parent(s): 5dca21e

enhance performance

Browse files
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Optional, Tuple
2
  from queue import Empty, Queue
3
  from threading import Thread
4
  from bot.web_scrapping.crawler_and_indexer import content_crawler_and_index
@@ -16,12 +16,10 @@ human_message_prompt_template = HumanMessagePromptTemplate.from_template("{text}
16
 
17
 
18
  def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
19
- index = content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
20
  if chat_mode:
21
- return index
22
  else:
23
- fb = 'Training Completed'
24
- return fb
25
 
26
 
27
  def chat_start(
 
1
+ from typing import Optional, Tuple
2
  from queue import Empty, Queue
3
  from threading import Thread
4
  from bot.web_scrapping.crawler_and_indexer import content_crawler_and_index
 
16
 
17
 
18
  def bot_learning(urls, file_formats, llm, prompt, chat_mode=False):
 
19
  if chat_mode:
20
+ return content_crawler_and_index(url=str(urls), llm=llm, prompt=prompt, file_format=file_formats)
21
  else:
22
+ return 'Training Completed'
 
23
 
24
 
25
  def chat_start(
bot/web_scrapping/searchable_index.py CHANGED
@@ -13,6 +13,7 @@ import pandas as pd
13
  import threading
14
  import glob
15
  import os
 
16
  import queue
17
 
18
 
@@ -72,41 +73,52 @@ class SearchableIndex:
72
  if os.path.exists(index_store):
73
  local_db = FAISS.load_local(index_store, embeddings)
74
  local_db.merge_from(faiss_db)
75
- local_db.save_local(index_store)
76
- logger.info("Merge index completed")
77
  else:
78
- faiss_db.save_local(folder_path=index_store)
79
- logger.info("New store created and loaded...")
80
- local_db = FAISS.load_local(index_store, embeddings)
 
 
81
  return local_db
82
 
83
  @classmethod
84
- def check_and_load_index(cls, index_files, embeddings, logger, result_queue):
85
  if index_files:
86
- local_db = FAISS.load_local(index_files[0], embeddings)
87
- else:
88
- raise logger.warning("Index store does not exist")
89
- result_queue.put(local_db) # Put the result in the queue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  @classmethod
92
  def embed_index(cls, url, path, llm, prompt, target_col=None, sheet_name=None):
93
  embeddings = OpenAIEmbeddings()
94
 
95
- if url != 'NO_URL' and path:
96
- doc_list = cls.get_splits(path, target_col, sheet_name)
97
- faiss_db = FAISS.from_texts(doc_list, embeddings)
98
- index_store = os.path.splitext(path)[0] + "_index"
99
- local_db = cls.merge_or_create_index(index_store, faiss_db, embeddings, logger)
100
- return Query(prompt, llm, local_db)
101
- elif url == 'NO_URL' and path:
102
- index_files = glob.glob(os.path.join(path, '*_index'))
103
-
104
- result_queue = queue.Queue() # Create a queue to store the result
105
 
106
- thread = threading.Thread(target=cls.check_and_load_index,
107
- args=(index_files, embeddings, logger, result_queue))
108
- thread.start()
109
- local_db = result_queue.get() # Retrieve the result from the queue
110
  return Query(prompt, llm, local_db)
111
 
112
 
 
13
  import threading
14
  import glob
15
  import os
16
+ import asyncio
17
  import queue
18
 
19
 
 
73
  if os.path.exists(index_store):
74
  local_db = FAISS.load_local(index_store, embeddings)
75
  local_db.merge_from(faiss_db)
76
+ operation_info = "Merge"
 
77
  else:
78
+ local_db = faiss_db # Use the provided faiss_db directly for a new store
79
+ operation_info = "New store creation"
80
+
81
+ local_db.save_local(index_store)
82
+ logger.info(f"{operation_info} index completed")
83
  return local_db
84
 
85
  @classmethod
86
+ def load_index(cls, index_files, embeddings, logger):
87
  if index_files:
88
+ return FAISS.load_local(index_files[0], embeddings)
89
+ logger.warning("Index store does not exist")
90
+ return None
91
+
92
+ @classmethod
93
+ def check_and_load_index(cls, index_files, embeddings, logger, result_queue):
94
+ local_db = cls.load_index(index_files, embeddings, logger)
95
+ result_queue.put(local_db)
96
+
97
+ @classmethod
98
+ def load_index_asynchronously(cls, index_files, embeddings, logger):
99
+ result_queue = queue.Queue()
100
+ thread = threading.Thread(
101
+ target=cls.check_and_load_index,
102
+ args=(index_files, embeddings, logger, result_queue)
103
+ )
104
+ thread.start()
105
+ thread.join() # Wait for the thread to finish
106
+ return result_queue.get()
107
 
108
  @classmethod
109
  def embed_index(cls, url, path, llm, prompt, target_col=None, sheet_name=None):
110
  embeddings = OpenAIEmbeddings()
111
 
112
+ if path:
113
+ if url != 'NO_URL':
114
+ doc_list = cls.get_splits(path, target_col, sheet_name)
115
+ faiss_db = FAISS.from_texts(doc_list, embeddings)
116
+ index_store = os.path.splitext(path)[0] + "_index"
117
+ local_db = cls.merge_or_create_index(index_store, faiss_db, embeddings, logger)
118
+ return Query(prompt, llm, local_db)
 
 
 
119
 
120
+ index_files = glob.glob(os.path.join(path, '*_index'))
121
+ local_db = cls.load_index_asynchronously(index_files, embeddings, logger)
 
 
122
  return Query(prompt, llm, local_db)
123
 
124
 
learning_documents/combined_content_index/index.faiss CHANGED
Binary files a/learning_documents/combined_content_index/index.faiss and b/learning_documents/combined_content_index/index.faiss differ
 
learning_documents/combined_content_index/index.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbc4c242c12ce0d77cdeb962c2854f6c0888ce56f6d098a40570e16caff75d52
3
- size 6059
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d19bc7afb8227f67225b52afd8c746bc67aceca43fb5e5c84a19e94cda0e9d
3
+ size 3959
requirements.txt CHANGED
@@ -154,6 +154,7 @@ typer==0.9.0
154
  typing-inspect==0.9.0
155
  typing_extensions==4.8.0
156
  tzdata==2023.3
 
157
  unstructured==0.10.29
158
  urllib3==1.26.18
159
  uvicorn==0.24.0.post1
 
154
  typing-inspect==0.9.0
155
  typing_extensions==4.8.0
156
  tzdata==2023.3
157
+ twine
158
  unstructured==0.10.29
159
  urllib3==1.26.18
160
  uvicorn==0.24.0.post1