NickNYU committed
Commit 4ccf537 · Parent(s): 5dfd401
Makefile CHANGED
@@ -8,7 +8,7 @@ format:
 lint:
 	mypy .
 	black . --check
-	ruff check .
+	ruff check . --fix
 
 test:
 	pytest tests
README.md CHANGED
@@ -11,3 +11,47 @@ license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Then, create a new Python virtual environment. The commands below create an environment in `.venv`
+and activate it:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+If you are on Windows, use the following to activate your virtual environment:
+
+```bash
+.venv\Scripts\activate
+```
+
+Install the required dependencies (this will also install gpt-index through `pip install -e .`
+so that you can start developing on it):
+
+```bash
+pip install -r requirements.txt
+```
+
+Now you should be set!
+
+### Validating your Change
+
+Let's make sure to `format`/`lint` our change. For bigger changes,
+let's also `test` it and perhaps create an example notebook.
+
+#### Formatting/Linting
+
+You can format and lint your changes with the following commands in the root directory:
+
+```bash
+make format; make lint
+```
+
+You can also make use of our pre-commit hooks by setting up git hook scripts:
+
+```bash
+pre-commit install
+```
+
+We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py CHANGED
@@ -1,15 +1,9 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-import os
-import pickle
-import streamlit as st
-
 import logging
 import sys
 
+import streamlit as st
+
+from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
 
 logging.basicConfig(
     stream=sys.stdout, level=logging.DEBUG
@@ -34,59 +28,11 @@ with st.sidebar:
 
 def main() -> None:
     st.header("X-Pipe Wiki 机器人 💬")
-    # define embedding
-    embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            deployment_name="text-davinci-003",
-            model="text-davinci-003",
-            client=None,
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    download_loader("GithubRepositoryReader")
-    docs = None
-    if os.path.exists("docs/docs.pkl"):
-        with open("docs/docs.pkl", "rb") as f:
-            docs = pickle.load(f)
-
-    if docs is None:
-        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-        loader = GithubRepositoryReader(
-            github_client,
-            owner="ctripcorp",
-            repo="x-pipe",
-            filter_directories=(
-                [".", "doc"],
-                GithubRepositoryReader.FilterType.INCLUDE,
-            ),
-            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-            verbose=True,
-            concurrent_requests=10,
-        )
-
-        docs = loader.load_data(branch="master")
-
-        with open("docs/docs.pkl", "wb") as f:
-            pickle.dump(docs, f)
-
-    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-
-    query_engine = index.as_query_engine(service_context=service_context)
-
+    robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
+    robot = robot_manager.get_robot()
     query = st.text_input("X-Pipe Wiki 问题:")
     if query:
-        index = GPTVectorStoreIndex.from_documents(
-            docs, service_context=service_context
-        )
-
-        query_engine = index.as_query_engine(service_context=service_context)
-        response = query_engine.query(query)
+        response = robot.ask(question=query)
         st.write(response)
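The rewritten entry point delegates all LLM and index wiring to the factory introduced below. A minimal sketch of the same flow outside Streamlit; the question string is a made-up example:

```python
# Sketch of the new query flow from app.py, without the Streamlit UI.
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
robot = robot_manager.get_robot()
print(robot.ask(question="How does X-Pipe handle sentinel failover?"))  # hypothetical question
```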
core/helper.py ADDED
@@ -0,0 +1,24 @@
+from core.lifecycle import Lifecycle
+
+
+class LifecycleHelper:
+
+    @classmethod
+    def initialize_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_initialize(ls.lifecycle_state.phase):
+            ls.initialize()
+
+    @classmethod
+    def start_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_start(ls.lifecycle_state.phase):
+            ls.start()
+
+    @classmethod
+    def stop_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_stop(ls.lifecycle_state.phase):
+            ls.stop()
+
+    @classmethod
+    def dispose_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_dispose(ls.lifecycle_state.phase):
+            ls.dispose()
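`LifecycleHelper` guards every transition with the matching `can_*` check, so callers can attempt a transition without tracking the component's current phase themselves. A small usage sketch; the `restart` helper is hypothetical:

```python
# Hypothetical helper built on LifecycleHelper: each call is a no-op when
# the component's current phase does not allow that transition.
from core.helper import LifecycleHelper
from core.lifecycle import Lifecycle


def restart(component: Lifecycle) -> None:
    LifecycleHelper.stop_if_possible(component)
    LifecycleHelper.start_if_possible(component)
```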
core/lifecycle.py CHANGED
@@ -37,7 +37,6 @@ class LifecycleAware(ABC):
         """
         self.state = state
 
-    @property
     def get_lifecycle_state(self) -> "LifecycleState":
         return self.state
 
@@ -113,7 +112,7 @@ class LifecycleController(ABC):
 
     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
         )
 
     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
@@ -121,7 +120,7 @@ class LifecycleController(ABC):
 
     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
        return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
        )
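With `@property` removed, `get_lifecycle_state` is a plain method and call sites use parentheses, as `core/helper.py` above does. A one-function sketch of the changed call site, assuming `Lifecycle` and `LifecycleState` are both importable from `core.lifecycle`:

```python
from core.lifecycle import Lifecycle, LifecycleState


def current_state(component: Lifecycle) -> "LifecycleState":
    # After this commit get_lifecycle_state() is an ordinary method
    # (the @property decorator was removed), so it is called, not accessed.
    return component.get_lifecycle_state()
```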
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/graph_store.json CHANGED
@@ -1 +1,3 @@
-{"graph_dict": {}}
+{
+    "graph_dict": {}
+}
dataset/index_store.json CHANGED
@@ -1 +1,8 @@
-{"index_store/data": {"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
+{
+    "index_store/data": {
+        "7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {
+            "__type__": "vector_store",
+            "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"
+        }
+    }
+}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
langchain_manager/manager.py CHANGED
@@ -1,9 +1,9 @@
 from abc import abstractmethod, ABC
 
+from langchain_manager.base_language import BaseLanguageModel
 from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
 from langchain_manager.embeddings.openai import OpenAIEmbeddings
 from langchain_manager.llms import AzureOpenAI
-from langchain_manager.base_language import BaseLanguageModel
 
 from core.lifecycle import Lifecycle
 
llama/context.py CHANGED
@@ -1,16 +1,26 @@
-from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-from type import Optional
+from abc import abstractmethod, ABC
+from typing import List, Optional
+
+from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
+from llama_index import StorageContext
+
 from core.lifecycle import Lifecycle
-from langchain.manager import BaseLangChainManager
+from langchain_manager.manager import BaseLangChainManager
+
+
+class ServiceContextManager(Lifecycle, ABC):
+    @abstractmethod
+    def get_service_context(self) -> ServiceContext:
+        pass
 
 
-class ServiceContextManager(Lifecycle):
-    service_context: Optional[ServiceContext]
+class AzureServiceContextManager(ServiceContextManager):
+    lc_manager: BaseLangChainManager
+    service_context: ServiceContext
 
-    def __init__(self, manager: BaseLangChainManager) -> None:
+    def __init__(self, lc_manager: BaseLangChainManager):
         super().__init__()
-        self.manager = manager
-        self.service_context = None
+        self.lc_manager = lc_manager
 
     def get_service_context(self) -> ServiceContext:
         if self.lifecycle_state.is_started():
@@ -25,37 +35,75 @@ class ServiceContextManager(Lifecycle):
 
     def do_init(self) -> None:
         # define embedding
-        embedding = LangchainEmbedding(self.manager.get_embedding())
+        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
         # define LLM
-        llm_predictor = LLMPredictor(llm=self.manager.get_llm())
+        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
         # configure service context
         self.service_context = ServiceContext.from_defaults(
             llm_predictor=llm_predictor, embed_model=embedding
         )
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_start][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_stop][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_dispose(self) -> None:
-        pass
+        self.logger.info("[do_dispose] total used token: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
+
+
+class StorageContextManager(Lifecycle, ABC):
+    @abstractmethod
+    def get_storage_context(self) -> StorageContext:
+        pass
 
 
-class StorageContextManager(Lifecycle):
-    def __init__(self, dataset_path: Optional[str] = "./dataset") -> None:
+class LocalStorageContextManager(StorageContextManager):
+    storage_context: StorageContext
+
+    def __init__(
+        self,
+        dataset_path: str = "./dataset",
+        service_context_manager: Optional[ServiceContextManager] = None,
+    ) -> None:
         super().__init__()
         self.dataset_path = dataset_path
+        self.service_context_manager = service_context_manager
+
+    def get_storage_context(self) -> StorageContext:
+        return self.storage_context
 
     def do_init(self) -> None:
-        pass
+        from llama.utils import is_local_storage_files_ready
+
+        if is_local_storage_files_ready(self.dataset_path):
+            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
+        else:
+            docs = self._download()
+            self._indexing(docs)
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start] storage context: %s", self.storage_context.to_dict())
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop] storage context: %s", self.storage_context.to_dict())
 
     def do_dispose(self) -> None:
-        pass
+        self.storage_context.persist(self.dataset_path)
+
+    def _download(self) -> List[Document]:
+        from llama.data_loader import GithubLoader
+
+        loader = GithubLoader()
+        return loader.load()
+
+    def _indexing(self, docs: List[Document]) -> None:
+        from llama_index import GPTVectorStoreIndex
+
+        index = GPTVectorStoreIndex.from_documents(
+            docs, service_context=self.service_context_manager.get_service_context()
+        )
+        index.storage_context.persist(persist_dir=self.dataset_path)
+        self.storage_context = index.storage_context
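The two concrete managers are wired together by `xpipe_wiki/manager_factory.py` further down; a condensed sketch of that wiring, with the dataset path value shown only as an illustration:

```python
# Condensed wiring sketch, mirroring xpipe_wiki/manager_factory.py below.
from langchain_manager.manager import LangChainAzureManager
from llama.context import AzureServiceContextManager, LocalStorageContextManager

service_ctx_mgr = AzureServiceContextManager(lc_manager=LangChainAzureManager())
storage_ctx_mgr = LocalStorageContextManager(
    dataset_path="./dataset",  # illustrative default; the factory reads XPIPE_WIKI_DATASET_PATH
    service_context_manager=service_ctx_mgr,
)
```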
llama/data_loader.py CHANGED
@@ -7,8 +7,6 @@ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
 from llama_index import download_loader
 from llama_index.readers.schema.base import Document
 
-from core.lifecycle import Lifecycle
-
 
 class WikiLoader(ABC):
     @abstractmethod
@@ -16,12 +14,12 @@ class WikiLoader(ABC):
         pass
 
 
-class GithubLoader(WikiLoader, Lifecycle):
+class GithubLoader(WikiLoader):
     def __init__(
         self,
         github_owner: Optional[str] = None,
         repo: Optional[str] = None,
         dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
llama/utils.py ADDED
@@ -0,0 +1,5 @@
+import os
+
+
+def is_local_storage_files_ready(persist_dir: str) -> bool:
+    return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
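This predicate is what `LocalStorageContextManager.do_init` uses to choose between loading a persisted index and rebuilding it. A small sketch of that decision; the print statements are illustrative stand-ins for the real load/build calls:

```python
# Illustrative use of the readiness check from LocalStorageContextManager.do_init.
from llama.utils import is_local_storage_files_ready

if is_local_storage_files_ready("./dataset"):
    print("persisted index found: load it via StorageContext.from_defaults")
else:
    print("no local files yet: download the docs and build the index")
```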
pyproject.toml CHANGED
@@ -1,7 +1,7 @@
 [tool.mypy]
 ignore_missing_imports = "True"
 disallow_untyped_defs = "True"
-exclude = ["notebooks", "build", "examples", "docs", "dataset", "app.py", "github_retriever.py"]
+exclude = ["notebooks", "build", "examples", "docs", "dataset", "github_retriever.py"]
 
 [tool.ruff]
 exclude = [
@@ -14,6 +14,5 @@ exclude = [
     "notebooks",
     "docs",
     "dataset",
-    "app.py",
     "github_retriever.py"
 ]
requirements.txt CHANGED
@@ -1,7 +1,6 @@
-langchain>=0.0.154
-openai>=0.26.4
-llama_index>=0.6.32
+llama_index>=0.6.3
 llama_hub
+
 ruff
 black
 mypy
xpipe_wiki/__init__.py ADDED
File without changes
xpipe_wiki/manager_factory.py ADDED
@@ -0,0 +1,42 @@
+import enum
+import os
+
+from core.helper import LifecycleHelper
+from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
+
+
+class XPipeRobotRevision(enum.Enum):
+    SIMPLE_OPENAI_VERSION_0 = 1
+
+
+# Registry of already-built managers, keyed by revision.
+CAPABLE: dict[XPipeRobotRevision, XPipeWikiRobotManager] = {}
+
+
+class XPipeRobotManagerFactory:
+    @classmethod
+    def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
+        if CAPABLE.get(revision) is not None:
+            return CAPABLE[revision]
+        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
+            manager = cls.create_simple_openai_version_0()
+            CAPABLE[revision] = manager
+            return manager
+        raise ValueError(f"unsupported revision: {revision}")
+
+    @classmethod
+    def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
+        from llama.context import AzureServiceContextManager
+        from langchain_manager.manager import LangChainAzureManager
+
+        service_context_manager = AzureServiceContextManager(lc_manager=LangChainAzureManager())
+
+        from llama.context import LocalStorageContextManager
+
+        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
+        storage_context_manager = LocalStorageContextManager(
+            dataset_path=dataset_path, service_context_manager=service_context_manager
+        )
+
+        robot_manager = AzureXPipeWikiRobotManager(
+            service_context_manager=service_context_manager,
+            storage_context_manager=storage_context_manager,
+        )
+        LifecycleHelper.initialize_if_possible(robot_manager)
+        LifecycleHelper.start_if_possible(robot_manager)
+        return robot_manager
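Because managers are memoized in the `CAPABLE` registry, repeated lookups for the same revision return the same fully started instance. A quick sketch of that behavior, assuming the registry is a plain dict as above:

```python
# Sketch: get_or_create memoizes per revision, so both lookups share one manager.
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

first = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
second = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
assert first is second  # the second call hits the registry, not the builder
```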
xpipe_wiki/robot_manager.py ADDED
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from llama_index import load_index_from_storage
+from llama_index.indices.query.base import BaseQueryEngine
+from pydantic.dataclasses import dataclass
+
+from core.helper import LifecycleHelper
+from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager, StorageContextManager
+
+
+class XPipeWikiRobot(ABC):
+    @abstractmethod
+    def ask(self, question: str) -> Any:
+        pass
+
+
+@dataclass
+class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
+    query_engine: BaseQueryEngine
+
+    def ask(self, question: str) -> Any:
+        return self.query_engine.query(question)
+
+
+class XPipeWikiRobotManager(Lifecycle):
+    @abstractmethod
+    def get_robot(self) -> XPipeWikiRobot:
+        pass
+
+
+@dataclass
+class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
+    service_context_manager: ServiceContextManager
+    storage_context_manager: StorageContextManager
+
+    def get_robot(self) -> XPipeWikiRobot:
+        index = load_index_from_storage(storage_context=self.storage_context_manager.get_storage_context())
+        query_engine = index.as_query_engine(service_context=self.service_context_manager.get_service_context())
+        return AzureOpenAIXPipeWikiRobot(query_engine)
+
+    def do_init(self) -> None:
+        LifecycleHelper.initialize_if_possible(self.service_context_manager)
+        LifecycleHelper.initialize_if_possible(self.storage_context_manager)
+
+    def do_start(self) -> None:
+        LifecycleHelper.start_if_possible(self.service_context_manager)
+        LifecycleHelper.start_if_possible(self.storage_context_manager)
+
+    def do_stop(self) -> None:
+        LifecycleHelper.stop_if_possible(self.storage_context_manager)
+        LifecycleHelper.stop_if_possible(self.service_context_manager)
+
+    def do_dispose(self) -> None:
+        LifecycleHelper.dispose_if_possible(self.storage_context_manager)
+        LifecycleHelper.dispose_if_possible(self.service_context_manager)