Nick Chen Zhu committed
Commit 257bc0d · Parent(s): a03e89c

first edition
README.md CHANGED
@@ -11,3 +11,47 @@ license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Then, create a new Python virtual environment. The command below creates an environment in `.venv`
+and activates it:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+If you are on Windows, use the following command to activate your virtual environment:
+
+```bash
+.venv\Scripts\activate
+```
+
+Install the required dependencies (this will also install gpt-index through `pip install -e .`
+so that you can start developing on it):
+
+```bash
+pip install -r requirements.txt
+```
+
+Now you should be set!
+
+### Validating your Change
+
+Let's make sure to `format/lint` our change. For bigger changes,
+let's also make sure to `test` it and perhaps create an `example notebook`.
+
+#### Formatting/Linting
+
+You can format and lint your changes with the following commands in the root directory:
+
+```bash
+make format; make lint
+```
+
+You can also make use of our pre-commit hooks by setting up git hook scripts:
+
+```bash
+pre-commit install
+```
+
+We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py CHANGED
@@ -1,16 +1,10 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from llama_index import StorageContext, load_index_from_storage
-import os
-import pickle
-import streamlit as st
-
 import logging
 import sys
 
+import streamlit as st
+
+from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
+
 logging.basicConfig(
     stream=sys.stdout, level=logging.DEBUG
 )  # logging.DEBUG for more verbose output
@@ -34,60 +28,11 @@ with st.sidebar:
 
 def main() -> None:
     st.header("X-Pipe Wiki 机器人 💬")
-    # define embedding
-    embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            deployment_name="text-davinci-003",
-            model="text-davinci-003",
-            client=None,
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    if os.path.exists("./dataset") and len(os.listdir("./dataset")) != 0:
-        storage_context = StorageContext.from_defaults(persist_dir="./dataset")
-    else:
-        download_loader("GithubRepositoryReader")
-        docs = None
-        if os.path.exists("docs/docs.pkl"):
-            with open("docs/docs.pkl", "rb") as f:
-                docs = pickle.load(f)
-
-        if docs is None:
-            github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-            loader = GithubRepositoryReader(
-                github_client,
-                owner="ctripcorp",
-                repo="x-pipe",
-                filter_directories=(
-                    [".", "doc"],
-                    GithubRepositoryReader.FilterType.INCLUDE,
-                ),
-                filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-                verbose=True,
-                concurrent_requests=10,
-            )
-
-            docs = loader.load_data(branch="master")
-
-            with open("docs/docs.pkl", "wb") as f:
-                pickle.dump(docs, f)
-
-        index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-        index.storage_context.persist(persist_dir="./dataset")
-        storage_context = index.storage_context
-
-    index = load_index_from_storage(storage_context=storage_context)
-    query_engine = index.as_query_engine(service_context=service_context)
-
+    robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
+    robot = robot_manager.get_robot()
     query = st.text_input("X-Pipe Wiki 问题:")
     if query:
-        response = query_engine.query(query)
+        response = robot.ask(question=query)
         st.write(response)
 
core/helper.py ADDED
@@ -0,0 +1,24 @@
+from core.lifecycle import Lifecycle
+
+
+class LifecycleHelper:
+
+    @classmethod
+    def initialize_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_initialize(ls.lifecycle_state.phase):
+            ls.initialize()
+
+    @classmethod
+    def start_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_start(ls.lifecycle_state.phase):
+            ls.start()
+
+    @classmethod
+    def stop_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_stop(ls.lifecycle_state.phase):
+            ls.stop()
+
+    @classmethod
+    def dispose_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_dispose(ls.lifecycle_state.phase):
+            ls.dispose()
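
For orientation, a minimal usage sketch of `LifecycleHelper`. `MyComponent` is hypothetical and not part of this commit; it assumes `Lifecycle` subclasses implement the four `do_*` hooks, as the managers elsewhere in this commit do:

```python
from core.helper import LifecycleHelper
from core.lifecycle import Lifecycle


class MyComponent(Lifecycle):  # hypothetical example class, not in this commit
    def do_init(self) -> None:
        print("init")

    def do_start(self) -> None:
        print("start")

    def do_stop(self) -> None:
        print("stop")

    def do_dispose(self) -> None:
        print("dispose")


component = MyComponent()
# Each helper call checks the current phase via get_lifecycle_state()
# before delegating, so an out-of-order call is skipped instead of raising.
LifecycleHelper.initialize_if_possible(component)
LifecycleHelper.start_if_possible(component)
LifecycleHelper.stop_if_possible(component)
LifecycleHelper.dispose_if_possible(component)
```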
core/lifecycle.py CHANGED
@@ -37,7 +37,6 @@ class LifecycleAware(ABC):
         """
         self.state = state
 
-    @property
     def get_lifecycle_state(self) -> "LifecycleState":
         return self.state
 
@@ -113,7 +112,7 @@ class LifecycleController(ABC):
 
     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
         )
 
     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
@@ -121,7 +120,7 @@ class LifecycleController(ABC):
 
     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
        )
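Note the effect of dropping `@property` above: `get_lifecycle_state` is now invoked as a plain method, which is exactly how the new `core/helper.py` consumes it. A short sketch (`component` is a hypothetical `Lifecycle` instance):

```python
# Before this commit: state = component.get_lifecycle_state  (property access)
state = component.get_lifecycle_state()  # after: explicit method call
if state.can_start(component.lifecycle_state.phase):
    component.start()
```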
 
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/graph_store.json CHANGED
@@ -1 +1,3 @@
-{"graph_dict": {}}
+{
+  "graph_dict": {}
+}
dataset/index_store.json CHANGED
@@ -1 +1,8 @@
-{"index_store/data": {"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
+{
+  "index_store/data": {
+    "7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {
+      "__type__": "vector_store",
+      "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"
+    }
+  }
+}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
github_retriever.py DELETED
@@ -1,63 +0,0 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from llama_index import LangchainEmbedding, ServiceContext
-from llama_index import StorageContext, load_index_from_storage
-from dotenv import load_dotenv
-import os
-import pickle
-
-
-def main() -> None:
-    # define embedding
-    embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            engine="text-davinci-003",
-            model_name="text-davinci-003",
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    download_loader("GithubRepositoryReader")
-    docs = None
-    if os.path.exists("docs/docs.pkl"):
-        with open("docs/docs.pkl", "rb") as f:
-            docs = pickle.load(f)
-
-    if docs is None:
-        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-        loader = GithubRepositoryReader(
-            github_client,
-            owner="ctripcorp",
-            repo="x-pipe",
-            filter_directories=(
-                [".", "doc"],
-                GithubRepositoryReader.FilterType.INCLUDE,
-            ),
-            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-            verbose=True,
-            concurrent_requests=10,
-        )
-
-        docs = loader.load_data(branch="master")
-
-        with open("docs/docs.pkl", "wb") as f:
-            pickle.dump(docs, f)
-
-    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-
-    query_engine = index.as_query_engine(service_context=service_context)
-    response = query_engine.query("如何使用X-Pipe?")
-    print(response)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    main()
langchain_manager/manager.py CHANGED
@@ -1,9 +1,9 @@
 from abc import abstractmethod, ABC
 
+from langchain_manager.base_language import BaseLanguageModel
 from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
 from langchain_manager.embeddings.openai import OpenAIEmbeddings
 from langchain_manager.llms import AzureOpenAI
-from langchain_manager.base_language import BaseLanguageModel
 
 from core.lifecycle import Lifecycle
 
llama/context.py CHANGED
@@ -1,16 +1,26 @@
-from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-from type import Optional
+from abc import abstractmethod, ABC
+
+from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
+from llama_index import StorageContext
+
 from core.lifecycle import Lifecycle
-from langchain.manager import BaseLangChainManager
+from langchain_manager.manager import BaseLangChainManager
 
 
-class ServiceContextManager(Lifecycle):
-    service_context: Optional[ServiceContext]
+class ServiceContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_service_context(self) -> ServiceContext:
+        pass
+
+
+class AzureServiceContextManager(ServiceContextManager):
+    lc_manager: BaseLangChainManager
+    service_context: ServiceContext
 
-    def __init__(self, manager: BaseLangChainManager) -> None:
+    def __init__(self, lc_manager: BaseLangChainManager):
         super().__init__()
-        self.manager = manager
-        self.service_context = None
+        self.lc_manager = lc_manager
 
     def get_service_context(self) -> ServiceContext:
         if self.lifecycle_state.is_started():
@@ -25,37 +35,75 @@ class ServiceContextManager(Lifecycle):
 
     def do_init(self) -> None:
         # define embedding
-        embedding = LangchainEmbedding(self.manager.get_embedding())
+        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
         # define LLM
-        llm_predictor = LLMPredictor(llm=self.manager.get_llm())
+        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
         # configure service context
         self.service_context = ServiceContext.from_defaults(
             llm_predictor=llm_predictor, embed_model=embedding
         )
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_start][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_stop][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_dispose(self) -> None:
+        self.logger.info("[do_dispose] total used token: %d", self.service_context.llm_predictor.total_tokens_used)
+
+
+class StorageContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_storage_context(self) -> StorageContext:
         pass
 
 
-class StorageContextManager(Lifecycle):
-    def __init__(self, dataset_path: Optional[str] = "./dataset") -> None:
+class LocalStorageContextManager(StorageContextManager):
+    storage_context: StorageContext
+
+    def __init__(self,
+                 dataset_path: str = "./dataset",
+                 service_context_manager: ServiceContextManager = None) -> None:
         super().__init__()
         self.dataset_path = dataset_path
+        self.service_context_manager = service_context_manager
+
+    def get_storage_context(self) -> StorageContext:
+        return self.storage_context
 
     def do_init(self) -> None:
-        pass
+        from llama.utils import is_local_storage_files_ready
+        if is_local_storage_files_ready(self.dataset_path):
+            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
+        else:
+            docs = self._download()
+            self._indexing(docs)
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start] %s", self.storage_context.to_dict())
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop] %s", self.storage_context.to_dict())
 
     def do_dispose(self) -> None:
-        pass
+        self.storage_context.persist(self.dataset_path)
+
+    def _download(self) -> [Document]:
+        from llama.data_loader import GithubLoader
+        loader = GithubLoader()
+        return loader.load()
+
+    def _indexing(self, docs: [Document]):
+        from llama_index import GPTVectorStoreIndex
+        index = GPTVectorStoreIndex.from_documents(docs,
+                                                   service_context=self.service_context_manager.get_service_context())
+        index.storage_context.persist(persist_dir=self.dataset_path)
+        self.storage_context = index.storage_context
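
A sketch of how these two managers compose, mirroring the wiring that `xpipe_wiki/manager_factory.py` (below) performs; the dataset path here is illustrative:

```python
from core.helper import LifecycleHelper
from langchain_manager.manager import LangChainAzureManager
from llama.context import AzureServiceContextManager, LocalStorageContextManager

service_ctx_mgr = AzureServiceContextManager(lc_manager=LangChainAzureManager())
storage_ctx_mgr = LocalStorageContextManager(
    dataset_path="./dataset", service_context_manager=service_ctx_mgr
)

# do_init either reloads the persisted index from ./dataset or, when the
# directory is empty, downloads the GitHub docs and indexes them from scratch.
LifecycleHelper.initialize_if_possible(service_ctx_mgr)
LifecycleHelper.initialize_if_possible(storage_ctx_mgr)

storage_context = storage_ctx_mgr.get_storage_context()
```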
llama/data_loader.py CHANGED
@@ -1,27 +1,25 @@
 import os
 import pickle
 from abc import abstractmethod, ABC
-from typing import Optional, Sequence, List
+from typing import Optional, Sequence
 
 from llama_hub.github_repo import GithubRepositoryReader, GithubClient
 from llama_index import download_loader
 from llama_index.readers.schema.base import Document
 
-from core.lifecycle import Lifecycle
-
 
 class WikiLoader(ABC):
     @abstractmethod
-    def load(self) -> List[Document]:
+    def load(self) -> [Document]:
         pass
 
 
-class GithubLoader(WikiLoader, Lifecycle):
+class GithubLoader(WikiLoader):
     def __init__(
         self,
         github_owner: Optional[str] = None,
         repo: Optional[str] = None,
         dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
@@ -30,7 +28,7 @@ class GithubLoader(WikiLoader, Lifecycle):
         self.repo = repo if repo is not None else os.environ["GITHUB_REPO"]
         self.dirs = dirs if dirs is not None else [".", "doc"]
 
-    def load(self) -> List[Document]:
+    def load(self) -> [Document]:
         download_loader("GithubRepositoryReader")
         docs = None
         if os.path.exists("docs/docs.pkl"):
llama/index.py DELETED
@@ -1,18 +0,0 @@
-from core.lifecycle import Lifecycle
-from llama.context import ServiceContextManager
-from llama_index.indices.vector_store import VectorStoreIndex
-from typing import Optional
-
-
-class IndexManager(Lifecycle):
-    index: Optional[VectorStoreIndex]
-
-    def __init__(self, context_manager: ServiceContextManager) -> None:
-        super().__init__()
-        self.index = None
-        self.context_manager = context_manager
-
-    def get_index(self) -> Optional[VectorStoreIndex]:
-        if not self.lifecycle_state.is_started():
-            raise Exception("Lifecycle state is not correct")
-        return self.index
llama/utils.py ADDED
@@ -0,0 +1,5 @@
+import os
+
+
+def is_local_storage_files_ready(persist_dir: str) -> bool:
+    return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
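
The helper treats a dataset directory as ready only when it both exists and is non-empty, e.g.:

```python
import os

from llama.utils import is_local_storage_files_ready

os.makedirs("./dataset", exist_ok=True)
print(is_local_storage_files_ready("./dataset"))  # False while the directory is empty
```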
llama/vector_storage.py DELETED
@@ -1,18 +0,0 @@
-from core.lifecycle import Lifecycle
-
-
-class VectorStorageManager(Lifecycle):
-    def __init__(self) -> None:
-        super().__init__()
-
-    def do_init(self) -> None:
-        pass
-
-    def do_start(self) -> None:
-        pass
-
-    def do_stop(self) -> None:
-        pass
-
-    def do_dispose(self) -> None:
-        pass
requirements.txt CHANGED
@@ -1,7 +1,6 @@
-llama_index
+llama_index>=0.6.3
 llama_hub
-langchain
-dotenv
+
 ruff
 black
 mypy
xpipe_wiki/__init__.py ADDED
File without changes
xpipe_wiki/manager_factory.py ADDED
@@ -0,0 +1,42 @@
+import enum
+import os
+
+from core.helper import LifecycleHelper
+from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
+
+
+class XPipeRobotRevision(enum.Enum):
+    SIMPLE_OPENAI_VERSION_0 = 1
+
+
+# Cache of already-built managers, keyed by revision.
+CAPABLE: dict[XPipeRobotRevision, XPipeWikiRobotManager] = {}
+
+
+class XPipeRobotManagerFactory:
+
+    @classmethod
+    def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
+        if CAPABLE.get(revision) is not None:
+            return CAPABLE[revision]
+        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
+            manager = cls.create_simple_openai_version_0()
+            CAPABLE[revision] = manager
+            return manager
+
+    @classmethod
+    def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
+        from llama.context import AzureServiceContextManager
+        from langchain_manager.manager import LangChainAzureManager
+        service_context_manager = AzureServiceContextManager(lc_manager=LangChainAzureManager())
+
+        from llama.context import LocalStorageContextManager
+        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
+        storage_context_manager = LocalStorageContextManager(dataset_path=dataset_path,
+                                                             service_context_manager=service_context_manager)
+
+        robot_manager = AzureXPipeWikiRobotManager(service_context_manager=service_context_manager,
+                                                   storage_context_manager=storage_context_manager)
+        LifecycleHelper.initialize_if_possible(robot_manager)
+        LifecycleHelper.start_if_possible(robot_manager)
+        return robot_manager
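
The module-level `CAPABLE` cache makes `get_or_create` idempotent per revision: the first call builds, initializes, and starts the manager, and later calls return the same instance. A usage sketch:

```python
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

m1 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
m2 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
assert m1 is m2  # the manager is constructed only once, then served from the cache
```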
xpipe_wiki/robot_manager.py ADDED
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from llama_index import load_index_from_storage
+from llama_index.indices.query.base import BaseQueryEngine
+from pydantic import dataclasses
+
+from core.helper import LifecycleHelper
+from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager, StorageContextManager
+
+
+class XPipeWikiRobot(ABC):
+    @abstractmethod
+    def ask(self, question: str) -> Any:
+        pass
+
+
+@dataclasses.dataclass
+class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
+    query_engine: BaseQueryEngine
+
+    def ask(self, question: str) -> Any:
+        return self.query_engine.query(question)
+
+
+class XPipeWikiRobotManager(Lifecycle):
+
+    @abstractmethod
+    def get_robot(self) -> XPipeWikiRobot:
+        pass
+
+
+@dataclasses.dataclass
+class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
+    service_context_manager: ServiceContextManager
+    storage_context_manager: StorageContextManager
+
+    def get_robot(self) -> XPipeWikiRobot:
+        index = load_index_from_storage(storage_context=self.storage_context_manager.get_storage_context())
+        query_engine = index.as_query_engine(service_context=self.service_context_manager.get_service_context())
+        return AzureOpenAIXPipeWikiRobot(query_engine)
+
+    def do_init(self) -> None:
+        LifecycleHelper.initialize_if_possible(self.service_context_manager)
+        LifecycleHelper.initialize_if_possible(self.storage_context_manager)
+
+    def do_start(self) -> None:
+        LifecycleHelper.start_if_possible(self.service_context_manager)
+        LifecycleHelper.start_if_possible(self.storage_context_manager)
+
+    def do_stop(self) -> None:
+        LifecycleHelper.stop_if_possible(self.storage_context_manager)
+        LifecycleHelper.stop_if_possible(self.service_context_manager)
+
+    def do_dispose(self) -> None:
+        LifecycleHelper.dispose_if_possible(self.storage_context_manager)
+        LifecycleHelper.dispose_if_possible(self.service_context_manager)