Nick Chen Zhu committed
Commit 257bc0d · Parent(s): a03e89c

first edition
README.md CHANGED
@@ -11,3 +11,47 @@ license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Then, create a new Python virtual environment. The command below creates an environment in `.venv`
+and activates it:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+If you are on Windows, use the following command to activate your virtual environment:
+
+```bash
+.venv\Scripts\activate
+```
+
+Install the required dependencies (this will also install gpt-index through `pip install -e .`
+so that you can start developing on it):
+
+```bash
+pip install -r requirements.txt
+```
+
+Now you should be set!
+
+### Validating your Change
+
+Let's make sure to `format/lint` our change. For bigger changes,
+let's also make sure to `test` it and perhaps create an `example notebook`.
+
+#### Formatting/Linting
+
+You can format and lint your changes with the following commands in the root directory:
+
+```bash
+make format; make lint
+```
+
+You can also make use of our pre-commit hooks by setting up git hook scripts:
+
+```bash
+pre-commit install
+```
+
+We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py CHANGED
@@ -1,16 +1,10 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from llama_index import StorageContext, load_index_from_storage
-import os
-import pickle
-import streamlit as st
-
 import logging
 import sys
 
+import streamlit as st
+
+from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
+
 logging.basicConfig(
     stream=sys.stdout, level=logging.DEBUG
 )  # logging.DEBUG for more verbose output
@@ -34,60 +28,11 @@ with st.sidebar:
 
 def main() -> None:
     st.header("X-Pipe Wiki 机器人 💬")
-    # define embedding
-    embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            deployment_name="text-davinci-003",
-            model="text-davinci-003",
-            client=None,
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    if os.path.exists("./dataset") and len(os.listdir("./dataset")) != 0:
-        storage_context = StorageContext.from_defaults(persist_dir="./dataset")
-    else:
-        download_loader("GithubRepositoryReader")
-        docs = None
-        if os.path.exists("docs/docs.pkl"):
-            with open("docs/docs.pkl", "rb") as f:
-                docs = pickle.load(f)
-
-        if docs is None:
-            github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-            loader = GithubRepositoryReader(
-                github_client,
-                owner="ctripcorp",
-                repo="x-pipe",
-                filter_directories=(
-                    [".", "doc"],
-                    GithubRepositoryReader.FilterType.INCLUDE,
-                ),
-                filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-                verbose=True,
-                concurrent_requests=10,
-            )
-
-            docs = loader.load_data(branch="master")
-
-            with open("docs/docs.pkl", "wb") as f:
-                pickle.dump(docs, f)
-
-        index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-        index.storage_context.persist(persist_dir="./dataset")
-        storage_context = index.storage_context
-
-    index = load_index_from_storage(storage_context=storage_context)
-    query_engine = index.as_query_engine(service_context=service_context)
-
+    robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
+    robot = robot_manager.get_robot()
     query = st.text_input("X-Pipe Wiki 问题:")
     if query:
-        response = query_engine.query(query)
+        response = robot.ask(question=query)
         st.write(response)
 
core/helper.py ADDED
@@ -0,0 +1,24 @@
+from core.lifecycle import Lifecycle
+
+
+class LifecycleHelper:
+
+    @classmethod
+    def initialize_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_initialize(ls.lifecycle_state.phase):
+            ls.initialize()
+
+    @classmethod
+    def start_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_start(ls.lifecycle_state.phase):
+            ls.start()
+
+    @classmethod
+    def stop_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_stop(ls.lifecycle_state.phase):
+            ls.stop()
+
+    @classmethod
+    def dispose_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_dispose(ls.lifecycle_state.phase):
+            ls.dispose()
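
For orientation, a minimal usage sketch of `LifecycleHelper`. `MyComponent` is hypothetical and not part of this commit; it assumes `Lifecycle` subclasses implement the four `do_*` hooks, as the managers elsewhere in this commit do:

```python
from core.helper import LifecycleHelper
from core.lifecycle import Lifecycle


class MyComponent(Lifecycle):  # hypothetical example class, not in this commit
    def do_init(self) -> None:
        print("init")

    def do_start(self) -> None:
        print("start")

    def do_stop(self) -> None:
        print("stop")

    def do_dispose(self) -> None:
        print("dispose")


component = MyComponent()
# Each helper call checks the current phase via get_lifecycle_state()
# before delegating, so an out-of-order call is skipped instead of raising.
LifecycleHelper.initialize_if_possible(component)
LifecycleHelper.start_if_possible(component)
LifecycleHelper.stop_if_possible(component)
LifecycleHelper.dispose_if_possible(component)
```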
core/lifecycle.py CHANGED
@@ -37,7 +37,6 @@ class LifecycleAware(ABC):
         """
         self.state = state
 
-    @property
     def get_lifecycle_state(self) -> "LifecycleState":
         return self.state
 
@@ -113,7 +112,7 @@ class LifecycleController(ABC):
 
     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
         )
 
     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
@@ -121,7 +120,7 @@ class LifecycleController(ABC):
 
     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
        )
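Note the effect of dropping `@property` above: `get_lifecycle_state` is now invoked as a plain method, which is exactly how the new `core/helper.py` consumes it. A short sketch (`component` is a hypothetical `Lifecycle` instance):

```python
# Before this commit: state = component.get_lifecycle_state  (property access)
state = component.get_lifecycle_state()  # after: explicit method call
if state.can_start(component.lifecycle_state.phase):
    component.start()
```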
 
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/graph_store.json CHANGED
@@ -1 +1,3 @@
-{"graph_dict": {}}
+{
+  "graph_dict": {}
+}
dataset/index_store.json CHANGED
@@ -1 +1,8 @@
-{"index_store/data": {"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
+{
+  "index_store/data": {
+    "7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {
+      "__type__": "vector_store",
+      "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"
+    }
+  }
+}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
github_retriever.py DELETED
@@ -1,63 +0,0 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-from llama_index import LangchainEmbedding, ServiceContext
-from llama_index import StorageContext, load_index_from_storage
-from dotenv import load_dotenv
-import os
-import pickle
-
-
-def main() -> None:
-    # define embedding
-    embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            engine="text-davinci-003",
-            model_name="text-davinci-003",
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    download_loader("GithubRepositoryReader")
-    docs = None
-    if os.path.exists("docs/docs.pkl"):
-        with open("docs/docs.pkl", "rb") as f:
-            docs = pickle.load(f)
-
-    if docs is None:
-        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-        loader = GithubRepositoryReader(
-            github_client,
-            owner="ctripcorp",
-            repo="x-pipe",
-            filter_directories=(
-                [".", "doc"],
-                GithubRepositoryReader.FilterType.INCLUDE,
-            ),
-            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-            verbose=True,
-            concurrent_requests=10,
-        )
-
-        docs = loader.load_data(branch="master")
-
-        with open("docs/docs.pkl", "wb") as f:
-            pickle.dump(docs, f)
-
-    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-
-    query_engine = index.as_query_engine(service_context=service_context)
-    response = query_engine.query("如何使用X-Pipe?")
-    print(response)
-
-
-if __name__ == "__main__":
-    load_dotenv()
-    main()
langchain_manager/manager.py CHANGED
@@ -1,9 +1,9 @@
 from abc import abstractmethod, ABC
 
+from langchain_manager.base_language import BaseLanguageModel
 from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
 from langchain_manager.embeddings.openai import OpenAIEmbeddings
 from langchain_manager.llms import AzureOpenAI
-from langchain_manager.base_language import BaseLanguageModel
 
 from core.lifecycle import Lifecycle
 
llama/context.py CHANGED
@@ -1,16 +1,26 @@
-from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-from type import Optional
+from abc import abstractmethod, ABC
+
+from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
+from llama_index import StorageContext
+
 from core.lifecycle import Lifecycle
-from langchain.manager import BaseLangChainManager
+from langchain_manager.manager import BaseLangChainManager
 
 
-class ServiceContextManager(Lifecycle):
-    service_context: Optional[ServiceContext]
+class ServiceContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_service_context(self) -> ServiceContext:
+        pass
+
+
+class AzureServiceContextManager(ServiceContextManager):
+    lc_manager: BaseLangChainManager
+    service_context: ServiceContext
 
-    def __init__(self, manager: BaseLangChainManager) -> None:
+    def __init__(self, lc_manager: BaseLangChainManager):
         super().__init__()
-        self.manager = manager
-        self.service_context = None
+        self.lc_manager = lc_manager
 
     def get_service_context(self) -> ServiceContext:
         if self.lifecycle_state.is_started():
@@ -25,37 +35,75 @@ class ServiceContextManager(Lifecycle):
 
     def do_init(self) -> None:
         # define embedding
-        embedding = LangchainEmbedding(self.manager.get_embedding())
+        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
         # define LLM
-        llm_predictor = LLMPredictor(llm=self.manager.get_llm())
+        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
         # configure service context
         self.service_context = ServiceContext.from_defaults(
             llm_predictor=llm_predictor, embed_model=embedding
         )
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_start][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_stop][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_dispose(self) -> None:
+        self.logger.info("[do_dispose] total used token: %d", self.service_context.llm_predictor.total_tokens_used)
+
+
+class StorageContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_storage_context(self) -> StorageContext:
         pass
 
 
-class StorageContextManager(Lifecycle):
-    def __init__(self, dataset_path: Optional[str] = "./dataset") -> None:
+class LocalStorageContextManager(StorageContextManager):
+    storage_context: StorageContext
+
+    def __init__(self,
+                 dataset_path: str = "./dataset",
+                 service_context_manager: ServiceContextManager = None) -> None:
         super().__init__()
         self.dataset_path = dataset_path
+        self.service_context_manager = service_context_manager
+
+    def get_storage_context(self) -> StorageContext:
+        return self.storage_context
 
     def do_init(self) -> None:
-        pass
+        from llama.utils import is_local_storage_files_ready
+        if is_local_storage_files_ready(self.dataset_path):
+            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
+        else:
+            docs = self._download()
+            self._indexing(docs)
 
     def do_start(self) -> None:
-        pass
+        self.logger.info("[do_start] %s", self.storage_context.to_dict())
 
     def do_stop(self) -> None:
-        pass
+        self.logger.info("[do_stop] %s", self.storage_context.to_dict())
 
     def do_dispose(self) -> None:
-        pass
+        self.storage_context.persist(self.dataset_path)
+
+    def _download(self) -> [Document]:
+        from llama.data_loader import GithubLoader
+        loader = GithubLoader()
+        return loader.load()
+
+    def _indexing(self, docs: [Document]):
+        from llama_index import GPTVectorStoreIndex
+        index = GPTVectorStoreIndex.from_documents(docs,
+                                                   service_context=self.service_context_manager.get_service_context())
+        index.storage_context.persist(persist_dir=self.dataset_path)
+        self.storage_context = index.storage_context
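
A sketch of how these two managers compose, mirroring the wiring that `xpipe_wiki/manager_factory.py` (below) performs; the dataset path here is illustrative:

```python
from core.helper import LifecycleHelper
from langchain_manager.manager import LangChainAzureManager
from llama.context import AzureServiceContextManager, LocalStorageContextManager

service_ctx_mgr = AzureServiceContextManager(lc_manager=LangChainAzureManager())
storage_ctx_mgr = LocalStorageContextManager(
    dataset_path="./dataset", service_context_manager=service_ctx_mgr
)

# do_init either reloads the persisted index from ./dataset or, when the
# directory is empty, downloads the GitHub docs and indexes them from scratch.
LifecycleHelper.initialize_if_possible(service_ctx_mgr)
LifecycleHelper.initialize_if_possible(storage_ctx_mgr)

storage_context = storage_ctx_mgr.get_storage_context()
```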
llama/data_loader.py CHANGED
@@ -1,27 +1,25 @@
 import os
 import pickle
 from abc import abstractmethod, ABC
-from typing import Optional, Sequence, List
+from typing import Optional, Sequence
 
 from llama_hub.github_repo import GithubRepositoryReader, GithubClient
 from llama_index import download_loader
 from llama_index.readers.schema.base import Document
 
-from core.lifecycle import Lifecycle
-
 
 class WikiLoader(ABC):
     @abstractmethod
-    def load(self) -> List[Document]:
+    def load(self) -> [Document]:
         pass
 
 
-class GithubLoader(WikiLoader, Lifecycle):
+class GithubLoader(WikiLoader):
     def __init__(
         self,
         github_owner: Optional[str] = None,
         repo: Optional[str] = None,
         dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
@@ -30,7 +28,7 @@ class GithubLoader(WikiLoader, Lifecycle):
         self.repo = repo if repo is not None else os.environ["GITHUB_REPO"]
         self.dirs = dirs if dirs is not None else [".", "doc"]
 
-    def load(self) -> List[Document]:
+    def load(self) -> [Document]:
         download_loader("GithubRepositoryReader")
         docs = None
         if os.path.exists("docs/docs.pkl"):
llama/index.py DELETED
@@ -1,18 +0,0 @@
-from core.lifecycle import Lifecycle
-from llama.context import ServiceContextManager
-from llama_index.indices.vector_store import VectorStoreIndex
-from typing import Optional
-
-
-class IndexManager(Lifecycle):
-    index: Optional[VectorStoreIndex]
-
-    def __init__(self, context_manager: ServiceContextManager) -> None:
-        super().__init__()
-        self.index = None
-        self.context_manager = context_manager
-
-    def get_index(self) -> Optional[VectorStoreIndex]:
-        if not self.lifecycle_state.is_started():
-            raise Exception("Lifecycle state is not correct")
-        return self.index
llama/utils.py ADDED
@@ -0,0 +1,5 @@
+import os
+
+
+def is_local_storage_files_ready(persist_dir: str) -> bool:
+    return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
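
The helper treats a dataset directory as ready only when it both exists and is non-empty, e.g.:

```python
import os

from llama.utils import is_local_storage_files_ready

os.makedirs("./dataset", exist_ok=True)
print(is_local_storage_files_ready("./dataset"))  # False while the directory is empty
```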
llama/vector_storage.py DELETED
@@ -1,18 +0,0 @@
-from core.lifecycle import Lifecycle
-
-
-class VectorStorageManager(Lifecycle):
-    def __init__(self) -> None:
-        super().__init__()
-
-    def do_init(self) -> None:
-        pass
-
-    def do_start(self) -> None:
-        pass
-
-    def do_stop(self) -> None:
-        pass
-
-    def do_dispose(self) -> None:
-        pass
requirements.txt CHANGED
@@ -1,7 +1,6 @@
-llama_index
+llama_index>=0.6.3
 llama_hub
-langchain
-dotenv
+
 ruff
 black
 mypy
xpipe_wiki/__init__.py ADDED
File without changes
xpipe_wiki/manager_factory.py ADDED
@@ -0,0 +1,42 @@
+import enum
+import os
+
+from core.helper import LifecycleHelper
+from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
+
+
+class XPipeRobotRevision(enum.Enum):
+    SIMPLE_OPENAI_VERSION_0 = 1
+
+
+# Cache of already-built managers, keyed by revision.
+CAPABLE: dict[XPipeRobotRevision, XPipeWikiRobotManager] = {}
+
+
+class XPipeRobotManagerFactory:
+
+    @classmethod
+    def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
+        if CAPABLE.get(revision) is not None:
+            return CAPABLE[revision]
+        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
+            manager = cls.create_simple_openai_version_0()
+            CAPABLE[revision] = manager
+            return manager
+
+    @classmethod
+    def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
+        from llama.context import AzureServiceContextManager
+        from langchain_manager.manager import LangChainAzureManager
+        service_context_manager = AzureServiceContextManager(lc_manager=LangChainAzureManager())
+
+        from llama.context import LocalStorageContextManager
+        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
+        storage_context_manager = LocalStorageContextManager(dataset_path=dataset_path,
+                                                             service_context_manager=service_context_manager)
+
+        robot_manager = AzureXPipeWikiRobotManager(service_context_manager=service_context_manager,
+                                                   storage_context_manager=storage_context_manager)
+        LifecycleHelper.initialize_if_possible(robot_manager)
+        LifecycleHelper.start_if_possible(robot_manager)
+        return robot_manager
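
The module-level `CAPABLE` cache makes `get_or_create` idempotent per revision: the first call builds, initializes, and starts the manager, and later calls return the same instance. A usage sketch:

```python
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

m1 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
m2 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
assert m1 is m2  # the manager is constructed only once, then served from the cache
```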
xpipe_wiki/robot_manager.py ADDED
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from llama_index import load_index_from_storage
+from llama_index.indices.query.base import BaseQueryEngine
+from pydantic import dataclasses
+
+from core.helper import LifecycleHelper
+from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager, StorageContextManager
+
+
+class XPipeWikiRobot(ABC):
+    @abstractmethod
+    def ask(self, question: str) -> Any:
+        pass
+
+
+@dataclasses.dataclass
+class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
+    query_engine: BaseQueryEngine
+
+    def ask(self, question: str) -> Any:
+        return self.query_engine.query(question)
+
+
+class XPipeWikiRobotManager(Lifecycle):
+
+    @abstractmethod
+    def get_robot(self) -> XPipeWikiRobot:
+        pass
+
+
+@dataclasses.dataclass
+class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
+    service_context_manager: ServiceContextManager
+    storage_context_manager: StorageContextManager
+
+    def get_robot(self) -> XPipeWikiRobot:
+        index = load_index_from_storage(storage_context=self.storage_context_manager.get_storage_context())
+        query_engine = index.as_query_engine(service_context=self.service_context_manager.get_service_context())
+        return AzureOpenAIXPipeWikiRobot(query_engine)
+
+    def do_init(self) -> None:
+        LifecycleHelper.initialize_if_possible(self.service_context_manager)
+        LifecycleHelper.initialize_if_possible(self.storage_context_manager)
+
+    def do_start(self) -> None:
+        LifecycleHelper.start_if_possible(self.service_context_manager)
+        LifecycleHelper.start_if_possible(self.storage_context_manager)
+
+    def do_stop(self) -> None:
+        LifecycleHelper.stop_if_possible(self.storage_context_manager)
+        LifecycleHelper.stop_if_possible(self.service_context_manager)
+
+    def do_dispose(self) -> None:
+        LifecycleHelper.dispose_if_possible(self.storage_context_manager)
+        LifecycleHelper.dispose_if_possible(self.service_context_manager)