Refactor Python directory name and add a dataset to reduce vector recomputation

#2
by NickNYU - opened
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -56,7 +56,6 @@ coverage.xml
  .hypothesis/
  .pytest_cache/
  .ruff_cache
- wandb/

  # Translations
  *.mo
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
- # Default ignored files
- /shelf/
- /workspace.xml
- # Editor-based HTTP Client requests
- /httpRequests/
- # Datasource local storage ignored files
- /dataSources/
- /dataSources.local.xml
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
- <component name="InspectionProjectProfileManager">
-   <settings>
-     <option name="USE_PROJECT_PROFILE" value="false" />
-     <version value="1.0" />
-   </settings>
- </component>
.idea/llama-xpipe.iml DELETED
@@ -1,11 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <module type="PYTHON_MODULE" version="4">
-   <component name="NewModuleRootManager">
-     <content url="file://$MODULE_DIR$">
-       <excludeFolder url="file://$MODULE_DIR$/.venv" />
-       <excludeFolder url="file://$MODULE_DIR$/venv" />
-     </content>
-     <orderEntry type="inheritedJdk" />
-     <orderEntry type="sourceFolder" forTests="false" />
-   </component>
- </module>
.idea/misc.xml DELETED
@@ -1,4 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
- </project>
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="ProjectModuleManager">
-     <modules>
-       <module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
-     </modules>
-   </component>
- </project>
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="VcsDirectoryMappings">
-     <mapping directory="" vcs="Git" />
-   </component>
- </project>
Makefile CHANGED
@@ -8,7 +8,7 @@ format:
  lint:
  	mypy .
  	black . --check
- 	ruff check . --fix
+ 	ruff check .

  test:
  	pytest tests
README.md CHANGED
@@ -11,47 +11,3 @@ license: apache-2.0
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-
- Then, create a new Python virtual environment. The command below creates an environment in `.venv`,
- and activates it:
-
- ```bash
- python -m venv .venv
- source .venv/bin/activate
- ```
-
- If you are on Windows, use the following to activate your virtual environment:
-
- ```bash
- .venv\scripts\activate
- ```
-
- Install the required dependencies (this will also install gpt-index through `pip install -e .`
- so that you can start developing on it):
-
- ```bash
- pip install -r requirements.txt
- ```
-
- Now you should be set!
-
- ### Validating your Change
-
- Let's make sure to `format/lint` our change. For bigger changes,
- let's also make sure to `test` it and perhaps create an `example notebook`.
-
- #### Formatting/Linting
-
- You can format and lint your changes with the following commands in the root directory:
-
- ```bash
- make format; make lint
- ```
-
- You can also make use of our pre-commit hooks by setting up git hook scripts:
-
- ```bash
- pre-commit install
- ```
-
- We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py CHANGED
@@ -1,45 +1,94 @@
- import logging
- import sys
-
- import streamlit as st
- from dotenv import load_dotenv
-
- from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
-
- logging.basicConfig(
-     stream=sys.stdout, level=logging.INFO
- )  # logging.DEBUG for more verbose output
- # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
- # # Sidebar contents
- with st.sidebar:
-     st.title("🤗💬 LLM Chat App")
-     st.markdown(
-         """
-     ## About
-     This app is an LLM-powered chatbot built using:
-     - [Streamlit](https://streamlit.io/)
-     - [LangChain](https://python.langchain.com/)
-     - [X-Pipe](https://github.com/ctripcorp/x-pipe)
-     """
-     )
-     # add_vertical_space(5)
-     st.write("Made by Nick")
-
-
- def main() -> None:
-     st.header("X-Pipe Wiki 机器人 💬")
-
-     robot_manager = XPipeRobotManagerFactory.get_or_create(
-         XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
-     )
-     robot = robot_manager.get_robot()
-     query = st.text_input("X-Pipe Wiki 问题:")
-     if query:
-         response = robot.ask(question=query)
-         st.write(response)
-
-
- if __name__ == "__main__":
-     load_dotenv()
-     main()
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+ from llama_index import download_loader, GPTVectorStoreIndex
+ from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
+ from langchain.llms import AzureOpenAI
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ import os
+ import pickle
+ import streamlit as st
+
+ import logging
+ import sys
+
+
+ logging.basicConfig(
+     stream=sys.stdout, level=logging.DEBUG
+ )  # logging.DEBUG for more verbose output
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+ # Sidebar contents
+ with st.sidebar:
+     st.title("🤗💬 LLM Chat App")
+     st.markdown(
+         """
+     ## About
+     This app is an LLM-powered chatbot built using:
+     - [Streamlit](https://streamlit.io/)
+     - [LangChain](https://python.langchain.com/)
+     - [X-Pipe](https://github.com/ctripcorp/x-pipe)
+     """
+     )
+     # add_vertical_space(5)
+     st.write("Made by Nick")
+
+
+ def main() -> None:
+     st.header("X-Pipe Wiki 机器人 💬")
+     # define embedding
+     embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
+     # define LLM
+     llm_predictor = LLMPredictor(
+         llm=AzureOpenAI(
+             deployment_name="text-davinci-003",
+             model="text-davinci-003",
+             client=None,
+         )
+     )
+
+     # configure service context
+     service_context = ServiceContext.from_defaults(
+         llm_predictor=llm_predictor, embed_model=embedding
+     )
+     download_loader("GithubRepositoryReader")
+     docs = None
+     if os.path.exists("docs/docs.pkl"):
+         with open("docs/docs.pkl", "rb") as f:
+             docs = pickle.load(f)
+
+     if docs is None:
+         github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+         loader = GithubRepositoryReader(
+             github_client,
+             owner="ctripcorp",
+             repo="x-pipe",
+             filter_directories=(
+                 [".", "doc"],
+                 GithubRepositoryReader.FilterType.INCLUDE,
+             ),
+             filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+             verbose=True,
+             concurrent_requests=10,
+         )
+
+         docs = loader.load_data(branch="master")
+
+         with open("docs/docs.pkl", "wb") as f:
+             pickle.dump(docs, f)
+
+     index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+
+     query_engine = index.as_query_engine(service_context=service_context)
+
+     query = st.text_input("X-Pipe Wiki 问题:")
+     if query:
+         index = GPTVectorStoreIndex.from_documents(
+             docs, service_context=service_context
+         )
+
+         query_engine = index.as_query_engine(service_context=service_context)
+         response = query_engine.query(query)
+         st.write(response)
+
+
+ if __name__ == "__main__":
+     main()
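
The `dataset/*.json` files changed below are the persisted form of the vector index, which is what lets a run skip re-embedding the wiki. For reference, a minimal sketch (not part of this PR) of the persist-then-reload pattern, using the same llama_index 0.6.x calls that appear in `llama/storage_context.py` and `xpipe_wiki/robot_manager.py` further down this diff; `PERSIST_DIR` and `build_or_load_index` are illustrative names:

```python
# Sketch only: reload a persisted vector index instead of recomputing embeddings.
import os

from llama_index import (
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

PERSIST_DIR = "./dataset"  # assumed location, mirroring the dataset/ directory here


def build_or_load_index(docs, service_context):
    if os.path.exists(PERSIST_DIR) and os.listdir(PERSIST_DIR):
        # A previous run persisted the index: load it, no new embedding calls needed.
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        return load_index_from_storage(
            storage_context, service_context=service_context
        )
    # First run: embed the documents once, then persist the result for next time.
    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    return index
```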
core/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ
 
core/__pycache__/lifecycle.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ
 
core/__pycache__/logger_factory.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ
 
core/helper.py DELETED
@@ -1,31 +0,0 @@
- from core.lifecycle import Lifecycle
-
-
- class LifecycleHelper:
-     @classmethod
-     def initialize_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
-             ls.lifecycle_state.phase
-         ):
-             ls.initialize()
-
-     @classmethod
-     def start_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
-             ls.lifecycle_state.phase
-         ):
-             ls.start()
-
-     @classmethod
-     def stop_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
-             ls.lifecycle_state.phase
-         ):
-             ls.stop()
-
-     @classmethod
-     def dispose_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
-             ls.lifecycle_state.phase
-         ):
-             ls.dispose()
core/lifecycle.py CHANGED
@@ -1,184 +1,185 @@
- import enum
- from abc import ABC, abstractmethod
- from typing import TypeVar, Optional
-
- from core import logger_factory
-
-
- class Initializable(ABC):
-     @abstractmethod
-     def initialize(self) -> None:
-         pass
-
-
- class Startable(ABC):
-     @abstractmethod
-     def start(self) -> None:
-         pass
-
-
- class Stoppable(ABC):
-     @abstractmethod
-     def stop(self) -> None:
-         pass
-
-
- class Disposable(ABC):
-     @abstractmethod
-     def dispose(self) -> None:
-         pass
-
-
- class LifecycleAware(ABC):
-     def __init__(self, state: "LifecycleState") -> None:
-         """
-         Args:
-             state(LifecycleState): lifecycle state
-         """
-         self.state = state
-
-     def get_lifecycle_state(self) -> "LifecycleState":
-         return self.state
-
-
- class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
-     def __init__(self) -> None:
-         self.logger = logger_factory.get_logger(self.__class__.__name__)
-         self.lifecycle_state = LifecycleState(lifecycle=self)
-
-     def initialize(self) -> None:
-         if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
-         self.do_init()
-         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
-
-     def start(self) -> None:
-         if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
-         self.do_start()
-         self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
-
-     def stop(self) -> None:
-         if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
-         self.do_stop()
-         self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
-
-     def dispose(self) -> None:
-         if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
-         self.do_dispose()
-         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
-
-     @abstractmethod
-     def do_init(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_start(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_stop(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_dispose(self) -> None:
-         pass
-
-
- class LifecyclePhase(enum.Enum):
-     INITIALIZING = 1
-     INITIALIZED = 2
-     STARTING = 3
-     STARTED = 4
-     STOPPING = 5
-     STOPPED = 6
-     DISPOSING = 7
-     DISPOSED = 8
-
-
- class LifecycleController(ABC):
-     def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is None or phase == LifecyclePhase.DISPOSED
-
-     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and (
-             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-         )
-
-     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and phase == LifecyclePhase.STARTED
-
-     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and (
-             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-         )
-
-
- LS = TypeVar("LS", bound=Lifecycle)
-
-
- class LifecycleState(LifecycleController, ABC):
-     phase: Optional[LifecyclePhase]
-
-     def __init__(self, lifecycle: LS) -> None:
-         self.phase = None
-         self.prev_phase = None
-         self.lifecycle = lifecycle
-         self.logger = logger_factory.get_logger(__name__)
-
-     def is_initializing(self) -> bool:
-         return self.phase == LifecyclePhase.INITIALIZING
-
-     def is_initialized(self) -> bool:
-         return self.phase == LifecyclePhase.INITIALIZED
-
-     def is_starting(self) -> bool:
-         return self.phase == LifecyclePhase.STARTING
-
-     def is_started(self) -> bool:
-         return self.phase == LifecyclePhase.STARTED
-
-     def is_stopping(self) -> bool:
-         return self.phase == LifecyclePhase.STOPPING
-
-     def is_stopped(self) -> bool:
-         return self.phase == LifecyclePhase.STOPPED
-
-     def is_disposing(self) -> bool:
-         return self.phase == LifecyclePhase.DISPOSING
-
-     def is_disposed(self) -> bool:
-         return self.phase == LifecyclePhase.DISPOSED
-
-     def get_phase(self) -> Optional[LifecyclePhase]:
-         return self.phase
-
-     def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
-         prev = "None"
-         if self.phase is not None:
-             prev = self.phase.name
-         current = "None"
-         if phase is not None:
-             current = phase.name
-         self.logger.info(
-             "[setPhaseName][{}]{} --> {}".format(
-                 self.lifecycle.__class__.__name__,
-                 prev,
-                 current,
-             )
-         )
-         self.phase = phase
-
-     def rollback(self, err: Exception) -> None:
-         self.phase = self.prev_phase
-         self.prev_phase = None
+ import enum
+ from abc import ABC, abstractmethod
+ from typing import TypeVar, Optional
+
+ from core import logger_factory
+
+
+ class Initializable(ABC):
+     @abstractmethod
+     def initialize(self) -> None:
+         pass
+
+
+ class Startable(ABC):
+     @abstractmethod
+     def start(self) -> None:
+         pass
+
+
+ class Stoppable(ABC):
+     @abstractmethod
+     def stop(self) -> None:
+         pass
+
+
+ class Disposable(ABC):
+     @abstractmethod
+     def dispose(self) -> None:
+         pass
+
+
+ class LifecycleAware(ABC):
+     def __init__(self, state: "LifecycleState") -> None:
+         """
+         Args:
+             state(LifecycleState): lifecycle state
+         """
+         self.state = state
+
+     @property
+     def get_lifecycle_state(self) -> "LifecycleState":
+         return self.state
+
+
+ class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
+     def __init__(self) -> None:
+         self.logger = logger_factory.get_logger(self.__class__.__name__)
+         self.lifecycle_state = LifecycleState(lifecycle=self)
+
+     def initialize(self) -> None:
+         if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
+         self.do_init()
+         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
+
+     def start(self) -> None:
+         if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
+         self.do_start()
+         self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
+
+     def stop(self) -> None:
+         if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
+         self.do_stop()
+         self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
+
+     def dispose(self) -> None:
+         if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
+         self.do_dispose()
+         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
+
+     @abstractmethod
+     def do_init(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_start(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_stop(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_dispose(self) -> None:
+         pass
+
+
+ class LifecyclePhase(enum.Enum):
+     INITIALIZING = 1
+     INITIALIZED = 2
+     STARTING = 3
+     STARTED = 4
+     STOPPING = 5
+     STOPPED = 6
+     DISPOSING = 7
+     DISPOSED = 8
+
+
+ class LifecycleController(ABC):
+     def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is None or phase == LifecyclePhase.DISPOSED
+
+     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and (
+             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+         )
+
+     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and phase == LifecyclePhase.STARTED
+
+     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and (
+             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+         )
+
+
+ LS = TypeVar("LS", bound=Lifecycle)
+
+
+ class LifecycleState(LifecycleController, ABC):
+     phase: Optional[LifecyclePhase]
+
+     def __init__(self, lifecycle: LS) -> None:
+         self.phase = None
+         self.prev_phase = None
+         self.lifecycle = lifecycle
+         self.logger = logger_factory.get_logger(__name__)
+
+     def is_initializing(self) -> bool:
+         return self.phase == LifecyclePhase.INITIALIZING
+
+     def is_initialized(self) -> bool:
+         return self.phase == LifecyclePhase.INITIALIZED
+
+     def is_starting(self) -> bool:
+         return self.phase == LifecyclePhase.STARTING
+
+     def is_started(self) -> bool:
+         return self.phase == LifecyclePhase.STARTED
+
+     def is_stopping(self) -> bool:
+         return self.phase == LifecyclePhase.STOPPING
+
+     def is_stopped(self) -> bool:
+         return self.phase == LifecyclePhase.STOPPED
+
+     def is_disposing(self) -> bool:
+         return self.phase == LifecyclePhase.DISPOSING
+
+     def is_disposed(self) -> bool:
+         return self.phase == LifecyclePhase.DISPOSED
+
+     def get_phase(self) -> Optional[LifecyclePhase]:
+         return self.phase
+
+     def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
+         prev = "None"
+         if self.phase is not None:
+             prev = self.phase.name
+         current = "None"
+         if phase is not None:
+             current = phase.name
+         self.logger.info(
+             "[setPhaseName][{}]{} --> {}".format(
+                 self.lifecycle.__class__.__name__,
+                 prev,
+                 current,
+             )
+         )
+         self.phase = phase
+
+     def rollback(self, err: Exception) -> None:
+         self.phase = self.prev_phase
+         self.prev_phase = None
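
For readers new to this state machine, a minimal sketch (not part of the diff) of a subclass being driven through its phases; `Demo` is a hypothetical name:

```python
from core.lifecycle import Lifecycle


class Demo(Lifecycle):
    # The do_* hooks are the only methods a concrete Lifecycle must provide.
    def do_init(self) -> None:
        pass

    def do_start(self) -> None:
        pass

    def do_stop(self) -> None:
        pass

    def do_dispose(self) -> None:
        pass


demo = Demo()
demo.initialize()  # None -> INITIALIZING -> INITIALIZED
demo.start()       # INITIALIZED -> STARTING -> STARTED
demo.stop()        # STARTED -> STOPPING -> STOPPED
demo.dispose()     # STOPPED -> DISPOSING -> DISPOSED
demo.start()       # logs "[Demo]cannot start": can_start() rejects DISPOSED
```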
core/test_lifecycle.py CHANGED
@@ -1,7 +1,10 @@
+ import logging
  from unittest import TestCase

  from core.lifecycle import Lifecycle

+ logging.basicConfig()
+

  class SubLifecycle(Lifecycle):
      def __init__(self) -> None:
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/index_store.json CHANGED
@@ -1 +1 @@
- {"index_store/data": {"da495c94-4541-47e1-b93f-8535192a5f28": {"__type__": "vector_store", "__data__": "{\"index_id\": \"da495c94-4541-47e1-b93f-8535192a5f28\", \"summary\": null, \"nodes_dict\": {\"59108663-a5e1-4e3e-bb21-626158eef136\": \"59108663-a5e1-4e3e-bb21-626158eef136\", \"50de4ec9-febb-466f-9f9a-cc9296895e83\": \"50de4ec9-febb-466f-9f9a-cc9296895e83\", \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\": \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\", \"a0cc4323-ec8f-4fed-9401-e44125134341\": \"a0cc4323-ec8f-4fed-9401-e44125134341\", \"5321cc7b-2a86-48b8-b56c-415dde7c149b\": \"5321cc7b-2a86-48b8-b56c-415dde7c149b\", \"9e19fb91-8258-4aca-9692-2d027073499e\": \"9e19fb91-8258-4aca-9692-2d027073499e\", \"02e856e5-4211-4a27-9204-e966907f1d74\": \"02e856e5-4211-4a27-9204-e966907f1d74\", \"f3074870-8fbf-4322-b1d2-2111e6aac9af\": \"f3074870-8fbf-4322-b1d2-2111e6aac9af\", \"82677fb9-abe3-4038-8263-5576c47da4f2\": \"82677fb9-abe3-4038-8263-5576c47da4f2\", \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\": \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\", \"e45b082d-c3ec-45aa-b630-6db49a62728b\": \"e45b082d-c3ec-45aa-b630-6db49a62728b\", \"2c55445c-04b1-4705-9871-adaa02f38f1b\": \"2c55445c-04b1-4705-9871-adaa02f38f1b\", \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\": \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\", \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\": \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\", \"13221de7-6c68-4367-b1be-f35b06fc3a74\": \"13221de7-6c68-4367-b1be-f35b06fc3a74\", \"9f448401-cda9-4b5f-9a80-c79e111f9963\": \"9f448401-cda9-4b5f-9a80-c79e111f9963\", \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\": \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\", \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\": \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\", \"85f764bd-e560-48ba-a51e-2287b6fe19db\": \"85f764bd-e560-48ba-a51e-2287b6fe19db\", \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\": \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\", \"af881b61-03f4-4851-8946-794015e3436c\": \"af881b61-03f4-4851-8946-794015e3436c\", \"31579820-439e-4029-b8c4-a0d6528daa59\": \"31579820-439e-4029-b8c4-a0d6528daa59\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
 
+ {"index_store/data": {"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
docs/docs.pkl CHANGED
Binary files a/docs/docs.pkl and b/docs/docs.pkl differ
 
github_retriever.py ADDED
@@ -0,0 +1,63 @@
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+ from llama_index import download_loader, GPTVectorStoreIndex
+ from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
+ from langchain.llms import AzureOpenAI
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from llama_index import LangchainEmbedding, ServiceContext
+ from llama_index import StorageContext, load_index_from_storage
+ from dotenv import load_dotenv
+ import os
+ import pickle
+
+
+ def main() -> None:
+     # define embedding
+     embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
+     # define LLM
+     llm_predictor = LLMPredictor(
+         llm=AzureOpenAI(
+             engine="text-davinci-003",
+             model_name="text-davinci-003",
+         )
+     )
+
+     # configure service context
+     service_context = ServiceContext.from_defaults(
+         llm_predictor=llm_predictor, embed_model=embedding
+     )
+     download_loader("GithubRepositoryReader")
+     docs = None
+     if os.path.exists("docs/docs.pkl"):
+         with open("docs/docs.pkl", "rb") as f:
+             docs = pickle.load(f)
+
+     if docs is None:
+         github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+         loader = GithubRepositoryReader(
+             github_client,
+             owner="ctripcorp",
+             repo="x-pipe",
+             filter_directories=(
+                 [".", "doc"],
+                 GithubRepositoryReader.FilterType.INCLUDE,
+             ),
+             filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+             verbose=True,
+             concurrent_requests=10,
+         )
+
+         docs = loader.load_data(branch="master")
+
+         with open("docs/docs.pkl", "wb") as f:
+             pickle.dump(docs, f)
+
+     index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+
+     query_engine = index.as_query_engine(service_context=service_context)
+     response = query_engine.query("如何使用X-Pipe?")
+     print(response)
+
+
+ if __name__ == "__main__":
+     load_dotenv()
+     main()
langchain_manager/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
 
langchain_manager/manager.py CHANGED
@@ -1,12 +1,14 @@
  from abc import abstractmethod, ABC

- from langchain.base_language import BaseLanguageModel
- from langchain.embeddings.base import Embeddings as LCEmbeddings
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.llms import AzureOpenAI
+ from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
+ from langchain_manager.embeddings.openai import OpenAIEmbeddings
+ from langchain_manager.llms import AzureOpenAI
+ from langchain_manager.base_language import BaseLanguageModel

+ from core.lifecycle import Lifecycle

+
- class BaseLangChainManager(ABC):
+ class BaseLangChainManager(Lifecycle, ABC):
      def __init__(self) -> None:
          super().__init__()

@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
  class LangChainAzureManager(BaseLangChainManager):
      def __init__(self) -> None:
          super().__init__()
-         self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-         self.llm = AzureOpenAI(
-             deployment_name="text-davinci-003",
-             # model_name="text-davinci-003",
-             model="text-davinci-003",
-             client=None,
-             # temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
-             # as a wiki robot we won't want to get flexible answers
-             temperature=0.0,
-             # GPT-3 default is 4096, however, openai.py default is 256
-             max_tokens=2048,
-         )

      # Override
      def get_embedding(self) -> LCEmbeddings:
-         return self.embedding
+         return OpenAIEmbeddings(client=None, chunk_size=1)

      # Override
      def get_llm(self) -> BaseLanguageModel:
-         return self.llm
-
-
- class LangChainHuggingFaceManager(BaseLangChainManager):
-     def __init__(self) -> None:
-         super().__init__()
-         from transformers import AutoTokenizer, AutoModel
-
-         AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
-
-         AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
-
-         self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-         self.llm = AzureOpenAI(
+         return AzureOpenAI(
              deployment_name="text-davinci-003",
              # model_name="text-davinci-003",
              model="text-davinci-003",
              client=None,
          )
-
-     # Override
-     def get_embedding(self) -> LCEmbeddings:
-         return self.embedding
-
-     # Override
-     def get_llm(self) -> BaseLanguageModel:
-         return self.llm
llama/context.py ADDED
@@ -0,0 +1,61 @@
+ from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
+ from typing import Optional
+ from core.lifecycle import Lifecycle
+ from langchain_manager.manager import BaseLangChainManager
+
+
+ class ServiceContextManager(Lifecycle):
+     service_context: Optional[ServiceContext]
+
+     def __init__(self, manager: BaseLangChainManager) -> None:
+         super().__init__()
+         self.manager = manager
+         self.service_context = None
+
+     def get_service_context(self) -> ServiceContext:
+         if not self.lifecycle_state.is_started():
+             raise KeyError(
+                 "incorrect lifecycle state: {}".format(self.lifecycle_state.phase)
+             )
+         if self.service_context is None:
+             raise ValueError(
+                 "service context is not ready, check for lifecycle statement"
+             )
+         return self.service_context
+
+     def do_init(self) -> None:
+         # define embedding
+         embedding = LangchainEmbedding(self.manager.get_embedding())
+         # define LLM
+         llm_predictor = LLMPredictor(llm=self.manager.get_llm())
+         # configure service context
+         self.service_context = ServiceContext.from_defaults(
+             llm_predictor=llm_predictor, embed_model=embedding
+         )
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
+
+
+ class StorageContextManager(Lifecycle):
+     def __init__(self, dataset_path: Optional[str] = "./dataset") -> None:
+         super().__init__()
+         self.dataset_path = dataset_path
+
+     def do_init(self) -> None:
+         pass
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
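
A minimal usage sketch (not part of the diff) of the new manager, wired to the `LangChainAzureManager` from `langchain_manager/manager.py` above:

```python
# Hypothetical wiring; initialize() and start() run the do_init()/do_start()
# hooks defined by core.lifecycle.Lifecycle.
from langchain_manager.manager import LangChainAzureManager
from llama.context import ServiceContextManager

scm = ServiceContextManager(LangChainAzureManager())
scm.initialize()  # do_init() builds the ServiceContext from the Azure LLM + embedding
scm.start()       # get_service_context() expects the STARTED phase
service_context = scm.get_service_context()
```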
llama/data_loader.py CHANGED
@@ -7,6 +7,8 @@ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
  from llama_index import download_loader
  from llama_index.readers.schema.base import Document

+ from core.lifecycle import Lifecycle
+

  class WikiLoader(ABC):
      @abstractmethod
@@ -14,7 +16,7 @@ class WikiLoader(ABC):
          pass


- class GithubLoader(WikiLoader):
+ class GithubLoader(WikiLoader, Lifecycle):
      def __init__(
          self,
          github_owner: Optional[str] = None,
@@ -51,8 +53,7 @@ class GithubLoader(WikiLoader):
              verbose=True,
              concurrent_requests=10,
          )
-         os.environ["http_proxy"] = "http://127.0.0.1:7890"
-         os.environ["https_proxy"] = "http://127.0.0.1:7890"
+
          docs = loader.load_data(branch="master")

          with open("docs/docs.pkl", "wb") as f:
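
For orientation, a short sketch (not part of the diff) of the loader's call site, mirroring `LocalStorageContextManager._download()` in the removed `llama/storage_context.py` further down; it assumes `GITHUB_TOKEN` is set in the environment:

```python
from llama.data_loader import GithubLoader

loader = GithubLoader()  # defaults target the ctripcorp/x-pipe wiki markdown
docs = loader.load()     # List[Document]; the raw documents are also pickled to docs/docs.pkl
```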
llama/index.py ADDED
@@ -0,0 +1,18 @@
+ from core.lifecycle import Lifecycle
+ from llama.context import ServiceContextManager
+ from llama_index.indices.vector_store import VectorStoreIndex
+ from typing import Optional
+
+
+ class IndexManager(Lifecycle):
+     index: Optional[VectorStoreIndex]
+
+     def __init__(self, context_manager: ServiceContextManager) -> None:
+         super().__init__()
+         self.index = None
+         self.context_manager = context_manager
+
+     def get_index(self) -> Optional[VectorStoreIndex]:
+         if not self.lifecycle_state.is_started():
+             raise Exception("Lifecycle state is not correct")
+         return self.index
llama/service_context.py DELETED
@@ -1,142 +0,0 @@
- from abc import abstractmethod, ABC
-
- from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-
- from core.lifecycle import Lifecycle
- from langchain_manager.manager import BaseLangChainManager
-
-
- # def get_callback_manager() -> CallbackManager:
- #     from llama_index.callbacks import (
- #         WandbCallbackHandler,
- #         CallbackManager,
- #         LlamaDebugHandler,
- #     )
- #     llama_debug = LlamaDebugHandler(print_trace_on_end=True)
- #     # wandb.init args
- #     run_args = dict(
- #         project="llamaindex",
- #     )
- #     wandb_callback = WandbCallbackHandler(run_args=run_args)
- #     return CallbackManager([llama_debug, wandb_callback])
-
-
- class ServiceContextManager(Lifecycle, ABC):
-     @abstractmethod
-     def get_service_context(self) -> ServiceContext:
-         pass
-
-
- class AzureServiceContextManager(ServiceContextManager):
-     lc_manager: BaseLangChainManager
-     service_context: ServiceContext
-
-     def __init__(self, lc_manager: BaseLangChainManager):
-         super().__init__()
-         self.lc_manager = lc_manager
-
-     def get_service_context(self) -> ServiceContext:
-         if self.service_context is None:
-             raise ValueError(
-                 "service context is not ready, check for lifecycle statement"
-             )
-         return self.service_context
-
-     def do_init(self) -> None:
-         # define embedding
-         embedding = LangchainEmbedding(self.lc_manager.get_embedding())
-         # define LLM
-         llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
-         # configure service context
-         self.service_context = ServiceContext.from_defaults(
-             llm_predictor=llm_predictor,
-             embed_model=embedding,
-             # callback_manager=get_callback_manager(),
-         )
-
-     def do_start(self) -> None:
-         self.logger.info(
-             "[do_start][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_start][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_stop(self) -> None:
-         self.logger.info(
-             "[do_stop][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_stop][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_dispose(self) -> None:
-         self.logger.info(
-             "[do_dispose] total used token: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-
- class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
-     lc_manager: BaseLangChainManager
-     service_context: ServiceContext
-
-     def __init__(self, lc_manager: BaseLangChainManager):
-         super().__init__()
-         self.lc_manager = lc_manager
-
-     def get_service_context(self) -> ServiceContext:
-         if self.service_context is None:
-             raise ValueError(
-                 "service context is not ready, check for lifecycle statement"
-             )
-         return self.service_context
-
-     def do_init(self) -> None:
-         # define embedding
-         from langchain.embeddings import HuggingFaceEmbeddings
-
-         model_name = "GanymedeNil/text2vec-large-chinese"
-         hf_embedding = HuggingFaceEmbeddings(
-             model_name=model_name, model_kwargs={"device": "cpu"}
-         )
-
-         embedding = LangchainEmbedding(hf_embedding)
-         # define LLM
-         llm_predictor = LLMPredictor(self.lc_manager.get_llm())
-         # configure service context
-         self.service_context = ServiceContext.from_defaults(
-             llm_predictor=llm_predictor,
-             embed_model=embedding,
-             # callback_manager=get_callback_manager()
-         )
-
-     def do_start(self) -> None:
-         self.logger.info(
-             "[do_start][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_start][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_stop(self) -> None:
-         self.logger.info(
-             "[do_stop][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_stop][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_dispose(self) -> None:
-         self.logger.info(
-             "[do_dispose] total used token: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
llama/storage_context.py DELETED
@@ -1,67 +0,0 @@
- from llama_index import StorageContext
- from typing import List
- from abc import abstractmethod, ABC
-
- from llama_index import Document
-
- from core.lifecycle import Lifecycle
- from llama.service_context import ServiceContextManager
-
-
- class StorageContextManager(Lifecycle, ABC):
-     @abstractmethod
-     def get_storage_context(self) -> StorageContext:
-         pass
-
-
- class LocalStorageContextManager(StorageContextManager):
-     storage_context: StorageContext
-
-     def __init__(
-         self,
-         service_context_manager: ServiceContextManager,
-         dataset_path: str = "./dataset",
-     ) -> None:
-         super().__init__()
-         self.dataset_path = dataset_path
-         self.service_context_manager = service_context_manager
-
-     def get_storage_context(self) -> StorageContext:
-         return self.storage_context
-
-     def do_init(self) -> None:
-         from llama.utils import is_local_storage_files_ready
-
-         if is_local_storage_files_ready(self.dataset_path):
-             self.storage_context = StorageContext.from_defaults(
-                 persist_dir=self.dataset_path
-             )
-         else:
-             docs = self._download()
-             self._indexing(docs)
-
-     def do_start(self) -> None:
-         # self.logger.info("[do_start]%", **self.storage_context.to_dict())
-         pass
-
-     def do_stop(self) -> None:
-         # self.logger.info("[do_stop]%", **self.storage_context.to_dict())
-         pass
-
-     def do_dispose(self) -> None:
-         self.storage_context.persist(self.dataset_path)
-
-     def _download(self) -> List[Document]:
-         from llama.data_loader import GithubLoader
-
-         loader = GithubLoader()
-         return loader.load()
-
-     def _indexing(self, docs: List[Document]) -> None:
-         from llama_index import GPTVectorStoreIndex
-
-         index = GPTVectorStoreIndex.from_documents(
-             docs, service_context=self.service_context_manager.get_service_context()
-         )
-         index.storage_context.persist(persist_dir=self.dataset_path)
-         self.storage_context = index.storage_context
llama/utils.py DELETED
@@ -1,5 +0,0 @@
- import os
-
-
- def is_local_storage_files_ready(persist_dir: str) -> bool:
-     return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
llama/vector_storage.py ADDED
@@ -0,0 +1,18 @@
+ from core.lifecycle import Lifecycle
+
+
+ class VectorStorageManager(Lifecycle):
+     def __init__(self) -> None:
+         super().__init__()
+
+     def do_init(self) -> None:
+         pass
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
local-requirements.txt DELETED
@@ -1 +0,0 @@
- python-dotenv
pyproject.toml CHANGED
@@ -1,7 +1,7 @@
  [tool.mypy]
  ignore_missing_imports = "True"
  disallow_untyped_defs = "True"
- exclude = ["notebooks", "build", "examples", "docs", "dataset", "github_retriever.py"]
+ exclude = ["notebooks", "build", "examples", "docs", "dataset", "app.py", "github_retriever.py"]

  [tool.ruff]
  exclude = [
@@ -14,5 +14,6 @@ exclude = [
      "notebooks",
      "docs",
      "dataset",
+     "app.py",
      "github_retriever.py"
  ]
requirements.txt CHANGED
@@ -1,10 +1,7 @@
- llama_index>=0.6.3
- llama_hub
- streamlit
- ruff
- black
- mypy
- accelerate
- python-dotenv
- sentence_transformers
- wandb
+ langchain>=0.0.154
+ openai>=0.26.4
+ llama_index>=0.6.32
+ llama_hub
+ ruff
+ black
+ mypy
xpipe_wiki/__init__.py DELETED
File without changes
xpipe_wiki/manager_factory.py DELETED
@@ -1,82 +0,0 @@
- import enum
- import os
-
- from core.helper import LifecycleHelper
- from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
-
- from multiprocessing import Lock
-
- lock = Lock()
-
-
- class XPipeRobotRevision(enum.Enum):
-     SIMPLE_OPENAI_VERSION_0 = 1
-     HUGGINGFACE_VERSION_0 = 2
-
-
- class XPipeRobotManagerFactory:
-     """
-     CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
-     {XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
-     """
-
-     CAPABLE = dict()  # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
-
-     @classmethod
-     def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
-         with lock:
-             if cls.CAPABLE.get(revision) is not None:
-                 return cls.CAPABLE[revision]
-             if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
-                 manager = cls.create_simple_openai_version_0()
-             elif revision == XPipeRobotRevision.HUGGINGFACE_VERSION_0:
-                 manager = cls.create_huggingface_version_0()
-             cls.CAPABLE[revision] = manager
-             return manager
-
-     @classmethod
-     def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
-         from llama.service_context import AzureServiceContextManager
-         from langchain_manager.manager import LangChainAzureManager
-
-         service_context_manager = AzureServiceContextManager(
-             lc_manager=LangChainAzureManager()
-         )
-         from llama.storage_context import LocalStorageContextManager
-
-         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
-         storage_context_manager = LocalStorageContextManager(
-             dataset_path=dataset_path, service_context_manager=service_context_manager
-         )
-
-         robot_manager = AzureXPipeWikiRobotManager(
-             service_context_manager=service_context_manager,
-             storage_context_manager=storage_context_manager,
-         )
-         LifecycleHelper.initialize_if_possible(robot_manager)
-         LifecycleHelper.start_if_possible(robot_manager)
-         return robot_manager
-
-     @classmethod
-     def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
-         from llama.service_context import HuggingFaceChineseOptServiceContextManager
-         from langchain_manager.manager import LangChainAzureManager
-
-         service_context_manager = HuggingFaceChineseOptServiceContextManager(
-             lc_manager=LangChainAzureManager()
-         )
-
-         from llama.storage_context import LocalStorageContextManager
-
-         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
-         storage_context_manager = LocalStorageContextManager(
-             dataset_path=dataset_path, service_context_manager=service_context_manager
-         )
-
-         robot_manager = AzureXPipeWikiRobotManager(
-             service_context_manager=service_context_manager,
-             storage_context_manager=storage_context_manager,
-         )
-         LifecycleHelper.initialize_if_possible(robot_manager)
-         LifecycleHelper.start_if_possible(robot_manager)
-         return robot_manager
xpipe_wiki/robot_manager.py DELETED
@@ -1,79 +0,0 @@
- from abc import ABC, abstractmethod
- from typing import Any
-
- from llama_index import load_index_from_storage
- from llama_index.indices.query.base import BaseQueryEngine
- from llama_index.indices.response import ResponseMode
-
- from core.helper import LifecycleHelper
- from core.lifecycle import Lifecycle
- from llama.service_context import ServiceContextManager
- from llama.storage_context import StorageContextManager
-
-
- class XPipeWikiRobot(ABC):
-     @abstractmethod
-     def ask(self, question: str) -> Any:
-         pass
-
-
- class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
-     query_engine: BaseQueryEngine
-
-     def __init__(self, query_engine: BaseQueryEngine) -> None:
-         super().__init__()
-         self.query_engine = query_engine
-
-     def ask(self, question: str) -> Any:
-         print("question: ", question)
-         response = self.query_engine.query(question)
-         print("response type: ", type(response))
-         return response.__str__()
-
-
- class XPipeWikiRobotManager(Lifecycle):
-     @abstractmethod
-     def get_robot(self) -> XPipeWikiRobot:
-         pass
-
-
- class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
-     service_context_manager: ServiceContextManager
-     storage_context_manager: StorageContextManager
-     query_engine: BaseQueryEngine
-
-     def __init__(
-         self,
-         service_context_manager: ServiceContextManager,
-         storage_context_manager: StorageContextManager,
-     ) -> None:
-         super().__init__()
-         self.service_context_manager = service_context_manager
-         self.storage_context_manager = storage_context_manager
-
-     def get_robot(self) -> XPipeWikiRobot:
-         return AzureOpenAIXPipeWikiRobot(self.query_engine)
-
-     def do_init(self) -> None:
-         LifecycleHelper.initialize_if_possible(self.service_context_manager)
-         LifecycleHelper.initialize_if_possible(self.storage_context_manager)
-
-     def do_start(self) -> None:
-         LifecycleHelper.start_if_possible(self.service_context_manager)
-         LifecycleHelper.start_if_possible(self.storage_context_manager)
-         index = load_index_from_storage(
-             storage_context=self.storage_context_manager.get_storage_context(),
-             service_context=self.service_context_manager.get_service_context(),
-         )
-         self.query_engine = index.as_query_engine(
-             service_context=self.service_context_manager.get_service_context(),
-             response_mode=ResponseMode.TREE_SUMMARIZE,
-         )
-
-     def do_stop(self) -> None:
-         LifecycleHelper.stop_if_possible(self.storage_context_manager)
-         LifecycleHelper.stop_if_possible(self.service_context_manager)
-
-     def do_dispose(self) -> None:
-         LifecycleHelper.dispose_if_possible(self.storage_context_manager)
-         LifecycleHelper.dispose_if_possible(self.service_context_manager)