Refactor Python directory name and add a dataset to reduce vector recomputation

#2
by NickNYU - opened
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -56,7 +56,6 @@ coverage.xml
  .hypothesis/
  .pytest_cache/
  .ruff_cache
- wandb/

  # Translations
  *.mo
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
- # Default ignored files
- /shelf/
- /workspace.xml
- # Editor-based HTTP Client requests
- /httpRequests/
- # Datasource local storage ignored files
- /dataSources/
- /dataSources.local.xml
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
- <component name="InspectionProjectProfileManager">
-   <settings>
-     <option name="USE_PROJECT_PROFILE" value="false" />
-     <version value="1.0" />
-   </settings>
- </component>
.idea/llama-xpipe.iml DELETED
@@ -1,11 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <module type="PYTHON_MODULE" version="4">
-   <component name="NewModuleRootManager">
-     <content url="file://$MODULE_DIR$">
-       <excludeFolder url="file://$MODULE_DIR$/.venv" />
-       <excludeFolder url="file://$MODULE_DIR$/venv" />
-     </content>
-     <orderEntry type="inheritedJdk" />
-     <orderEntry type="sourceFolder" forTests="false" />
-   </component>
- </module>
.idea/misc.xml DELETED
@@ -1,4 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (llama-xpipe)" project-jdk-type="Python SDK" />
- </project>
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="ProjectModuleManager">
-     <modules>
-       <module fileurl="file://$PROJECT_DIR$/.idea/llama-xpipe.iml" filepath="$PROJECT_DIR$/.idea/llama-xpipe.iml" />
-     </modules>
-   </component>
- </project>
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
- <?xml version="1.0" encoding="UTF-8"?>
- <project version="4">
-   <component name="VcsDirectoryMappings">
-     <mapping directory="" vcs="Git" />
-   </component>
- </project>
Makefile CHANGED
@@ -8,7 +8,7 @@ format:
  lint:
  	mypy .
  	black . --check
- 	ruff check . --fix
+ 	ruff check .

  test:
  	pytest tests
README.md CHANGED
@@ -11,47 +11,3 @@ license: apache-2.0
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-
- Then, create a new Python virtual environment. The command below creates an environment in `.venv`,
- and activates it:
-
- ```bash
- python -m venv .venv
- source .venv/bin/activate
- ```
-
- If you are on Windows, use the following to activate your virtual environment:
-
- ```bash
- .venv\scripts\activate
- ```
-
- Install the required dependencies (this will also install gpt-index through `pip install -e .`
- so that you can start developing on it):
-
- ```bash
- pip install -r requirements.txt
- ```
-
- Now you should be set!
-
- ### Validating your Change
-
- Let's make sure to `format/lint` our change. For bigger changes,
- let's also make sure to `test` it and perhaps create an `example notebook`.
-
- #### Formatting/Linting
-
- You can format and lint your changes with the following commands in the root directory:
-
- ```bash
- make format; make lint
- ```
-
- You can also make use of our pre-commit hooks by setting up git hook scripts:
-
- ```bash
- pre-commit install
- ```
-
- We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py CHANGED
@@ -1,45 +1,94 @@
- import logging
- import sys
-
- import streamlit as st
- from dotenv import load_dotenv
-
- from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
-
- logging.basicConfig(
-     stream=sys.stdout, level=logging.INFO
- )  # logging.DEBUG for more verbose output
- # logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
-
- # # Sidebar contents
- with st.sidebar:
-     st.title("🤗💬 LLM Chat App")
-     st.markdown(
-         """
-     ## About
-     This app is an LLM-powered chatbot built using:
-     - [Streamlit](https://streamlit.io/)
-     - [LangChain](https://python.langchain.com/)
-     - [X-Pipe](https://github.com/ctripcorp/x-pipe)
-     """
-     )
-     # add_vertical_space(5)
-     st.write("Made by Nick")
-
-
- def main() -> None:
-     st.header("X-Pipe Wiki 机器人 💬")
-
-     robot_manager = XPipeRobotManagerFactory.get_or_create(
-         XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
-     )
-     robot = robot_manager.get_robot()
-     query = st.text_input("X-Pipe Wiki 问题:")
-     if query:
-         response = robot.ask(question=query)
-         st.write(response)
-
-
- if __name__ == "__main__":
-     load_dotenv()
-     main()
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+ from llama_index import download_loader, GPTVectorStoreIndex
+ from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
+ from langchain.llms import AzureOpenAI
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ import os
+ import pickle
+ import streamlit as st
+
+ import logging
+ import sys
+
+
+ logging.basicConfig(
+     stream=sys.stdout, level=logging.DEBUG
+ )  # logging.DEBUG for more verbose output
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+ # Sidebar contents
+ with st.sidebar:
+     st.title("🤗💬 LLM Chat App")
+     st.markdown(
+         """
+     ## About
+     This app is an LLM-powered chatbot built using:
+     - [Streamlit](https://streamlit.io/)
+     - [LangChain](https://python.langchain.com/)
+     - [X-Pipe](https://github.com/ctripcorp/x-pipe)
+     """
+     )
+     # add_vertical_space(5)
+     st.write("Made by Nick")
+
+
+ def main() -> None:
+     st.header("X-Pipe Wiki 机器人 💬")
+     # define embedding
+     embedding = LangchainEmbedding(OpenAIEmbeddings(client=None, chunk_size=1))
+     # define LLM
+     llm_predictor = LLMPredictor(
+         llm=AzureOpenAI(
+             deployment_name="text-davinci-003",
+             model="text-davinci-003",
+             client=None,
+         )
+     )
+
+     # configure service context
+     service_context = ServiceContext.from_defaults(
+         llm_predictor=llm_predictor, embed_model=embedding
+     )
+     download_loader("GithubRepositoryReader")
+     docs = None
+     if os.path.exists("docs/docs.pkl"):
+         with open("docs/docs.pkl", "rb") as f:
+             docs = pickle.load(f)
+
+     if docs is None:
+         github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+         loader = GithubRepositoryReader(
+             github_client,
+             owner="ctripcorp",
+             repo="x-pipe",
+             filter_directories=(
+                 [".", "doc"],
+                 GithubRepositoryReader.FilterType.INCLUDE,
+             ),
+             filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+             verbose=True,
+             concurrent_requests=10,
+         )
+
+         docs = loader.load_data(branch="master")
+
+         with open("docs/docs.pkl", "wb") as f:
+             pickle.dump(docs, f)
+
+     index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+
+     query_engine = index.as_query_engine(service_context=service_context)
+
+     query = st.text_input("X-Pipe Wiki 问题:")
+     if query:
+         index = GPTVectorStoreIndex.from_documents(
+             docs, service_context=service_context
+         )
+
+         query_engine = index.as_query_engine(service_context=service_context)
+         response = query_engine.query(query)
+         st.write(response)
+
+
+ if __name__ == "__main__":
+     main()
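
The `dataset/*.json` files changed below are the persisted form of the vector index, which is what lets a run skip re-embedding the wiki. For reference, a minimal sketch (not part of this PR) of the persist-then-reload pattern, using the same llama_index 0.6.x calls that appear in `llama/storage_context.py` and `xpipe_wiki/robot_manager.py` further down this diff; `PERSIST_DIR` and `build_or_load_index` are illustrative names:

```python
# Sketch only: reload a persisted vector index instead of recomputing embeddings.
import os

from llama_index import (
    GPTVectorStoreIndex,
    StorageContext,
    load_index_from_storage,
)

PERSIST_DIR = "./dataset"  # assumed location, mirroring the dataset/ directory here


def build_or_load_index(docs, service_context):
    if os.path.exists(PERSIST_DIR) and os.listdir(PERSIST_DIR):
        # A previous run persisted the index: load it, no new embedding calls needed.
        storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
        return load_index_from_storage(
            storage_context, service_context=service_context
        )
    # First run: embed the documents once, then persist the result for next time.
    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    return index
```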
core/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/__init__.cpython-310.pyc and b/core/__pycache__/__init__.cpython-310.pyc differ
 
core/__pycache__/lifecycle.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/lifecycle.cpython-310.pyc and b/core/__pycache__/lifecycle.cpython-310.pyc differ
 
core/__pycache__/logger_factory.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/logger_factory.cpython-310.pyc and b/core/__pycache__/logger_factory.cpython-310.pyc differ
 
core/helper.py DELETED
@@ -1,31 +0,0 @@
- from core.lifecycle import Lifecycle
-
-
- class LifecycleHelper:
-     @classmethod
-     def initialize_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_initialize(
-             ls.lifecycle_state.phase
-         ):
-             ls.initialize()
-
-     @classmethod
-     def start_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_start(
-             ls.lifecycle_state.phase
-         ):
-             ls.start()
-
-     @classmethod
-     def stop_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_stop(
-             ls.lifecycle_state.phase
-         ):
-             ls.stop()
-
-     @classmethod
-     def dispose_if_possible(cls, ls: Lifecycle) -> None:
-         if isinstance(ls, Lifecycle) and ls.lifecycle_state.can_dispose(
-             ls.lifecycle_state.phase
-         ):
-             ls.dispose()
core/lifecycle.py CHANGED
@@ -1,184 +1,185 @@
- import enum
- from abc import ABC, abstractmethod
- from typing import TypeVar, Optional
-
- from core import logger_factory
-
-
- class Initializable(ABC):
-     @abstractmethod
-     def initialize(self) -> None:
-         pass
-
-
- class Startable(ABC):
-     @abstractmethod
-     def start(self) -> None:
-         pass
-
-
- class Stoppable(ABC):
-     @abstractmethod
-     def stop(self) -> None:
-         pass
-
-
- class Disposable(ABC):
-     @abstractmethod
-     def dispose(self) -> None:
-         pass
-
-
- class LifecycleAware(ABC):
-     def __init__(self, state: "LifecycleState") -> None:
-         """
-         Args:
-             state(LifecycleState): lifecycle state
-         """
-         self.state = state
-
-     def get_lifecycle_state(self) -> "LifecycleState":
-         return self.state
-
-
- class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
-     def __init__(self) -> None:
-         self.logger = logger_factory.get_logger(self.__class__.__name__)
-         self.lifecycle_state = LifecycleState(lifecycle=self)
-
-     def initialize(self) -> None:
-         if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
-         self.do_init()
-         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
-
-     def start(self) -> None:
-         if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
-         self.do_start()
-         self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
-
-     def stop(self) -> None:
-         if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
-         self.do_stop()
-         self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
-
-     def dispose(self) -> None:
-         if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
-             self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
-             return
-         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
-         self.do_dispose()
-         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
-
-     @abstractmethod
-     def do_init(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_start(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_stop(self) -> None:
-         pass
-
-     @abstractmethod
-     def do_dispose(self) -> None:
-         pass
-
-
- class LifecyclePhase(enum.Enum):
-     INITIALIZING = 1
-     INITIALIZED = 2
-     STARTING = 3
-     STARTED = 4
-     STOPPING = 5
-     STOPPED = 6
-     DISPOSING = 7
-     DISPOSED = 8
-
-
- class LifecycleController(ABC):
-     def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is None or phase == LifecyclePhase.DISPOSED
-
-     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and (
-             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-         )
-
-     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and phase == LifecyclePhase.STARTED
-
-     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
-         return phase is not None and (
-             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
-         )
-
-
- LS = TypeVar("LS", bound=Lifecycle)
-
-
- class LifecycleState(LifecycleController, ABC):
-     phase: Optional[LifecyclePhase]
-
-     def __init__(self, lifecycle: LS) -> None:
-         self.phase = None
-         self.prev_phase = None
-         self.lifecycle = lifecycle
-         self.logger = logger_factory.get_logger(__name__)
-
-     def is_initializing(self) -> bool:
-         return self.phase == LifecyclePhase.INITIALIZING
-
-     def is_initialized(self) -> bool:
-         return self.phase == LifecyclePhase.INITIALIZED
-
-     def is_starting(self) -> bool:
-         return self.phase == LifecyclePhase.STARTING
-
-     def is_started(self) -> bool:
-         return self.phase == LifecyclePhase.STARTED
-
-     def is_stopping(self) -> bool:
-         return self.phase == LifecyclePhase.STOPPING
-
-     def is_stopped(self) -> bool:
-         return self.phase == LifecyclePhase.STOPPED
-
-     def is_disposing(self) -> bool:
-         return self.phase == LifecyclePhase.DISPOSING
-
-     def is_disposed(self) -> bool:
-         return self.phase == LifecyclePhase.DISPOSED
-
-     def get_phase(self) -> Optional[LifecyclePhase]:
-         return self.phase
-
-     def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
-         prev = "None"
-         if self.phase is not None:
-             prev = self.phase.name
-         current = "None"
-         if phase is not None:
-             current = phase.name
-         self.logger.info(
-             "[setPhaseName][{}]{} --> {}".format(
-                 self.lifecycle.__class__.__name__,
-                 prev,
-                 current,
-             )
-         )
-         self.phase = phase
-
-     def rollback(self, err: Exception) -> None:
-         self.phase = self.prev_phase
-         self.prev_phase = None
+ import enum
+ from abc import ABC, abstractmethod
+ from typing import TypeVar, Optional
+
+ from core import logger_factory
+
+
+ class Initializable(ABC):
+     @abstractmethod
+     def initialize(self) -> None:
+         pass
+
+
+ class Startable(ABC):
+     @abstractmethod
+     def start(self) -> None:
+         pass
+
+
+ class Stoppable(ABC):
+     @abstractmethod
+     def stop(self) -> None:
+         pass
+
+
+ class Disposable(ABC):
+     @abstractmethod
+     def dispose(self) -> None:
+         pass
+
+
+ class LifecycleAware(ABC):
+     def __init__(self, state: "LifecycleState") -> None:
+         """
+         Args:
+             state(LifecycleState): lifecycle state
+         """
+         self.state = state
+
+     @property
+     def get_lifecycle_state(self) -> "LifecycleState":
+         return self.state
+
+
+ class Lifecycle(Initializable, Startable, Stoppable, Disposable, LifecycleAware, ABC):
+     def __init__(self) -> None:
+         self.logger = logger_factory.get_logger(self.__class__.__name__)
+         self.lifecycle_state = LifecycleState(lifecycle=self)
+
+     def initialize(self) -> None:
+         if not self.lifecycle_state.can_initialize(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot initialize".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZING)
+         self.do_init()
+         self.lifecycle_state.set_phase(LifecyclePhase.INITIALIZED)
+
+     def start(self) -> None:
+         if not self.lifecycle_state.can_start(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot start".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.STARTING)
+         self.do_start()
+         self.lifecycle_state.set_phase(LifecyclePhase.STARTED)
+
+     def stop(self) -> None:
+         if not self.lifecycle_state.can_stop(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot stop".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.STOPPING)
+         self.do_stop()
+         self.lifecycle_state.set_phase(LifecyclePhase.STOPPED)
+
+     def dispose(self) -> None:
+         if not self.lifecycle_state.can_dispose(self.lifecycle_state.get_phase()):
+             self.logger.warning("[{}]cannot dispose".format(self.__class__.__name__))
+             return
+         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSING)
+         self.do_dispose()
+         self.lifecycle_state.set_phase(LifecyclePhase.DISPOSED)
+
+     @abstractmethod
+     def do_init(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_start(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_stop(self) -> None:
+         pass
+
+     @abstractmethod
+     def do_dispose(self) -> None:
+         pass
+
+
+ class LifecyclePhase(enum.Enum):
+     INITIALIZING = 1
+     INITIALIZED = 2
+     STARTING = 3
+     STARTED = 4
+     STOPPING = 5
+     STOPPED = 6
+     DISPOSING = 7
+     DISPOSED = 8
+
+
+ class LifecycleController(ABC):
+     def can_initialize(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is None or phase == LifecyclePhase.DISPOSED
+
+     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and (
+             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+         )
+
+     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and phase == LifecyclePhase.STARTED
+
+     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
+         return phase is not None and (
+             phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
+         )
+
+
+ LS = TypeVar("LS", bound=Lifecycle)
+
+
+ class LifecycleState(LifecycleController, ABC):
+     phase: Optional[LifecyclePhase]
+
+     def __init__(self, lifecycle: LS) -> None:
+         self.phase = None
+         self.prev_phase = None
+         self.lifecycle = lifecycle
+         self.logger = logger_factory.get_logger(__name__)
+
+     def is_initializing(self) -> bool:
+         return self.phase == LifecyclePhase.INITIALIZING
+
+     def is_initialized(self) -> bool:
+         return self.phase == LifecyclePhase.INITIALIZED
+
+     def is_starting(self) -> bool:
+         return self.phase == LifecyclePhase.STARTING
+
+     def is_started(self) -> bool:
+         return self.phase == LifecyclePhase.STARTED
+
+     def is_stopping(self) -> bool:
+         return self.phase == LifecyclePhase.STOPPING
+
+     def is_stopped(self) -> bool:
+         return self.phase == LifecyclePhase.STOPPED
+
+     def is_disposing(self) -> bool:
+         return self.phase == LifecyclePhase.DISPOSING
+
+     def is_disposed(self) -> bool:
+         return self.phase == LifecyclePhase.DISPOSED
+
+     def get_phase(self) -> Optional[LifecyclePhase]:
+         return self.phase
+
+     def set_phase(self, phase: Optional[LifecyclePhase]) -> None:
+         prev = "None"
+         if self.phase is not None:
+             prev = self.phase.name
+         current = "None"
+         if phase is not None:
+             current = phase.name
+         self.logger.info(
+             "[setPhaseName][{}]{} --> {}".format(
+                 self.lifecycle.__class__.__name__,
+                 prev,
+                 current,
+             )
+         )
+         self.phase = phase
+
+     def rollback(self, err: Exception) -> None:
+         self.phase = self.prev_phase
+         self.prev_phase = None
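
For readers new to this state machine, a minimal sketch (not part of the diff) of a subclass being driven through its phases; `Demo` is a hypothetical name:

```python
from core.lifecycle import Lifecycle


class Demo(Lifecycle):
    # The do_* hooks are the only methods a concrete Lifecycle must provide.
    def do_init(self) -> None:
        pass

    def do_start(self) -> None:
        pass

    def do_stop(self) -> None:
        pass

    def do_dispose(self) -> None:
        pass


demo = Demo()
demo.initialize()  # None -> INITIALIZING -> INITIALIZED
demo.start()       # INITIALIZED -> STARTING -> STARTED
demo.stop()        # STARTED -> STOPPING -> STOPPED
demo.dispose()     # STOPPED -> DISPOSING -> DISPOSED
demo.start()       # logs "[Demo]cannot start": can_start() rejects DISPOSED
```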
core/test_lifecycle.py CHANGED
@@ -1,7 +1,10 @@
+ import logging
  from unittest import TestCase

  from core.lifecycle import Lifecycle

+ logging.basicConfig()
+

  class SubLifecycle(Lifecycle):
      def __init__(self) -> None:
dataset/docstore.json CHANGED
The diff for this file is too large to render. See raw diff
 
dataset/index_store.json CHANGED
@@ -1 +1 @@
- {"index_store/data": {"da495c94-4541-47e1-b93f-8535192a5f28": {"__type__": "vector_store", "__data__": "{\"index_id\": \"da495c94-4541-47e1-b93f-8535192a5f28\", \"summary\": null, \"nodes_dict\": {\"59108663-a5e1-4e3e-bb21-626158eef136\": \"59108663-a5e1-4e3e-bb21-626158eef136\", \"50de4ec9-febb-466f-9f9a-cc9296895e83\": \"50de4ec9-febb-466f-9f9a-cc9296895e83\", \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\": \"aa413a53-0dda-4ac4-8ae9-6e8e340bb4f0\", \"a0cc4323-ec8f-4fed-9401-e44125134341\": \"a0cc4323-ec8f-4fed-9401-e44125134341\", \"5321cc7b-2a86-48b8-b56c-415dde7c149b\": \"5321cc7b-2a86-48b8-b56c-415dde7c149b\", \"9e19fb91-8258-4aca-9692-2d027073499e\": \"9e19fb91-8258-4aca-9692-2d027073499e\", \"02e856e5-4211-4a27-9204-e966907f1d74\": \"02e856e5-4211-4a27-9204-e966907f1d74\", \"f3074870-8fbf-4322-b1d2-2111e6aac9af\": \"f3074870-8fbf-4322-b1d2-2111e6aac9af\", \"82677fb9-abe3-4038-8263-5576c47da4f2\": \"82677fb9-abe3-4038-8263-5576c47da4f2\", \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\": \"a08364a6-c23d-4df5-8b5d-84137fbebd4e\", \"e45b082d-c3ec-45aa-b630-6db49a62728b\": \"e45b082d-c3ec-45aa-b630-6db49a62728b\", \"2c55445c-04b1-4705-9871-adaa02f38f1b\": \"2c55445c-04b1-4705-9871-adaa02f38f1b\", \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\": \"d0de9736-ccad-450e-b4a1-49d4cdb8b941\", \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\": \"fd0d2375-39e2-4bce-8e39-1182a122a1b4\", \"13221de7-6c68-4367-b1be-f35b06fc3a74\": \"13221de7-6c68-4367-b1be-f35b06fc3a74\", \"9f448401-cda9-4b5f-9a80-c79e111f9963\": \"9f448401-cda9-4b5f-9a80-c79e111f9963\", \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\": \"3bc7dfc2-3ddf-4384-a60c-6cd52e1314f4\", \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\": \"ce3e530c-ce2d-4f5f-a171-72a790c3c624\", \"85f764bd-e560-48ba-a51e-2287b6fe19db\": \"85f764bd-e560-48ba-a51e-2287b6fe19db\", \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\": \"3a8e4c7c-9f7d-4735-93e7-9d847cff98de\", \"af881b61-03f4-4851-8946-794015e3436c\": \"af881b61-03f4-4851-8946-794015e3436c\", \"31579820-439e-4029-b8c4-a0d6528daa59\": \"31579820-439e-4029-b8c4-a0d6528daa59\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
 
+ {"index_store/data": {"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {"__type__": "vector_store", "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}}
dataset/vector_store.json CHANGED
The diff for this file is too large to render. See raw diff
 
docs/docs.pkl CHANGED
Binary files a/docs/docs.pkl and b/docs/docs.pkl differ
 
github_retriever.py ADDED
@@ -0,0 +1,63 @@
+ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
+ from llama_index import download_loader, GPTVectorStoreIndex
+ from llama_index import LLMPredictor, VectorStoreIndex, ServiceContext
+ from langchain.llms import AzureOpenAI
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from llama_index import LangchainEmbedding, ServiceContext
+ from llama_index import StorageContext, load_index_from_storage
+ from dotenv import load_dotenv
+ import os
+ import pickle
+
+
+ def main() -> None:
+     # define embedding
+     embedding = LangchainEmbedding(OpenAIEmbeddings(chunk_size=1))
+     # define LLM
+     llm_predictor = LLMPredictor(
+         llm=AzureOpenAI(
+             engine="text-davinci-003",
+             model_name="text-davinci-003",
+         )
+     )
+
+     # configure service context
+     service_context = ServiceContext.from_defaults(
+         llm_predictor=llm_predictor, embed_model=embedding
+     )
+     download_loader("GithubRepositoryReader")
+     docs = None
+     if os.path.exists("docs/docs.pkl"):
+         with open("docs/docs.pkl", "rb") as f:
+             docs = pickle.load(f)
+
+     if docs is None:
+         github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
+         loader = GithubRepositoryReader(
+             github_client,
+             owner="ctripcorp",
+             repo="x-pipe",
+             filter_directories=(
+                 [".", "doc"],
+                 GithubRepositoryReader.FilterType.INCLUDE,
+             ),
+             filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
+             verbose=True,
+             concurrent_requests=10,
+         )
+
+         docs = loader.load_data(branch="master")
+
+         with open("docs/docs.pkl", "wb") as f:
+             pickle.dump(docs, f)
+
+     index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
+
+     query_engine = index.as_query_engine(service_context=service_context)
+     response = query_engine.query("如何使用X-Pipe?")
+     print(response)
+
+
+ if __name__ == "__main__":
+     load_dotenv()
+     main()
langchain_manager/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (154 Bytes).
 
langchain_manager/manager.py CHANGED
@@ -1,12 +1,14 @@
  from abc import abstractmethod, ABC

- from langchain.base_language import BaseLanguageModel
- from langchain.embeddings.base import Embeddings as LCEmbeddings
- from langchain.embeddings.openai import OpenAIEmbeddings
- from langchain.llms import AzureOpenAI
+ from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
+ from langchain_manager.embeddings.openai import OpenAIEmbeddings
+ from langchain_manager.llms import AzureOpenAI
+ from langchain_manager.base_language import BaseLanguageModel

+ from core.lifecycle import Lifecycle

+
- class BaseLangChainManager(ABC):
+ class BaseLangChainManager(Lifecycle, ABC):
      def __init__(self) -> None:
          super().__init__()

@@ -22,49 +24,16 @@ class BaseLangChainManager(ABC):
  class LangChainAzureManager(BaseLangChainManager):
      def __init__(self) -> None:
          super().__init__()
-         self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-         self.llm = AzureOpenAI(
-             deployment_name="text-davinci-003",
-             # model_name="text-davinci-003",
-             model="text-davinci-003",
-             client=None,
-             # temperature set to 0.0(default 0.7) to get a certain answer from OpenAI,
-             # as a wiki robot we won't want to get flexible answers
-             temperature=0.0,
-             # GPT-3 default is 4096, however, openai.py default is 256
-             max_tokens=2048,
-         )

      # Override
      def get_embedding(self) -> LCEmbeddings:
-         return self.embedding
+         return OpenAIEmbeddings(client=None, chunk_size=1)

      # Override
      def get_llm(self) -> BaseLanguageModel:
-         return self.llm
-
-
- class LangChainHuggingFaceManager(BaseLangChainManager):
-     def __init__(self) -> None:
-         super().__init__()
-         from transformers import AutoTokenizer, AutoModel
-
-         AutoTokenizer.from_pretrained("GanymedeNil/text2vec-large-chinese")
-
-         AutoModel.from_pretrained("GanymedeNil/text2vec-large-chinese")
-
-         self.embedding = OpenAIEmbeddings(client=None, chunk_size=1)
-         self.llm = AzureOpenAI(
+         return AzureOpenAI(
              deployment_name="text-davinci-003",
              # model_name="text-davinci-003",
              model="text-davinci-003",
              client=None,
          )
-
-     # Override
-     def get_embedding(self) -> LCEmbeddings:
-         return self.embedding
-
-     # Override
-     def get_llm(self) -> BaseLanguageModel:
-         return self.llm
llama/context.py ADDED
@@ -0,0 +1,61 @@
+ from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
+ from typing import Optional
+ from core.lifecycle import Lifecycle
+ from langchain_manager.manager import BaseLangChainManager
+
+
+ class ServiceContextManager(Lifecycle):
+     service_context: Optional[ServiceContext]
+
+     def __init__(self, manager: BaseLangChainManager) -> None:
+         super().__init__()
+         self.manager = manager
+         self.service_context = None
+
+     def get_service_context(self) -> ServiceContext:
+         if not self.lifecycle_state.is_started():
+             raise KeyError(
+                 "incorrect lifecycle state: {}".format(self.lifecycle_state.phase)
+             )
+         if self.service_context is None:
+             raise ValueError(
+                 "service context is not ready, check for lifecycle statement"
+             )
+         return self.service_context
+
+     def do_init(self) -> None:
+         # define embedding
+         embedding = LangchainEmbedding(self.manager.get_embedding())
+         # define LLM
+         llm_predictor = LLMPredictor(llm=self.manager.get_llm())
+         # configure service context
+         self.service_context = ServiceContext.from_defaults(
+             llm_predictor=llm_predictor, embed_model=embedding
+         )
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
+
+
+ class StorageContextManager(Lifecycle):
+     def __init__(self, dataset_path: Optional[str] = "./dataset") -> None:
+         super().__init__()
+         self.dataset_path = dataset_path
+
+     def do_init(self) -> None:
+         pass
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
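
A minimal usage sketch (not part of the diff) of the new manager, wired to the `LangChainAzureManager` from `langchain_manager/manager.py` above:

```python
# Hypothetical wiring; initialize() and start() run the do_init()/do_start()
# hooks defined by core.lifecycle.Lifecycle.
from langchain_manager.manager import LangChainAzureManager
from llama.context import ServiceContextManager

scm = ServiceContextManager(LangChainAzureManager())
scm.initialize()  # do_init() builds the ServiceContext from the Azure LLM + embedding
scm.start()       # get_service_context() expects the STARTED phase
service_context = scm.get_service_context()
```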
llama/data_loader.py CHANGED
@@ -7,6 +7,8 @@ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
  from llama_index import download_loader
  from llama_index.readers.schema.base import Document

+ from core.lifecycle import Lifecycle
+

  class WikiLoader(ABC):
      @abstractmethod
@@ -14,7 +16,7 @@ class WikiLoader(ABC):
          pass


- class GithubLoader(WikiLoader):
+ class GithubLoader(WikiLoader, Lifecycle):
      def __init__(
          self,
          github_owner: Optional[str] = None,
@@ -51,8 +53,7 @@ class GithubLoader(WikiLoader):
              verbose=True,
              concurrent_requests=10,
          )
-         os.environ["http_proxy"] = "http://127.0.0.1:7890"
-         os.environ["https_proxy"] = "http://127.0.0.1:7890"
+
          docs = loader.load_data(branch="master")

          with open("docs/docs.pkl", "wb") as f:
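
For orientation, a short sketch (not part of the diff) of the loader's call site, mirroring `LocalStorageContextManager._download()` in the removed `llama/storage_context.py` further down; it assumes `GITHUB_TOKEN` is set in the environment:

```python
from llama.data_loader import GithubLoader

loader = GithubLoader()  # defaults target the ctripcorp/x-pipe wiki markdown
docs = loader.load()     # List[Document]; the raw documents are also pickled to docs/docs.pkl
```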
llama/index.py ADDED
@@ -0,0 +1,18 @@
+ from core.lifecycle import Lifecycle
+ from llama.context import ServiceContextManager
+ from llama_index.indices.vector_store import VectorStoreIndex
+ from typing import Optional
+
+
+ class IndexManager(Lifecycle):
+     index: Optional[VectorStoreIndex]
+
+     def __init__(self, context_manager: ServiceContextManager) -> None:
+         super().__init__()
+         self.index = None
+         self.context_manager = context_manager
+
+     def get_index(self) -> Optional[VectorStoreIndex]:
+         if not self.lifecycle_state.is_started():
+             raise Exception("Lifecycle state is not correct")
+         return self.index
llama/service_context.py DELETED
@@ -1,142 +0,0 @@
- from abc import abstractmethod, ABC
-
- from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding
-
- from core.lifecycle import Lifecycle
- from langchain_manager.manager import BaseLangChainManager
-
-
- # def get_callback_manager() -> CallbackManager:
- #     from llama_index.callbacks import (
- #         WandbCallbackHandler,
- #         CallbackManager,
- #         LlamaDebugHandler,
- #     )
- #     llama_debug = LlamaDebugHandler(print_trace_on_end=True)
- #     # wandb.init args
- #     run_args = dict(
- #         project="llamaindex",
- #     )
- #     wandb_callback = WandbCallbackHandler(run_args=run_args)
- #     return CallbackManager([llama_debug, wandb_callback])
-
-
- class ServiceContextManager(Lifecycle, ABC):
-     @abstractmethod
-     def get_service_context(self) -> ServiceContext:
-         pass
-
-
- class AzureServiceContextManager(ServiceContextManager):
-     lc_manager: BaseLangChainManager
-     service_context: ServiceContext
-
-     def __init__(self, lc_manager: BaseLangChainManager):
-         super().__init__()
-         self.lc_manager = lc_manager
-
-     def get_service_context(self) -> ServiceContext:
-         if self.service_context is None:
-             raise ValueError(
-                 "service context is not ready, check for lifecycle statement"
-             )
-         return self.service_context
-
-     def do_init(self) -> None:
-         # define embedding
-         embedding = LangchainEmbedding(self.lc_manager.get_embedding())
-         # define LLM
-         llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
-         # configure service context
-         self.service_context = ServiceContext.from_defaults(
-             llm_predictor=llm_predictor,
-             embed_model=embedding,
-             # callback_manager=get_callback_manager(),
-         )
-
-     def do_start(self) -> None:
-         self.logger.info(
-             "[do_start][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_start][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_stop(self) -> None:
-         self.logger.info(
-             "[do_stop][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_stop][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_dispose(self) -> None:
-         self.logger.info(
-             "[do_dispose] total used token: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-
- class HuggingFaceChineseOptServiceContextManager(ServiceContextManager):
-     lc_manager: BaseLangChainManager
-     service_context: ServiceContext
-
-     def __init__(self, lc_manager: BaseLangChainManager):
-         super().__init__()
-         self.lc_manager = lc_manager
-
-     def get_service_context(self) -> ServiceContext:
-         if self.service_context is None:
-             raise ValueError(
-                 "service context is not ready, check for lifecycle statement"
-             )
-         return self.service_context
-
-     def do_init(self) -> None:
-         # define embedding
-         from langchain.embeddings import HuggingFaceEmbeddings
-
-         model_name = "GanymedeNil/text2vec-large-chinese"
-         hf_embedding = HuggingFaceEmbeddings(
-             model_name=model_name, model_kwargs={"device": "cpu"}
-         )
-
-         embedding = LangchainEmbedding(hf_embedding)
-         # define LLM
-         llm_predictor = LLMPredictor(self.lc_manager.get_llm())
-         # configure service context
-         self.service_context = ServiceContext.from_defaults(
-             llm_predictor=llm_predictor,
-             embed_model=embedding,
-             # callback_manager=get_callback_manager()
-         )
-
-     def do_start(self) -> None:
-         self.logger.info(
-             "[do_start][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_start][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_stop(self) -> None:
-         self.logger.info(
-             "[do_stop][embedding] last used usage: %d",
-             self.service_context.embed_model.total_tokens_used,
-         )
-         self.logger.info(
-             "[do_stop][predict] last used usage: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
-
-     def do_dispose(self) -> None:
-         self.logger.info(
-             "[do_dispose] total used token: %d",
-             self.service_context.llm_predictor.total_tokens_used,
-         )
llama/storage_context.py DELETED
@@ -1,67 +0,0 @@
- from llama_index import StorageContext
- from typing import List
- from abc import abstractmethod, ABC
-
- from llama_index import Document
-
- from core.lifecycle import Lifecycle
- from llama.service_context import ServiceContextManager
-
-
- class StorageContextManager(Lifecycle, ABC):
-     @abstractmethod
-     def get_storage_context(self) -> StorageContext:
-         pass
-
-
- class LocalStorageContextManager(StorageContextManager):
-     storage_context: StorageContext
-
-     def __init__(
-         self,
-         service_context_manager: ServiceContextManager,
-         dataset_path: str = "./dataset",
-     ) -> None:
-         super().__init__()
-         self.dataset_path = dataset_path
-         self.service_context_manager = service_context_manager
-
-     def get_storage_context(self) -> StorageContext:
-         return self.storage_context
-
-     def do_init(self) -> None:
-         from llama.utils import is_local_storage_files_ready
-
-         if is_local_storage_files_ready(self.dataset_path):
-             self.storage_context = StorageContext.from_defaults(
-                 persist_dir=self.dataset_path
-             )
-         else:
-             docs = self._download()
-             self._indexing(docs)
-
-     def do_start(self) -> None:
-         # self.logger.info("[do_start]%", **self.storage_context.to_dict())
-         pass
-
-     def do_stop(self) -> None:
-         # self.logger.info("[do_stop]%", **self.storage_context.to_dict())
-         pass
-
-     def do_dispose(self) -> None:
-         self.storage_context.persist(self.dataset_path)
-
-     def _download(self) -> List[Document]:
-         from llama.data_loader import GithubLoader
-
-         loader = GithubLoader()
-         return loader.load()
-
-     def _indexing(self, docs: List[Document]) -> None:
-         from llama_index import GPTVectorStoreIndex
-
-         index = GPTVectorStoreIndex.from_documents(
-             docs, service_context=self.service_context_manager.get_service_context()
-         )
-         index.storage_context.persist(persist_dir=self.dataset_path)
-         self.storage_context = index.storage_context
llama/utils.py DELETED
@@ -1,5 +0,0 @@
- import os
-
-
- def is_local_storage_files_ready(persist_dir: str) -> bool:
-     return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
llama/vector_storage.py ADDED
@@ -0,0 +1,18 @@
+ from core.lifecycle import Lifecycle
+
+
+ class VectorStorageManager(Lifecycle):
+     def __init__(self) -> None:
+         super().__init__()
+
+     def do_init(self) -> None:
+         pass
+
+     def do_start(self) -> None:
+         pass
+
+     def do_stop(self) -> None:
+         pass
+
+     def do_dispose(self) -> None:
+         pass
local-requirements.txt DELETED
@@ -1 +0,0 @@
- python-dotenv
pyproject.toml CHANGED
@@ -1,7 +1,7 @@
  [tool.mypy]
  ignore_missing_imports = "True"
  disallow_untyped_defs = "True"
- exclude = ["notebooks", "build", "examples", "docs", "dataset", "github_retriever.py"]
+ exclude = ["notebooks", "build", "examples", "docs", "dataset", "app.py", "github_retriever.py"]

  [tool.ruff]
  exclude = [
@@ -14,5 +14,6 @@ exclude = [
      "notebooks",
      "docs",
      "dataset",
+     "app.py",
      "github_retriever.py"
  ]
requirements.txt CHANGED
@@ -1,10 +1,7 @@
- llama_index>=0.6.3
- llama_hub
- streamlit
- ruff
- black
- mypy
- accelerate
- python-dotenv
- sentence_transformers
- wandb
+ langchain>=0.0.154
+ openai>=0.26.4
+ llama_index>=0.6.32
+ llama_hub
+ ruff
+ black
+ mypy
xpipe_wiki/__init__.py DELETED
File without changes
xpipe_wiki/manager_factory.py DELETED
@@ -1,82 +0,0 @@
- import enum
- import os
-
- from core.helper import LifecycleHelper
- from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
-
- from multiprocessing import Lock
-
- lock = Lock()
-
-
- class XPipeRobotRevision(enum.Enum):
-     SIMPLE_OPENAI_VERSION_0 = 1
-     HUGGINGFACE_VERSION_0 = 2
-
-
- class XPipeRobotManagerFactory:
-     """
-     CAPABLE: Dict[XPipeRobotRevision, XPipeWikiRobotManager] =
-     {XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0: XPipeWikiRobotManager()}
-     """
-
-     CAPABLE = dict()  # type: dict[XPipeRobotRevision, XPipeWikiRobotManager]
-
-     @classmethod
-     def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
-         with lock:
-             if cls.CAPABLE.get(revision) is not None:
-                 return cls.CAPABLE[revision]
-             if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
-                 manager = cls.create_simple_openai_version_0()
-             elif revision == XPipeRobotRevision.HUGGINGFACE_VERSION_0:
-                 manager = cls.create_huggingface_version_0()
-             cls.CAPABLE[revision] = manager
-             return manager
-
-     @classmethod
-     def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
-         from llama.service_context import AzureServiceContextManager
-         from langchain_manager.manager import LangChainAzureManager
-
-         service_context_manager = AzureServiceContextManager(
-             lc_manager=LangChainAzureManager()
-         )
-         from llama.storage_context import LocalStorageContextManager
-
-         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
-         storage_context_manager = LocalStorageContextManager(
-             dataset_path=dataset_path, service_context_manager=service_context_manager
-         )
-
-         robot_manager = AzureXPipeWikiRobotManager(
-             service_context_manager=service_context_manager,
-             storage_context_manager=storage_context_manager,
-         )
-         LifecycleHelper.initialize_if_possible(robot_manager)
-         LifecycleHelper.start_if_possible(robot_manager)
-         return robot_manager
-
-     @classmethod
-     def create_huggingface_version_0(cls) -> AzureXPipeWikiRobotManager:
-         from llama.service_context import HuggingFaceChineseOptServiceContextManager
-         from langchain_manager.manager import LangChainAzureManager
-
-         service_context_manager = HuggingFaceChineseOptServiceContextManager(
-             lc_manager=LangChainAzureManager()
-         )
-
-         from llama.storage_context import LocalStorageContextManager
-
-         dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
-         storage_context_manager = LocalStorageContextManager(
-             dataset_path=dataset_path, service_context_manager=service_context_manager
-         )
-
-         robot_manager = AzureXPipeWikiRobotManager(
-             service_context_manager=service_context_manager,
-             storage_context_manager=storage_context_manager,
-         )
-         LifecycleHelper.initialize_if_possible(robot_manager)
-         LifecycleHelper.start_if_possible(robot_manager)
-         return robot_manager
xpipe_wiki/robot_manager.py DELETED
@@ -1,79 +0,0 @@
- from abc import ABC, abstractmethod
- from typing import Any
-
- from llama_index import load_index_from_storage
- from llama_index.indices.query.base import BaseQueryEngine
- from llama_index.indices.response import ResponseMode
-
- from core.helper import LifecycleHelper
- from core.lifecycle import Lifecycle
- from llama.service_context import ServiceContextManager
- from llama.storage_context import StorageContextManager
-
-
- class XPipeWikiRobot(ABC):
-     @abstractmethod
-     def ask(self, question: str) -> Any:
-         pass
-
-
- class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
-     query_engine: BaseQueryEngine
-
-     def __init__(self, query_engine: BaseQueryEngine) -> None:
-         super().__init__()
-         self.query_engine = query_engine
-
-     def ask(self, question: str) -> Any:
-         print("question: ", question)
-         response = self.query_engine.query(question)
-         print("response type: ", type(response))
-         return response.__str__()
-
-
- class XPipeWikiRobotManager(Lifecycle):
-     @abstractmethod
-     def get_robot(self) -> XPipeWikiRobot:
-         pass
-
-
- class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
-     service_context_manager: ServiceContextManager
-     storage_context_manager: StorageContextManager
-     query_engine: BaseQueryEngine
-
-     def __init__(
-         self,
-         service_context_manager: ServiceContextManager,
-         storage_context_manager: StorageContextManager,
-     ) -> None:
-         super().__init__()
-         self.service_context_manager = service_context_manager
-         self.storage_context_manager = storage_context_manager
-
-     def get_robot(self) -> XPipeWikiRobot:
-         return AzureOpenAIXPipeWikiRobot(self.query_engine)
-
-     def do_init(self) -> None:
-         LifecycleHelper.initialize_if_possible(self.service_context_manager)
-         LifecycleHelper.initialize_if_possible(self.storage_context_manager)
-
-     def do_start(self) -> None:
-         LifecycleHelper.start_if_possible(self.service_context_manager)
-         LifecycleHelper.start_if_possible(self.storage_context_manager)
-         index = load_index_from_storage(
-             storage_context=self.storage_context_manager.get_storage_context(),
-             service_context=self.service_context_manager.get_service_context(),
-         )
-         self.query_engine = index.as_query_engine(
-             service_context=self.service_context_manager.get_service_context(),
-             response_mode=ResponseMode.TREE_SUMMARIZE,
-         )
-
-     def do_stop(self) -> None:
-         LifecycleHelper.stop_if_possible(self.storage_context_manager)
-         LifecycleHelper.stop_if_possible(self.service_context_manager)
-
-     def do_dispose(self) -> None:
-         LifecycleHelper.dispose_if_possible(self.storage_context_manager)
-         LifecycleHelper.dispose_if_possible(self.service_context_manager)