refactor
- Makefile +1 -1
- README.md +44 -0
- app.py +6 -60
- core/helper.py +24 -0
- core/lifecycle.py +2 -3
- dataset/docstore.json +0 -0
- dataset/graph_store.json +3 -1
- dataset/index_store.json +8 -1
- dataset/vector_store.json +0 -0
- langchain_manager/manager.py +1 -1
- llama/context.py +66 -18
- llama/data_loader.py +5 -7
- llama/utils.py +5 -0
- pyproject.toml +1 -2
- requirements.txt +2 -3
- xpipe_wiki/__init__.py +0 -0
- xpipe_wiki/manager_factory.py +42 -0
- xpipe_wiki/robot_manager.py +58 -0
Makefile
CHANGED
@@ -8,7 +8,7 @@ format:
 lint:
 	mypy .
 	black . --check
-	ruff check .
+	ruff check . --fix
 
 test:
 	pytest tests
README.md
CHANGED
@@ -11,3 +11,47 @@ license: apache-2.0
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Then, create a new Python virtual environment. The command below creates an environment in `.venv`
+and activates it:
+
+```bash
+python -m venv .venv
+source .venv/bin/activate
+```
+
+If you are on Windows, use the following to activate your virtual environment:
+
+```bash
+.venv\Scripts\activate
+```
+
+Install the required dependencies (this will also install gpt-index through `pip install -e .`
+so that you can start developing on it):
+
+```bash
+pip install -r requirements.txt
+```
+
+Now you should be set!
+
+### Validating your Change
+
+Let's make sure to `format/lint` our change. For bigger changes,
+let's also make sure to `test` it and perhaps create an `example notebook`.
+
+#### Formatting/Linting
+
+You can format and lint your changes with the following commands in the root directory:
+
+```bash
+make format; make lint
+```
+
+You can also make use of our pre-commit hooks by setting up git hook scripts:
+
+```bash
+pre-commit install
+```
+
+We run an assortment of linters: `black`, `ruff`, `mypy`.
app.py
CHANGED
@@ -1,15 +1,9 @@
-from llama_hub.github_repo import GithubRepositoryReader, GithubClient
-from llama_index import download_loader, GPTVectorStoreIndex
-from llama_index import LLMPredictor, ServiceContext, LangchainEmbedding
-from langchain.llms import AzureOpenAI
-from langchain.embeddings.openai import OpenAIEmbeddings
-import os
-import pickle
-import streamlit as st
-
 import logging
 import sys
 
+import streamlit as st
+
+from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision
 
 logging.basicConfig(
     stream=sys.stdout, level=logging.DEBUG
@@ -34,59 +28,11 @@ with st.sidebar:
 
 def main() -> None:
     st.header("X-Pipe Wiki 机器人 💬")
-
-
-    # define LLM
-    llm_predictor = LLMPredictor(
-        llm=AzureOpenAI(
-            deployment_name="text-davinci-003",
-            model="text-davinci-003",
-            client=None,
-        )
-    )
-
-    # configure service context
-    service_context = ServiceContext.from_defaults(
-        llm_predictor=llm_predictor, embed_model=embedding
-    )
-    download_loader("GithubRepositoryReader")
-    docs = None
-    if os.path.exists("docs/docs.pkl"):
-        with open("docs/docs.pkl", "rb") as f:
-            docs = pickle.load(f)
-
-    if docs is None:
-        github_client = GithubClient(os.getenv("GITHUB_TOKEN"))
-        loader = GithubRepositoryReader(
-            github_client,
-            owner="ctripcorp",
-            repo="x-pipe",
-            filter_directories=(
-                [".", "doc"],
-                GithubRepositoryReader.FilterType.INCLUDE,
-            ),
-            filter_file_extensions=([".md"], GithubRepositoryReader.FilterType.INCLUDE),
-            verbose=True,
-            concurrent_requests=10,
-        )
-
-        docs = loader.load_data(branch="master")
-
-        with open("docs/docs.pkl", "wb") as f:
-            pickle.dump(docs, f)
-
-    index = GPTVectorStoreIndex.from_documents(docs, service_context=service_context)
-
-    query_engine = index.as_query_engine(service_context=service_context)
-
+    robot_manager = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
+    robot = robot_manager.get_robot()
     query = st.text_input("X-Pipe Wiki 问题:")
     if query:
-        index = GPTVectorStoreIndex.from_documents(
-            docs, service_context=service_context
-        )
-
-        query_engine = index.as_query_engine(service_context=service_context)
-        response = query_engine.query(query)
+        response = robot.ask(question=query)
     st.write(response)
 
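Pieced together, the refactored app.py reduces to a thin Streamlit UI over the robot abstraction. A sketch of the resulting file, reconstructed from the hunks above; the sidebar block and anything outside the shown hunks are elided, and the indentation of the final `st.write` is an assumption:

```python
import logging
import sys

import streamlit as st

from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)


def main() -> None:
    st.header("X-Pipe Wiki 机器人 💬")
    robot_manager = XPipeRobotManagerFactory.get_or_create(
        XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0
    )
    robot = robot_manager.get_robot()
    query = st.text_input("X-Pipe Wiki 问题:")
    if query:
        response = robot.ask(question=query)
        st.write(response)  # assumed to sit inside the `if query:` block
```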
core/helper.py
ADDED
@@ -0,0 +1,24 @@
+from core.lifecycle import Lifecycle
+
+
+class LifecycleHelper:
+
+    @classmethod
+    def initialize_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_initialize(ls.lifecycle_state.phase):
+            ls.initialize()
+
+    @classmethod
+    def start_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_start(ls.lifecycle_state.phase):
+            ls.start()
+
+    @classmethod
+    def stop_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_stop(ls.lifecycle_state.phase):
+            ls.stop()
+
+    @classmethod
+    def dispose_if_possible(cls, ls: Lifecycle) -> None:
+        if isinstance(ls, Lifecycle) and ls.get_lifecycle_state().can_dispose(ls.lifecycle_state.phase):
+            ls.dispose()
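Each guard is a no-op unless the component's current phase permits the transition, which makes lifecycle calls safe to repeat or reorder. A minimal usage sketch, assuming the managers introduced elsewhere in this commit (llama/context.py, langchain_manager/manager.py):

```python
from core.helper import LifecycleHelper
from langchain_manager.manager import LangChainAzureManager
from llama.context import AzureServiceContextManager

manager = AzureServiceContextManager(lc_manager=LangChainAzureManager())

LifecycleHelper.initialize_if_possible(manager)
LifecycleHelper.initialize_if_possible(manager)  # no-op: phase no longer allows it
LifecycleHelper.start_if_possible(manager)
LifecycleHelper.stop_if_possible(manager)
LifecycleHelper.dispose_if_possible(manager)
```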
core/lifecycle.py
CHANGED
@@ -37,7 +37,6 @@ class LifecycleAware(ABC):
         """
         self.state = state
 
-    @property
     def get_lifecycle_state(self) -> "LifecycleState":
         return self.state
 
@@ -113,7 +112,7 @@ class LifecycleController(ABC):
 
     def can_start(self, phase: Optional[LifecyclePhase]) -> bool:
         return phase is not None and (
-
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
         )
 
     def can_stop(self, phase: Optional[LifecyclePhase]) -> bool:
@@ -121,7 +120,7 @@ class LifecycleController(ABC):
 
     def can_dispose(self, phase: Optional[LifecyclePhase]) -> bool:
        return phase is not None and (
-
+            phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
        )
 
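The net effect of the filled-in predicates: a component may be started from either INITIALIZED or STOPPED (so a stopped manager can be restarted), and disposed from the same two phases. A small self-contained spot-check that mirrors the predicate above; the free function is illustrative, not part of the commit:

```python
from typing import Optional

from core.lifecycle import LifecyclePhase


def can_start(phase: Optional[LifecyclePhase]) -> bool:
    # Mirrors LifecycleController.can_start after this change.
    return phase is not None and (
        phase == LifecyclePhase.INITIALIZED or phase == LifecyclePhase.STOPPED
    )


assert can_start(LifecyclePhase.INITIALIZED)
assert can_start(LifecyclePhase.STOPPED)  # restart after a stop is allowed
assert not can_start(None)                # component has no phase yet
```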
dataset/docstore.json
CHANGED
The diff for this file is too large to render. See raw diff.
dataset/graph_store.json
CHANGED
@@ -1 +1,3 @@
-{
+{
+    "graph_dict": {}
+}
dataset/index_store.json
CHANGED
@@ -1 +1,8 @@
-{
+{
+    "index_store/data": {
+        "7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc": {
+            "__type__": "vector_store",
+            "__data__": "{\"index_id\": \"7fd1f2d1-8a3a-4e24-bf42-5c149a73c8dc\", \"summary\": null, \"nodes_dict\": {\"190d619c-e92d-4cc5-b43b-652f66f4d6e0\": \"190d619c-e92d-4cc5-b43b-652f66f4d6e0\", \"1158d24f-29db-457b-aeaa-2a051316796e\": \"1158d24f-29db-457b-aeaa-2a051316796e\", \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\": \"520004a6-3eb6-44cf-90ba-a51ed1c6b894\", \"359cdd4d-6096-4be4-991d-f3cc691c07c9\": \"359cdd4d-6096-4be4-991d-f3cc691c07c9\", \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\": \"3e4ffb91-090e-4e84-92cd-e8df8b3a87b8\", \"7aa9b872-c4df-4de6-88c5-55962b913ba9\": \"7aa9b872-c4df-4de6-88c5-55962b913ba9\", \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\": \"16aa0efc-39b4-4fa8-93ab-f31b5740670c\", \"1bebdce9-981b-4100-914d-48da73dc5f96\": \"1bebdce9-981b-4100-914d-48da73dc5f96\", \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\": \"bbe72c0d-ab9f-45c0-b581-d051f7aa797e\", \"229d36cc-1f60-485a-b44f-c53701b423de\": \"229d36cc-1f60-485a-b44f-c53701b423de\", \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\": \"dff86a2f-8cf4-4f01-a8ab-36977bb2db5c\", \"cdf5fa07-4780-4cc2-9073-851996c4f006\": \"cdf5fa07-4780-4cc2-9073-851996c4f006\", \"a27c62d4-e969-46a8-94d8-aa0ce552234e\": \"a27c62d4-e969-46a8-94d8-aa0ce552234e\", \"e85816a0-a950-44c7-af93-dcf15a889490\": \"e85816a0-a950-44c7-af93-dcf15a889490\", \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\": \"cce4f38a-e5d4-4561-aab9-d695eef3bacf\", \"b3716ae7-21ec-4112-979e-d5963c8235d5\": \"b3716ae7-21ec-4112-979e-d5963c8235d5\", \"55362a6d-27e6-46de-8fa8-c87a2039020f\": \"55362a6d-27e6-46de-8fa8-c87a2039020f\", \"785291f8-266d-4458-ba2c-2c110fac1038\": \"785291f8-266d-4458-ba2c-2c110fac1038\", \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\": \"c8655cf5-12f3-479a-a3b4-b43a2f6965ba\", \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\": \"67b2a33e-b9a3-4072-bc89-c1405d60dbe0\", \"42256b49-3c47-4fa2-8c64-871a757b9993\": \"42256b49-3c47-4fa2-8c64-871a757b9993\", \"f0a7c6ca-206e-477c-8f0a-416a355532d5\": \"f0a7c6ca-206e-477c-8f0a-416a355532d5\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"
+        }
+    }
+}
dataset/vector_store.json
CHANGED
The diff for this file is too large to render. See raw diff.
langchain_manager/manager.py
CHANGED
@@ -1,9 +1,9 @@
 from abc import abstractmethod, ABC
 
+from langchain_manager.base_language import BaseLanguageModel
 from langchain_manager.embeddings.base import Embeddings as LCEmbeddings
 from langchain_manager.embeddings.openai import OpenAIEmbeddings
 from langchain_manager.llms import AzureOpenAI
-from langchain_manager.base_language import BaseLanguageModel
 
 from core.lifecycle import Lifecycle
 
llama/context.py
CHANGED
@@ -1,16 +1,26 @@
-from
-
+from abc import abstractmethod, ABC
+
+from llama_index import ServiceContext, LLMPredictor, LangchainEmbedding, Document
+from llama_index import StorageContext
+
 from core.lifecycle import Lifecycle
-from
+from langchain_manager.manager import BaseLangChainManager
+
+
+class ServiceContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_service_context(self) -> ServiceContext:
+        pass
 
 
-class ServiceContextManager
-
+class AzureServiceContextManager(ServiceContextManager):
+    lc_manager: BaseLangChainManager
+    service_context: ServiceContext
 
-    def __init__(self,
+    def __init__(self, lc_manager: BaseLangChainManager):
         super().__init__()
-        self.
-        self.service_context = None
+        self.lc_manager = lc_manager
 
     def get_service_context(self) -> ServiceContext:
         if self.lifecycle_state.is_started():
@@ -25,37 +35,75 @@ class ServiceContextManager(Lifecycle):
 
     def do_init(self) -> None:
         # define embedding
-        embedding = LangchainEmbedding(self.
+        embedding = LangchainEmbedding(self.lc_manager.get_embedding())
         # define LLM
-        llm_predictor = LLMPredictor(llm=self.
+        llm_predictor = LLMPredictor(llm=self.lc_manager.get_llm())
         # configure service context
         self.service_context = ServiceContext.from_defaults(
             llm_predictor=llm_predictor, embed_model=embedding
         )
 
     def do_start(self) -> None:
-
+        self.logger.info("[do_start][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_start][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_stop(self) -> None:
-
+        self.logger.info("[do_stop][embedding] last used usage: %d",
+                         self.service_context.embed_model.total_tokens_used)
+        self.logger.info("[do_stop][predict] last used usage: %d",
+                         self.service_context.llm_predictor.total_tokens_used)
 
     def do_dispose(self) -> None:
+        self.logger.info("[do_dispose] total used token: %d", self.service_context.llm_predictor.total_tokens_used)
+
+
+class StorageContextManager(Lifecycle, ABC):
+
+    @abstractmethod
+    def get_storage_context(self) -> StorageContext:
         pass
 
 
-class StorageContextManager
-
+class LocalStorageContextManager(StorageContextManager):
+    storage_context: StorageContext
+
+    def __init__(self,
+                 dataset_path: str = "./dataset",
+                 service_context_manager: ServiceContextManager = None) -> None:
         super().__init__()
         self.dataset_path = dataset_path
+        self.service_context_manager = service_context_manager
+
+    def get_storage_context(self) -> StorageContext:
+        return self.storage_context
 
     def do_init(self) -> None:
-
+        from llama.utils import is_local_storage_files_ready
+        if is_local_storage_files_ready(self.dataset_path):
+            self.storage_context = StorageContext.from_defaults(persist_dir=self.dataset_path)
+        else:
+            docs = self._download()
+            self._indexing(docs)
 
     def do_start(self) -> None:
-
+        self.logger.info("[do_start] storage context: %s", self.storage_context.to_dict())
 
     def do_stop(self) -> None:
-
+        self.logger.info("[do_stop] storage context: %s", self.storage_context.to_dict())
 
     def do_dispose(self) -> None:
-
+        self.storage_context.persist(self.dataset_path)
+
+    def _download(self) -> [Document]:
+        from llama.data_loader import GithubLoader
+        loader = GithubLoader()
+        return loader.load()
+
+    def _indexing(self, docs: [Document]):
+        from llama_index import GPTVectorStoreIndex
+        index = GPTVectorStoreIndex.from_documents(docs,
+                                                   service_context=self.service_context_manager.get_service_context())
+        index.storage_context.persist(persist_dir=self.dataset_path)
+        self.storage_context = index.storage_context
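The two managers are designed to be wired together and driven through the shared lifecycle; a sketch of that wiring, which is the same sequence `AzureXPipeWikiRobotManager` performs in xpipe_wiki/robot_manager.py:

```python
from core.helper import LifecycleHelper
from langchain_manager.manager import LangChainAzureManager
from llama.context import AzureServiceContextManager, LocalStorageContextManager

service_ctx_mgr = AzureServiceContextManager(lc_manager=LangChainAzureManager())
storage_ctx_mgr = LocalStorageContextManager(
    dataset_path="./dataset", service_context_manager=service_ctx_mgr
)

# do_init on the storage manager either reloads the persisted index from
# ./dataset or, on first run, downloads the wiki and builds (then persists) it.
for mgr in (service_ctx_mgr, storage_ctx_mgr):
    LifecycleHelper.initialize_if_possible(mgr)
    LifecycleHelper.start_if_possible(mgr)
```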
llama/data_loader.py
CHANGED
@@ -7,8 +7,6 @@ from llama_hub.github_repo import GithubRepositoryReader, GithubClient
 from llama_index import download_loader
 from llama_index.readers.schema.base import Document
 
-from core.lifecycle import Lifecycle
-
 
 class WikiLoader(ABC):
     @abstractmethod
@@ -16,12 +14,12 @@ class WikiLoader(ABC):
         pass
 
 
-class GithubLoader(WikiLoader
+class GithubLoader(WikiLoader):
     def __init__(
-
-
-
-
+        self,
+        github_owner: Optional[str] = None,
+        repo: Optional[str] = None,
+        dirs: Optional[Sequence[str]] = None,
     ):
         super().__init__()
         self.owner = (
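With every constructor argument optional, the loader presumably falls back to the X-Pipe defaults that the old app.py hard-coded (owner `ctripcorp`, repo `x-pipe`, dirs `[".", "doc"]`); that fallback is an assumption here, since the body of `__init__` is not fully shown. A usage sketch:

```python
from llama.data_loader import GithubLoader

loader = GithubLoader()  # assumed default: ctripcorp/x-pipe wiki markdown
docs = loader.load()     # llama_index Documents (see _download in llama/context.py)

# Or point it at an explicit source:
loader = GithubLoader(github_owner="ctripcorp", repo="x-pipe", dirs=["doc"])
```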
llama/utils.py
ADDED
@@ -0,0 +1,5 @@
+import os
+
+
+def is_local_storage_files_ready(persist_dir: str) -> bool:
+    return os.path.exists(persist_dir) and len(os.listdir(persist_dir)) != 0
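The helper treats an existing, non-empty persist directory as a ready index; `LocalStorageContextManager.do_init` uses it to decide between loading and rebuilding. For example:

```python
from llama.utils import is_local_storage_files_ready

# True once ./dataset holds the persisted stores (docstore.json,
# index_store.json, vector_store.json, graph_store.json).
if is_local_storage_files_ready("./dataset"):
    print("reuse persisted index")
else:
    print("rebuild index from the wiki source")
```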
pyproject.toml
CHANGED
@@ -1,7 +1,7 @@
 [tool.mypy]
 ignore_missing_imports = "True"
 disallow_untyped_defs = "True"
-exclude = ["notebooks", "build", "examples", "docs", "dataset", "
+exclude = ["notebooks", "build", "examples", "docs", "dataset", "github_retriever.py"]
 
 [tool.ruff]
 exclude = [
@@ -14,6 +14,5 @@ exclude = [
     "notebooks",
     "docs",
     "dataset",
-    "app.py",
     "github_retriever.py"
 ]
requirements.txt
CHANGED
@@ -1,7 +1,6 @@
-
-openai>=0.26.4
-llama_index>=0.6.32
+llama_index>=0.6.3
 llama_hub
+
 ruff
 black
 mypy
xpipe_wiki/__init__.py
ADDED
File without changes
xpipe_wiki/manager_factory.py
ADDED
@@ -0,0 +1,42 @@
+import enum
+import os
+
+from core.helper import LifecycleHelper
+from xpipe_wiki.robot_manager import XPipeWikiRobotManager, AzureXPipeWikiRobotManager
+
+
+class XPipeRobotRevision(enum.Enum):
+    SIMPLE_OPENAI_VERSION_0 = 1
+
+
+CAPABLE: dict[XPipeRobotRevision, XPipeWikiRobotManager] = {}
+
+
+class XPipeRobotManagerFactory:
+
+    @classmethod
+    def get_or_create(cls, revision: XPipeRobotRevision) -> XPipeWikiRobotManager:
+        if CAPABLE.get(revision) is not None:
+            return CAPABLE[revision]
+        if revision == XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0:
+            manager = cls.create_simple_openai_version_0()
+            CAPABLE[revision] = manager
+            return manager
+
+    @classmethod
+    def create_simple_openai_version_0(cls) -> AzureXPipeWikiRobotManager:
+
+        from llama.context import AzureServiceContextManager
+        from langchain_manager.manager import LangChainAzureManager
+        service_context_manager = AzureServiceContextManager(lc_manager=LangChainAzureManager())
+
+        from llama.context import LocalStorageContextManager
+        dataset_path = os.getenv("XPIPE_WIKI_DATASET_PATH", "./dataset")
+        storage_context_manager = LocalStorageContextManager(dataset_path=dataset_path,
+                                                             service_context_manager=service_context_manager)
+
+        robot_manager = AzureXPipeWikiRobotManager(service_context_manager=service_context_manager,
+                                                   storage_context_manager=storage_context_manager)
+        LifecycleHelper.initialize_if_possible(robot_manager)
+        LifecycleHelper.start_if_possible(robot_manager)
+        return robot_manager
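`get_or_create` memoizes one fully started manager per revision in the `CAPABLE` registry, so repeated calls from the Streamlit app are cheap:

```python
from xpipe_wiki.manager_factory import XPipeRobotManagerFactory, XPipeRobotRevision

m1 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
m2 = XPipeRobotManagerFactory.get_or_create(XPipeRobotRevision.SIMPLE_OPENAI_VERSION_0)
assert m1 is m2  # second call hits the CAPABLE cache; no re-initialization
```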
xpipe_wiki/robot_manager.py
ADDED
@@ -0,0 +1,58 @@
+from abc import ABC, abstractmethod
+from typing import Any
+
+from llama_index import load_index_from_storage
+from llama_index.indices.query.base import BaseQueryEngine
+from pydantic.dataclasses import dataclass
+
+from core.helper import LifecycleHelper
+from core.lifecycle import Lifecycle
+from llama.context import ServiceContextManager, StorageContextManager
+
+
+class XPipeWikiRobot(ABC):
+    @abstractmethod
+    def ask(self, question: str) -> Any:
+        pass
+
+
+@dataclass
+class AzureOpenAIXPipeWikiRobot(XPipeWikiRobot):
+    query_engine: BaseQueryEngine
+
+    def ask(self, question: str) -> Any:
+        return self.query_engine.query(question)
+
+
+class XPipeWikiRobotManager(Lifecycle):
+
+    @abstractmethod
+    def get_robot(self) -> XPipeWikiRobot:
+        pass
+
+
+@dataclass
+class AzureXPipeWikiRobotManager(XPipeWikiRobotManager):
+    service_context_manager: ServiceContextManager
+    storage_context_manager: StorageContextManager
+
+    def get_robot(self) -> XPipeWikiRobot:
+        index = load_index_from_storage(storage_context=self.storage_context_manager.get_storage_context())
+        query_engine = index.as_query_engine(service_context=self.service_context_manager.get_service_context())
+        return AzureOpenAIXPipeWikiRobot(query_engine)
+
+    def do_init(self) -> None:
+        LifecycleHelper.initialize_if_possible(self.service_context_manager)
+        LifecycleHelper.initialize_if_possible(self.storage_context_manager)
+
+    def do_start(self) -> None:
+        LifecycleHelper.start_if_possible(self.service_context_manager)
+        LifecycleHelper.start_if_possible(self.storage_context_manager)
+
+    def do_stop(self) -> None:
+        LifecycleHelper.stop_if_possible(self.storage_context_manager)
+        LifecycleHelper.stop_if_possible(self.service_context_manager)
+
+    def do_dispose(self) -> None:
+        LifecycleHelper.dispose_if_possible(self.storage_context_manager)
+        LifecycleHelper.dispose_if_possible(self.service_context_manager)