init
This view is limited to 50 files because it contains too many changes. See the raw diff for the full changeset.
- Dockerfile +29 -0
- __init__.py +0 -0
- api/__init__.py +0 -0
- api/__pycache__/__init__.cpython-310.pyc +0 -0
- api/answer/__init__.py +0 -0
- api/answer/__pycache__/__init__.cpython-310.pyc +0 -0
- api/answer/__pycache__/routes.cpython-310.pyc +0 -0
- api/answer/routes.py +371 -0
- api/internal/__init__.py +0 -0
- api/internal/__pycache__/__init__.cpython-310.pyc +0 -0
- api/internal/__pycache__/routes.cpython-310.pyc +0 -0
- api/internal/routes.py +69 -0
- api/user/__init__.py +0 -0
- api/user/__pycache__/__init__.cpython-310.pyc +0 -0
- api/user/__pycache__/routes.cpython-310.pyc +0 -0
- api/user/__pycache__/tasks.cpython-310.pyc +0 -0
- api/user/routes.py +321 -0
- api/user/tasks.py +7 -0
- app.py +44 -0
- celery.py +9 -0
- celeryconfig.py +8 -0
- core/__init__.py +0 -0
- core/__pycache__/__init__.cpython-310.pyc +0 -0
- core/__pycache__/settings.cpython-310.pyc +0 -0
- core/settings.py +44 -0
- error.py +15 -0
- index.faiss +0 -0
- index.pkl +3 -0
- indexes/local/patil2016.pdf/index.faiss +0 -0
- indexes/local/patil2016.pdf/index.pkl +3 -0
- inputs/local/patil2016.pdf/patil2016.pdf +0 -0
- llm/__init__.py +0 -0
- llm/__pycache__/__init__.cpython-310.pyc +0 -0
- llm/__pycache__/anthropic.cpython-310.pyc +0 -0
- llm/__pycache__/base.cpython-310.pyc +0 -0
- llm/__pycache__/docsgpt_provider.cpython-310.pyc +0 -0
- llm/__pycache__/huggingface.cpython-310.pyc +0 -0
- llm/__pycache__/llama_cpp.cpython-310.pyc +0 -0
- llm/__pycache__/llm_creator.cpython-310.pyc +0 -0
- llm/__pycache__/openai.cpython-310.pyc +0 -0
- llm/__pycache__/sagemaker.cpython-310.pyc +0 -0
- llm/anthropic.py +40 -0
- llm/base.py +14 -0
- llm/docsgpt_provider.py +49 -0
- llm/huggingface.py +44 -0
- llm/llama_cpp.py +39 -0
- llm/llm_creator.py +26 -0
- llm/openai.py +60 -0
- llm/sagemaker.py +139 -0
- parser/__init__.py +1 -0
Dockerfile
ADDED
@@ -0,0 +1,29 @@
FROM python:3.11-slim-bullseye as builder

# Tiktoken requires Rust toolchain, so build it in a separate stage
RUN apt-get update && apt-get install -y gcc curl
RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
ENV PATH="/root/.cargo/bin:${PATH}"
RUN pip install --upgrade pip && pip install tiktoken==0.5.2
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN apt-get install -y wget unzip
RUN wget https://d3dg1063dc54p9.cloudfront.net/models/embeddings/mpnet-base-v2.zip
RUN unzip mpnet-base-v2.zip -d model
RUN rm mpnet-base-v2.zip

FROM python:3.11-slim-bullseye

# Copy pre-built packages and binaries from builder stage
COPY --from=builder /usr/local/ /usr/local/

WORKDIR /app
COPY --from=builder /model /app/model

COPY . /app/application
ENV FLASK_APP=app.py
ENV FLASK_DEBUG=true

EXPOSE 7091

CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
__init__.py
ADDED
File without changes

api/__init__.py
ADDED
File without changes

api/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (154 Bytes)

api/answer/__init__.py
ADDED
File without changes

api/answer/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (161 Bytes)

api/answer/__pycache__/routes.cpython-310.pyc
ADDED
Binary file (8.1 kB)
api/answer/routes.py
ADDED
@@ -0,0 +1,371 @@
import asyncio
import os
from flask import Blueprint, request, Response
import json
import datetime
import logging
import traceback

from pymongo import MongoClient
from bson.objectid import ObjectId
from transformers import GPT2TokenizerFast


from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator
from application.llm.llm_creator import LLMCreator
from application.error import bad_request


logger = logging.getLogger(__name__)

mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
prompts_collection = db["prompts"]
answer = Blueprint('answer', __name__)

if settings.LLM_NAME == "gpt4":
    gpt_model = 'gpt-4'
elif settings.LLM_NAME == "anthropic":
    gpt_model = 'claude-2'
else:
    gpt_model = 'gpt-3.5-turbo'

# load the prompts
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
    chat_combine_template = f.read()

with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
    chat_reduce_template = f.read()

with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
    chat_combine_creative = f.read()

with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
    chat_combine_strict = f.read()

api_key_set = settings.API_KEY is not None
embeddings_key_set = settings.EMBEDDINGS_KEY is not None


async def async_generate(chain, question, chat_history):
    result = await chain.arun({"question": question, "chat_history": chat_history})
    return result


def count_tokens(string):
    tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
    return len(tokenizer(string)['input_ids'])


def run_async_chain(chain, question, chat_history):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    result = {}
    try:
        answer = loop.run_until_complete(async_generate(chain, question, chat_history))
    finally:
        loop.close()
    result["answer"] = answer
    return result


def get_vectorstore(data):
    if "active_docs" in data:
        if data["active_docs"].split("/")[0] == "default":
            vectorstore = ""
        elif data["active_docs"].split("/")[0] == "local":
            vectorstore = "indexes/" + data["active_docs"]
        else:
            vectorstore = "vectors/" + data["active_docs"]
        if data["active_docs"] == "default":
            vectorstore = ""
    else:
        vectorstore = ""
    vectorstore = os.path.join("application", vectorstore)
    return vectorstore


def is_azure_configured():
    return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME


def complete_stream(question, docsearch, chat_history, api_key, prompt_id, conversation_id):
    llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)

    if prompt_id == 'default':
        prompt = chat_combine_template
    elif prompt_id == 'creative':
        prompt = chat_combine_creative
    elif prompt_id == 'strict':
        prompt = chat_combine_strict
    else:
        prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]

    docs = docsearch.search(question, k=2)
    if settings.LLM_NAME == "llama.cpp":
        docs = [docs[0]]
    # join all page_content together with a newline
    docs_together = "\n".join([doc.page_content for doc in docs])
    p_chat_combine = prompt.replace("{summaries}", docs_together)
    messages_combine = [{"role": "system", "content": p_chat_combine}]
    source_log_docs = []
    for doc in docs:
        if doc.metadata:
            source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
        else:
            source_log_docs.append({"title": doc.page_content, "text": doc.page_content})

    if len(chat_history) > 1:
        tokens_current_history = 0
        # count tokens in history
        chat_history.reverse()
        for i in chat_history:
            if "prompt" in i and "response" in i:
                tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
                if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
                    tokens_current_history += tokens_batch
                    messages_combine.append({"role": "user", "content": i["prompt"]})
                    messages_combine.append({"role": "system", "content": i["response"]})
    messages_combine.append({"role": "user", "content": question})

    response_full = ""
    completion = llm.gen_stream(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
                                messages=messages_combine)
    for line in completion:
        data = json.dumps({"answer": str(line)})
        response_full += str(line)
        yield f"data: {data}\n\n"

    # save conversation to database
    if conversation_id is not None:
        conversations_collection.update_one(
            {"_id": ObjectId(conversation_id)},
            {"$push": {"queries": {"prompt": question, "response": response_full, "sources": source_log_docs}}},
        )

    else:
        # create new conversation
        # generate summary
        messages_summary = [{"role": "assistant", "content": "Summarise following conversation in no more than 3 "
                                                             "words, respond ONLY with the summary, use the same "
                                                             "language as the system \n\nUser: " + question + "\n\n" +
                                                             "AI: " +
                                                             response_full},
                            {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
                                                        "respond ONLY with the summary, use the same language as the "
                                                        "system"}]

        completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
                             messages=messages_summary, max_tokens=30)
        conversation_id = conversations_collection.insert_one(
            {"user": "local",
             "date": datetime.datetime.utcnow(),
             "name": completion,
             "queries": [{"prompt": question, "response": response_full, "sources": source_log_docs}]}
        ).inserted_id

    # send data.type = "end" to indicate that the stream has ended as json
    data = json.dumps({"type": "id", "id": str(conversation_id)})
    yield f"data: {data}\n\n"
    data = json.dumps({"type": "end"})
    yield f"data: {data}\n\n"


@answer.route("/stream", methods=["POST"])
def stream():
    data = request.get_json()
    # get parameter from url question
    question = data["question"]
    history = data["history"]
    # history to json object from string
    history = json.loads(history)
    conversation_id = data["conversation_id"]
    if 'prompt_id' in data:
        prompt_id = data["prompt_id"]
    else:
        prompt_id = 'default'

    # check if active_docs is set

    if not api_key_set:
        api_key = data["api_key"]
    else:
        api_key = settings.API_KEY
    if not embeddings_key_set:
        embeddings_key = data["embeddings_key"]
    else:
        embeddings_key = settings.EMBEDDINGS_KEY
    if "active_docs" in data:
        vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
    else:
        vectorstore = ""
    docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)

    return Response(
        complete_stream(question, docsearch,
                        chat_history=history, api_key=api_key,
                        prompt_id=prompt_id,
                        conversation_id=conversation_id), mimetype="text/event-stream"
    )


@answer.route("/api/answer", methods=["POST"])
def api_answer():
    data = request.get_json()
    question = data["question"]
    history = data["history"]
    if "conversation_id" not in data:
        conversation_id = None
    else:
        conversation_id = data["conversation_id"]
    print("-" * 5)
    if not api_key_set:
        api_key = data["api_key"]
    else:
        api_key = settings.API_KEY
    if not embeddings_key_set:
        embeddings_key = data["embeddings_key"]
    else:
        embeddings_key = settings.EMBEDDINGS_KEY
    if 'prompt_id' in data:
        prompt_id = data["prompt_id"]
    else:
        prompt_id = 'default'

    if prompt_id == 'default':
        prompt = chat_combine_template
    elif prompt_id == 'creative':
        prompt = chat_combine_creative
    elif prompt_id == 'strict':
        prompt = chat_combine_strict
    else:
        prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})["content"]

    # use try and except to check for exception
    try:
        # check if the vectorstore is set
        vectorstore = get_vectorstore(data)
        # loading the index and the store and the prompt template
        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
        docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)

        llm = LLMCreator.create_llm(settings.LLM_NAME, api_key=api_key)

        docs = docsearch.search(question, k=2)
        # join all page_content together with a newline
        docs_together = "\n".join([doc.page_content for doc in docs])
        p_chat_combine = prompt.replace("{summaries}", docs_together)
        messages_combine = [{"role": "system", "content": p_chat_combine}]
        source_log_docs = []
        for doc in docs:
            if doc.metadata:
                source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
            else:
                source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
        # join all page_content together with a newline

        if len(history) > 1:
            tokens_current_history = 0
            # count tokens in history
            history.reverse()
            for i in history:
                if "prompt" in i and "response" in i:
                    tokens_batch = count_tokens(i["prompt"]) + count_tokens(i["response"])
                    if tokens_current_history + tokens_batch < settings.TOKENS_MAX_HISTORY:
                        tokens_current_history += tokens_batch
                        messages_combine.append({"role": "user", "content": i["prompt"]})
                        messages_combine.append({"role": "system", "content": i["response"]})
        messages_combine.append({"role": "user", "content": question})

        completion = llm.gen(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
                             messages=messages_combine)

        result = {"answer": completion, "sources": source_log_docs}
        logger.debug(result)

        # generate conversationId
        if conversation_id is not None:
            conversations_collection.update_one(
                {"_id": ObjectId(conversation_id)},
                {"$push": {"queries": {"prompt": question,
                                       "response": result["answer"], "sources": result['sources']}}},
            )

        else:
            # create new conversation
            # generate summary
            messages_summary = [
                {"role": "assistant", "content": "Summarise following conversation in no more than 3 words, "
                                                 "respond ONLY with the summary, use the same language as the system \n\n"
                                                 "User: " + question + "\n\n" + "AI: " + result["answer"]},
                {"role": "user", "content": "Summarise following conversation in no more than 3 words, "
                                            "respond ONLY with the summary, use the same language as the system"}
            ]

            completion = llm.gen(
                model=gpt_model,
                engine=settings.AZURE_DEPLOYMENT_NAME,
                messages=messages_summary,
                max_tokens=30
            )
            conversation_id = conversations_collection.insert_one(
                {"user": "local",
                 "date": datetime.datetime.utcnow(),
                 "name": completion,
                 "queries": [{"prompt": question, "response": result["answer"], "sources": source_log_docs}]}
            ).inserted_id

        result["conversation_id"] = str(conversation_id)

        # mock result
        # result = {
        #     "answer": "The answer is 42",
        #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
        # }
        return result
    except Exception as e:
        # print whole traceback
        traceback.print_exc()
        print(str(e))
        return bad_request(500, str(e))


@answer.route("/api/search", methods=["POST"])
def api_search():
    data = request.get_json()
    # get parameter from url question
    question = data["question"]

    if not embeddings_key_set:
        embeddings_key = data["embeddings_key"]
    else:
        embeddings_key = settings.EMBEDDINGS_KEY
    if "active_docs" in data:
        vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
    else:
        vectorstore = ""
    docsearch = VectorCreator.create_vectorstore(settings.VECTOR_STORE, vectorstore, embeddings_key)

    docs = docsearch.search(question, k=2)

    source_log_docs = []
    for doc in docs:
        if doc.metadata:
            source_log_docs.append({"title": doc.metadata['title'].split('/')[-1], "text": doc.page_content})
        else:
            source_log_docs.append({"title": doc.page_content, "text": doc.page_content})
    #yield f"data:{data}\n\n"
    return source_log_docs
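An illustrative client sketch (not part of this commit) showing how the /stream endpoint above can be consumed: it POSTs the JSON body the route expects and parses the server-sent "data: ..." lines. The base URL, document name, and placeholder keys are assumptions.

import json
import requests

payload = {
    "question": "What does this paper conclude?",
    "history": json.dumps([]),                 # the route json.loads() this field
    "conversation_id": None,
    "prompt_id": "default",
    "active_docs": "local/patil2016.pdf",      # hypothetical, matching the indexes/ layout in this commit
    # "api_key" / "embeddings_key" are only required when API_KEY / EMBEDDINGS_KEY are not set in settings
}

with requests.post("http://localhost:7091/stream", json=payload, stream=True) as resp:
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8")
        if line.startswith("data: "):
            event = json.loads(line[6:])
            if "answer" in event:
                print(event["answer"], end="")      # incremental answer chunks
            elif event.get("type") == "id":
                print("\nconversation id:", event["id"])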
api/internal/__init__.py
ADDED
File without changes

api/internal/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (163 Bytes)

api/internal/__pycache__/routes.cpython-310.pyc
ADDED
Binary file (2.07 kB)
api/internal/routes.py
ADDED
@@ -0,0 +1,69 @@
import os
import datetime
from flask import Blueprint, request, send_from_directory
from pymongo import MongoClient
from werkzeug.utils import secure_filename


from application.core.settings import settings
mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]

current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


internal = Blueprint('internal', __name__)
@internal.route("/api/download", methods=["get"])
def download_file():
    user = secure_filename(request.args.get("user"))
    job_name = secure_filename(request.args.get("name"))
    filename = secure_filename(request.args.get("file"))
    save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
    return send_from_directory(save_dir, filename, as_attachment=True)



@internal.route("/api/upload_index", methods=["POST"])
def upload_index_files():
    """Upload two files(index.faiss, index.pkl) to the user's folder."""
    if "user" not in request.form:
        return {"status": "no user"}
    user = secure_filename(request.form["user"])
    if "name" not in request.form:
        return {"status": "no name"}
    job_name = secure_filename(request.form["name"])
    save_dir = os.path.join(current_dir, "indexes", user, job_name)
    if settings.VECTOR_STORE == "faiss":
        if "file_faiss" not in request.files:
            print("No file part")
            return {"status": "no file"}
        file_faiss = request.files["file_faiss"]
        if file_faiss.filename == "":
            return {"status": "no file name"}
        if "file_pkl" not in request.files:
            print("No file part")
            return {"status": "no file"}
        file_pkl = request.files["file_pkl"]
        if file_pkl.filename == "":
            return {"status": "no file name"}
        # saves index files

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        file_faiss.save(os.path.join(save_dir, "index.faiss"))
        file_pkl.save(os.path.join(save_dir, "index.pkl"))
    # create entry in vectors_collection
    vectors_collection.insert_one(
        {
            "user": user,
            "name": job_name,
            "language": job_name,
            "location": save_dir,
            "date": datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"),
            "model": settings.EMBEDDINGS_NAME,
            "type": "local",
        }
    )
    return {"status": "ok"}
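An illustrative sketch (not part of this commit) of pushing a pre-built FAISS index to /api/upload_index as the two multipart files the route expects. The paths and base URL are assumptions.

import requests

files = {
    "file_faiss": open("indexes/local/patil2016.pdf/index.faiss", "rb"),
    "file_pkl": open("indexes/local/patil2016.pdf/index.pkl", "rb"),
}
data = {"user": "local", "name": "patil2016.pdf"}
resp = requests.post("http://localhost:7091/api/upload_index", files=files, data=data)
print(resp.json())  # {"status": "ok"} on success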
api/user/__init__.py
ADDED
File without changes

api/user/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (159 Bytes)

api/user/__pycache__/routes.cpython-310.pyc
ADDED
Binary file (8.12 kB)

api/user/__pycache__/tasks.cpython-310.pyc
ADDED
Binary file (466 Bytes)
api/user/routes.py
ADDED
@@ -0,0 +1,321 @@
import os
from flask import Blueprint, request, jsonify
import requests
from pymongo import MongoClient
from bson.objectid import ObjectId
from werkzeug.utils import secure_filename

from application.api.user.tasks import ingest

from application.core.settings import settings
from application.vectorstore.vector_creator import VectorCreator

mongo = MongoClient(settings.MONGO_URI)
db = mongo["docsgpt"]
conversations_collection = db["conversations"]
vectors_collection = db["vectors"]
prompts_collection = db["prompts"]
feedback_collection = db["feedback"]
user = Blueprint('user', __name__)

current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

@user.route("/api/delete_conversation", methods=["POST"])
def delete_conversation():
    # deletes a conversation from the database
    conversation_id = request.args.get("id")
    # write to mongodb
    conversations_collection.delete_one(
        {
            "_id": ObjectId(conversation_id),
        }
    )

    return {"status": "ok"}

@user.route("/api/get_conversations", methods=["get"])
def get_conversations():
    # provides a list of conversations
    conversations = conversations_collection.find().sort("date", -1)
    list_conversations = []
    for conversation in conversations:
        list_conversations.append({"id": str(conversation["_id"]), "name": conversation["name"]})

    #list_conversations = [{"id": "default", "name": "default"}, {"id": "jeff", "name": "jeff"}]

    return jsonify(list_conversations)


@user.route("/api/get_single_conversation", methods=["get"])
def get_single_conversation():
    # provides data for a conversation
    conversation_id = request.args.get("id")
    conversation = conversations_collection.find_one({"_id": ObjectId(conversation_id)})
    return jsonify(conversation['queries'])

@user.route("/api/update_conversation_name", methods=["POST"])
def update_conversation_name():
    # update data for a conversation
    data = request.get_json()
    id = data["id"]
    name = data["name"]
    conversations_collection.update_one({"_id": ObjectId(id)},{"$set":{"name":name}})
    return {"status": "ok"}


@user.route("/api/feedback", methods=["POST"])
def api_feedback():
    data = request.get_json()
    question = data["question"]
    answer = data["answer"]
    feedback = data["feedback"]


    feedback_collection.insert_one(
        {
            "question": question,
            "answer": answer,
            "feedback": feedback,
        }
    )
    return {"status": "ok"}

@user.route("/api/delete_by_ids", methods=["get"])
def delete_by_ids():
    """Delete by ID. These are the IDs in the vectorstore"""

    ids = request.args.get("path")
    if not ids:
        return {"status": "error"}

    if settings.VECTOR_STORE == "faiss":
        result = vectors_collection.delete_index(ids=ids)
        if result:
            return {"status": "ok"}
    return {"status": "error"}

@user.route("/api/delete_old", methods=["get"])
def delete_old():
    """Delete old indexes."""
    import shutil

    path = request.args.get("path")
    dirs = path.split("/")
    dirs_clean = []
    for i in range(0, len(dirs)):
        dirs_clean.append(secure_filename(dirs[i]))
    # check that path starts with indexes or vectors

    if dirs_clean[0] not in ["indexes", "vectors"]:
        return {"status": "error"}
    path_clean = "/".join(dirs_clean)
    vectors_collection.delete_one({"name": dirs_clean[-1], 'user': dirs_clean[-2]})
    if settings.VECTOR_STORE == "faiss":
        try:
            shutil.rmtree(os.path.join(current_dir, path_clean))
        except FileNotFoundError:
            pass
    else:
        vetorstore = VectorCreator.create_vectorstore(
            settings.VECTOR_STORE, path=os.path.join(current_dir, path_clean)
        )
        vetorstore.delete_index()

    return {"status": "ok"}

@user.route("/api/upload", methods=["POST"])
def upload_file():
    """Upload a file to get vectorized and indexed."""
    if "user" not in request.form:
        return {"status": "no user"}
    user = secure_filename(request.form["user"])
    if "name" not in request.form:
        return {"status": "no name"}
    job_name = secure_filename(request.form["name"])
    # check if the post request has the file part
    if "file" not in request.files:
        print("No file part")
        return {"status": "no file"}
    file = request.files["file"]
    if file.filename == "":
        return {"status": "no file name"}

    if file:
        filename = secure_filename(file.filename)
        # save dir
        save_dir = os.path.join(current_dir, settings.UPLOAD_FOLDER, user, job_name)
        # create dir if not exists
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        file.save(os.path.join(save_dir, filename))
        task = ingest.delay(settings.UPLOAD_FOLDER, [".rst", ".md", ".pdf", ".txt", ".docx",
                                                     ".csv", ".epub", ".html", ".mdx"],
                            job_name, filename, user)
        # task id
        task_id = task.id
        return {"status": "ok", "task_id": task_id}
    else:
        return {"status": "error"}

@user.route("/api/task_status", methods=["GET"])
def task_status():
    """Get celery job status."""
    task_id = request.args.get("task_id")
    from application.celery import celery
    task = celery.AsyncResult(task_id)
    task_meta = task.info
    return {"status": task.status, "result": task_meta}


@user.route("/api/combine", methods=["GET"])
def combined_json():
    user = "local"
    """Provide json file with combined available indexes."""
    # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json

    data = [
        {
            "name": "default",
            "language": "default",
            "version": "",
            "description": "default",
            "fullName": "default",
            "date": "default",
            "docLink": "default",
            "model": settings.EMBEDDINGS_NAME,
            "location": "remote",
        }
    ]
    # structure: name, language, version, description, fullName, date, docLink
    # append data from vectors_collection
    for index in vectors_collection.find({"user": user}):
        data.append(
            {
                "name": index["name"],
                "language": index["language"],
                "version": "",
                "description": index["name"],
                "fullName": index["name"],
                "date": index["date"],
                "docLink": index["location"],
                "model": settings.EMBEDDINGS_NAME,
                "location": "local",
            }
        )
    if settings.VECTOR_STORE == "faiss":
        data_remote = requests.get("https://d3dg1063dc54p9.cloudfront.net/combined.json").json()
        for index in data_remote:
            index["location"] = "remote"
            data.append(index)

    return jsonify(data)


@user.route("/api/docs_check", methods=["POST"])
def check_docs():
    # check if docs exist in a vectorstore folder
    data = request.get_json()
    # split docs on / and take first part
    if data["docs"].split("/")[0] == "local":
        return {"status": "exists"}
    vectorstore = "vectors/" + data["docs"]
    base_path = "https://raw.githubusercontent.com/arc53/DocsHUB/main/"
    if os.path.exists(vectorstore) or data["docs"] == "default":
        return {"status": "exists"}
    else:
        r = requests.get(base_path + vectorstore + "index.faiss")

        if r.status_code != 200:
            return {"status": "null"}
        else:
            if not os.path.exists(vectorstore):
                os.makedirs(vectorstore)
            with open(vectorstore + "index.faiss", "wb") as f:
                f.write(r.content)

            # download the store
            r = requests.get(base_path + vectorstore + "index.pkl")
            with open(vectorstore + "index.pkl", "wb") as f:
                f.write(r.content)

        return {"status": "loaded"}

@user.route("/api/create_prompt", methods=["POST"])
def create_prompt():
    data = request.get_json()
    content = data["content"]
    name = data["name"]
    if name == "":
        return {"status": "error"}
    user = "local"
    resp = prompts_collection.insert_one(
        {
            "name": name,
            "content": content,
            "user": user,
        }
    )
    new_id = str(resp.inserted_id)
    return {"id": new_id}

@user.route("/api/get_prompts", methods=["GET"])
def get_prompts():
    user = "local"
    prompts = prompts_collection.find({"user": user})
    list_prompts = []
    list_prompts.append({"id": "default", "name": "default", "type": "public"})
    list_prompts.append({"id": "creative", "name": "creative", "type": "public"})
    list_prompts.append({"id": "strict", "name": "strict", "type": "public"})
    for prompt in prompts:
        list_prompts.append({"id": str(prompt["_id"]), "name": prompt["name"], "type": "private"})

    return jsonify(list_prompts)

@user.route("/api/get_single_prompt", methods=["GET"])
def get_single_prompt():
    prompt_id = request.args.get("id")
    if prompt_id == 'default':
        with open(os.path.join(current_dir, "prompts", "chat_combine_default.txt"), "r") as f:
            chat_combine_template = f.read()
        return jsonify({"content": chat_combine_template})
    elif prompt_id == 'creative':
        with open(os.path.join(current_dir, "prompts", "chat_combine_creative.txt"), "r") as f:
            chat_reduce_creative = f.read()
        return jsonify({"content": chat_reduce_creative})
    elif prompt_id == 'strict':
        with open(os.path.join(current_dir, "prompts", "chat_combine_strict.txt"), "r") as f:
            chat_reduce_strict = f.read()
        return jsonify({"content": chat_reduce_strict})


    prompt = prompts_collection.find_one({"_id": ObjectId(prompt_id)})
    return jsonify({"content": prompt["content"]})

@user.route("/api/delete_prompt", methods=["POST"])
def delete_prompt():
    data = request.get_json()
    id = data["id"]
    prompts_collection.delete_one(
        {
            "_id": ObjectId(id),
        }
    )
    return {"status": "ok"}

@user.route("/api/update_prompt", methods=["POST"])
def update_prompt_name():
    data = request.get_json()
    id = data["id"]
    name = data["name"]
    content = data["content"]
    # check if name is null
    if name == "":
        return {"status": "error"}
    prompts_collection.update_one({"_id": ObjectId(id)},{"$set":{"name":name, "content": content}})
    return {"status": "ok"}
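An illustrative sketch (not part of this commit) of the upload-and-ingest flow: POST a document to /api/upload, then poll /api/task_status until the Celery ingestion task finishes. The file name and base URL are assumptions.

import time
import requests

base = "http://localhost:7091"
with open("patil2016.pdf", "rb") as fh:
    resp = requests.post(f"{base}/api/upload",
                         files={"file": fh},
                         data={"user": "local", "name": "patil2016.pdf"})
task_id = resp.json()["task_id"]

while True:
    status = requests.get(f"{base}/api/task_status", params={"task_id": task_id}).json()
    print(status["status"])
    if status["status"] in ("SUCCESS", "FAILURE"):
        break
    time.sleep(2)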
api/user/tasks.py
ADDED
@@ -0,0 +1,7 @@
from application.worker import ingest_worker
from application.celery import celery

@celery.task(bind=True)
def ingest(self, directory, formats, name_job, filename, user):
    resp = ingest_worker(self, directory, formats, name_job, filename, user)
    return resp
app.py
ADDED
@@ -0,0 +1,44 @@
import platform
import dotenv
from application.celery import celery
from flask import Flask, request, redirect
from application.core.settings import settings
from application.api.user.routes import user
from application.api.answer.routes import answer
from application.api.internal.routes import internal

if platform.system() == "Windows":
    import pathlib
    pathlib.PosixPath = pathlib.WindowsPath

dotenv.load_dotenv()

app = Flask(__name__)
app.register_blueprint(user)
app.register_blueprint(answer)
app.register_blueprint(internal)
app.config.update(
    UPLOAD_FOLDER="inputs",
    CELERY_BROKER_URL=settings.CELERY_BROKER_URL,
    CELERY_RESULT_BACKEND=settings.CELERY_RESULT_BACKEND,
    MONGO_URI=settings.MONGO_URI
)
celery.config_from_object("application.celeryconfig")

@app.route("/")
def home():
    if request.remote_addr in ('0.0.0.0', '127.0.0.1', 'localhost', '172.18.0.1'):
        return redirect('http://localhost:5173')
    else:
        return 'Welcome to DocsGPT Backend!'

@app.after_request
def after_request(response):
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "Content-Type,Authorization")
    response.headers.add("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS")
    return response

if __name__ == "__main__":
    app.run(debug=True, port=7091)
celery.py
ADDED
@@ -0,0 +1,9 @@
from celery import Celery
from application.core.settings import settings

def make_celery(app_name=__name__):
    celery = Celery(app_name, broker=settings.CELERY_BROKER_URL, backend=settings.CELERY_RESULT_BACKEND)
    celery.conf.update(settings)
    return celery

celery = make_celery()
celeryconfig.py
ADDED
@@ -0,0 +1,8 @@
import os

broker_url = os.getenv("CELERY_BROKER_URL")
result_backend = os.getenv("CELERY_RESULT_BACKEND")

task_serializer = 'json'
result_serializer = 'json'
accept_content = ['json']
core/__init__.py
ADDED
File without changes

core/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (155 Bytes)

core/__pycache__/settings.cpython-310.pyc
ADDED
Binary file (1.92 kB)
core/settings.py
ADDED
@@ -0,0 +1,44 @@
from pathlib import Path
from typing import Optional
import os

from pydantic_settings import BaseSettings
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))


class Settings(BaseSettings):
    LLM_NAME: str = "docsgpt"
    EMBEDDINGS_NAME: str = "huggingface_sentence-transformers/all-mpnet-base-v2"
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/1"
    MONGO_URI: str = "mongodb://localhost:27017/docsgpt"
    MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
    TOKENS_MAX_HISTORY: int = 150
    UPLOAD_FOLDER: str = "inputs"
    VECTOR_STORE: str = "faiss"  # "faiss" or "elasticsearch"

    API_URL: str = "http://localhost:7091"  # backend url for celery worker

    API_KEY: Optional[str] = None  # LLM api key
    EMBEDDINGS_KEY: Optional[str] = None  # api key for embeddings (if using openai, just copy API_KEY)
    OPENAI_API_BASE: Optional[str] = None  # azure openai api base url
    OPENAI_API_VERSION: Optional[str] = None  # azure openai api version
    AZURE_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for answering
    AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None  # azure deployment name for embeddings

    # elasticsearch
    ELASTIC_CLOUD_ID: Optional[str] = None  # cloud id for elasticsearch
    ELASTIC_USERNAME: Optional[str] = None  # username for elasticsearch
    ELASTIC_PASSWORD: Optional[str] = None  # password for elasticsearch
    ELASTIC_URL: Optional[str] = None  # url for elasticsearch
    ELASTIC_INDEX: Optional[str] = "docsgpt"  # index name for elasticsearch

    # SageMaker config
    SAGEMAKER_ENDPOINT: Optional[str] = None  # SageMaker endpoint name
    SAGEMAKER_REGION: Optional[str] = None  # SageMaker region name
    SAGEMAKER_ACCESS_KEY: Optional[str] = None  # SageMaker access key
    SAGEMAKER_SECRET_KEY: Optional[str] = None  # SageMaker secret key


path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
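A minimal sketch (not part of this commit): because Settings is a pydantic BaseSettings class, each field can also be supplied through an environment variable or the .env file; constructing it directly shows the same override mechanism. The values below are placeholders.

from application.core.settings import Settings

# Hypothetical overrides; the module-level `settings` singleton would pick the same
# names up from the environment or .env instead.
local_settings = Settings(LLM_NAME="openai", VECTOR_STORE="faiss", API_KEY="sk-placeholder")
print(local_settings.LLM_NAME)  # -> "openai"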
error.py
ADDED
@@ -0,0 +1,15 @@
from flask import jsonify
from werkzeug.http import HTTP_STATUS_CODES


def response_error(code_status, message=None):
    payload = {'error': HTTP_STATUS_CODES.get(code_status, "something went wrong")}
    if message:
        payload['message'] = message
    response = jsonify(payload)
    response.status_code = code_status
    return response


def bad_request(status_code=400, message=''):
    return response_error(code_status=status_code, message=message)
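An illustrative sketch (not part of this commit) of how bad_request() is used inside a Flask view, in the same way api_answer() above returns it on failure; the blueprint and route here are hypothetical.

from flask import Blueprint, request
from application.error import bad_request

example = Blueprint("example", __name__)

@example.route("/api/echo", methods=["POST"])
def echo():
    data = request.get_json()
    if not data or "text" not in data:
        # returns a JSON error payload with the matching HTTP status code
        return bad_request(400, "missing 'text' field")
    return {"text": data["text"]}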
index.faiss
ADDED
Binary file (9.26 kB)

index.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1653826159295b5a262df5228ec9678a919a9fcc3ff94248eeaa55f434c071ef
size 7866

indexes/local/patil2016.pdf/index.faiss
ADDED
Binary file (15.4 kB)

indexes/local/patil2016.pdf/index.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ccc1aa0edd32b66234b113edba42b67f5fc498851e584863124f44abf3920273
size 28255

inputs/local/patil2016.pdf/patil2016.pdf
ADDED
Binary file (280 kB)
llm/__init__.py
ADDED
File without changes

llm/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (154 Bytes)

llm/__pycache__/anthropic.cpython-310.pyc
ADDED
Binary file (1.65 kB)

llm/__pycache__/base.cpython-310.pyc
ADDED
Binary file (734 Bytes)

llm/__pycache__/docsgpt_provider.cpython-310.pyc
ADDED
Binary file (1.59 kB)

llm/__pycache__/huggingface.cpython-310.pyc
ADDED
Binary file (1.81 kB)

llm/__pycache__/llama_cpp.cpython-310.pyc
ADDED
Binary file (1.58 kB)

llm/__pycache__/llm_creator.cpython-310.pyc
ADDED
Binary file (1.15 kB)

llm/__pycache__/openai.cpython-310.pyc
ADDED
Binary file (2.16 kB)

llm/__pycache__/sagemaker.cpython-310.pyc
ADDED
Binary file (4.33 kB)
llm/anthropic.py
ADDED
@@ -0,0 +1,40 @@
from application.llm.base import BaseLLM
from application.core.settings import settings

class AnthropicLLM(BaseLLM):

    def __init__(self, api_key=None):
        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
        self.api_key = api_key or settings.ANTHROPIC_API_KEY  # If not provided, use a default from settings
        self.anthropic = Anthropic(api_key=self.api_key)
        self.HUMAN_PROMPT = HUMAN_PROMPT
        self.AI_PROMPT = AI_PROMPT

    def gen(self, model, messages, engine=None, max_tokens=300, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Context \n {context} \n ### Question \n {user_question}"
        if stream:
            return self.gen_stream(model, prompt, max_tokens, **kwargs)

        completion = self.anthropic.completions.create(
            model=model,
            max_tokens_to_sample=max_tokens,
            stream=stream,
            prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
        )
        return completion.completion

    def gen_stream(self, model, messages, engine=None, max_tokens=300, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Context \n {context} \n ### Question \n {user_question}"
        stream_response = self.anthropic.completions.create(
            model=model,
            prompt=f"{self.HUMAN_PROMPT} {prompt}{self.AI_PROMPT}",
            max_tokens_to_sample=max_tokens,
            stream=True,
        )

        for completion in stream_response:
            yield completion.completion
llm/base.py
ADDED
@@ -0,0 +1,14 @@
from abc import ABC, abstractmethod


class BaseLLM(ABC):
    def __init__(self):
        pass

    @abstractmethod
    def gen(self, *args, **kwargs):
        pass

    @abstractmethod
    def gen_stream(self, *args, **kwargs):
        pass
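An illustrative sketch (not part of this commit): any new provider only has to subclass BaseLLM and implement gen() and gen_stream(); this echo provider is hypothetical.

from application.llm.base import BaseLLM

class EchoLLM(BaseLLM):
    def gen(self, model, engine, messages, stream=False, **kwargs):
        # return the last user message verbatim
        return messages[-1]["content"]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        # yield the answer word by word, mirroring the streaming contract
        for word in messages[-1]["content"].split():
            yield word + " "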
llm/docsgpt_provider.py
ADDED
@@ -0,0 +1,49 @@
from application.llm.base import BaseLLM
import json
import requests

class DocsGPTAPILLM(BaseLLM):

    def __init__(self, *args, **kwargs):
        self.endpoint = "https://llm.docsgpt.co.uk"


    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        response = requests.post(
            f"{self.endpoint}/answer",
            json={
                "prompt": prompt,
                "max_new_tokens": 30
            }
        )
        response_clean = response.json()['a'].split("###")[0]

        return response_clean

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        # send prompt to endpoint /stream
        response = requests.post(
            f"{self.endpoint}/stream",
            json={
                "prompt": prompt,
                "max_new_tokens": 256
            },
            stream=True
        )

        for line in response.iter_lines():
            if line:
                #data = json.loads(line)
                data_str = line.decode('utf-8')
                if data_str.startswith("data: "):
                    data = json.loads(data_str[6:])
                    yield data['a']
llm/huggingface.py
ADDED
@@ -0,0 +1,44 @@
from application.llm.base import BaseLLM

class HuggingFaceLLM(BaseLLM):

    def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B',q=False):
        global hf

        from langchain.llms import HuggingFacePipeline
        if q:
            import torch
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
            tokenizer = AutoTokenizer.from_pretrained(llm_name)
            bnb_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_use_double_quant=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16
            )
            model = AutoModelForCausalLM.from_pretrained(llm_name,quantization_config=bnb_config)
        else:
            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
            tokenizer = AutoTokenizer.from_pretrained(llm_name)
            model = AutoModelForCausalLM.from_pretrained(llm_name)

        pipe = pipeline(
            "text-generation", model=model,
            tokenizer=tokenizer, max_new_tokens=2000,
            device_map="auto", eos_token_id=tokenizer.eos_token_id
        )
        hf = HuggingFacePipeline(pipeline=pipe)

    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = hf(prompt)

        return result.content

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):

        raise NotImplementedError("HuggingFaceLLM Streaming is not implemented yet.")
llm/llama_cpp.py
ADDED
@@ -0,0 +1,39 @@
from application.llm.base import BaseLLM
from application.core.settings import settings

class LlamaCpp(BaseLLM):

    def __init__(self, api_key, llm_name=settings.MODEL_PATH, **kwargs):
        global llama
        try:
            from llama_cpp import Llama
        except ImportError:
            raise ImportError("Please install llama_cpp using pip install llama-cpp-python")

        llama = Llama(model_path=llm_name, n_ctx=2048)

    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = llama(prompt, max_tokens=150, echo=False)

        # import sys
        # print(result['choices'][0]['text'].split('### Answer \n')[-1], file=sys.stderr)

        return result['choices'][0]['text'].split('### Answer \n')[-1]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"

        result = llama(prompt, max_tokens=150, echo=False, stream=stream)

        # import sys
        # print(list(result), file=sys.stderr)

        for item in result:
            for choice in item['choices']:
                yield choice['text']
llm/llm_creator.py
ADDED
@@ -0,0 +1,26 @@
from application.llm.openai import OpenAILLM, AzureOpenAILLM
from application.llm.sagemaker import SagemakerAPILLM
from application.llm.huggingface import HuggingFaceLLM
from application.llm.llama_cpp import LlamaCpp
from application.llm.anthropic import AnthropicLLM
from application.llm.docsgpt_provider import DocsGPTAPILLM



class LLMCreator:
    llms = {
        'openai': OpenAILLM,
        'azure_openai': AzureOpenAILLM,
        'sagemaker': SagemakerAPILLM,
        'huggingface': HuggingFaceLLM,
        'llama.cpp': LlamaCpp,
        'anthropic': AnthropicLLM,
        'docsgpt': DocsGPTAPILLM
    }

    @classmethod
    def create_llm(cls, type, *args, **kwargs):
        llm_class = cls.llms.get(type.lower())
        if not llm_class:
            raise ValueError(f"No LLM class found for type {type}")
        return llm_class(*args, **kwargs)
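An illustrative sketch (not part of this commit) of how the answer routes obtain a provider through this factory; the key must be one of the entries in LLMCreator.llms, and the question and context strings here are placeholders.

from application.llm.llm_creator import LLMCreator

llm = LLMCreator.create_llm("docsgpt", api_key=None)  # DocsGPTAPILLM ignores the key
messages = [
    {"role": "system", "content": "Context: DocsGPT is an open-source documentation assistant."},
    {"role": "user", "content": "What is DocsGPT?"},
]
print(llm.gen(model="gpt-3.5-turbo", engine=None, messages=messages))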
llm/openai.py
ADDED
@@ -0,0 +1,60 @@
from application.llm.base import BaseLLM
from application.core.settings import settings

class OpenAILLM(BaseLLM):

    def __init__(self, api_key):
        global openai
        from openai import OpenAI

        self.client = OpenAI(
            api_key=api_key,
        )
        self.api_key = api_key

    def _get_openai(self):
        # Import openai when needed
        import openai

        return openai

    def gen(self, model, engine, messages, stream=False, **kwargs):
        response = self.client.chat.completions.create(model=model,
                                                        messages=messages,
                                                        stream=stream,
                                                        **kwargs)

        return response.choices[0].message.content

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        response = self.client.chat.completions.create(model=model,
                                                       messages=messages,
                                                       stream=stream,
                                                       **kwargs)

        for line in response:
            # import sys
            # print(line.choices[0].delta.content, file=sys.stderr)
            if line.choices[0].delta.content is not None:
                yield line.choices[0].delta.content


class AzureOpenAILLM(OpenAILLM):

    def __init__(self, openai_api_key, openai_api_base, openai_api_version, deployment_name):
        super().__init__(openai_api_key)
        self.api_base = settings.OPENAI_API_BASE,
        self.api_version = settings.OPENAI_API_VERSION,
        self.deployment_name = settings.AZURE_DEPLOYMENT_NAME,
        from openai import AzureOpenAI
        self.client = AzureOpenAI(
            api_key=openai_api_key,
            api_version=settings.OPENAI_API_VERSION,
            api_base=settings.OPENAI_API_BASE,
            deployment_name=settings.AZURE_DEPLOYMENT_NAME,
        )

    def _get_openai(self):
        openai = super()._get_openai()

        return openai
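An illustrative sketch (not part of this commit) of consuming the streaming generator the same way complete_stream() in api/answer/routes.py does; the api key and messages are placeholders.

from application.llm.openai import OpenAILLM

llm = OpenAILLM(api_key="sk-placeholder")
messages = [
    {"role": "system", "content": "You answer questions about the provided context."},
    {"role": "user", "content": "Summarise the context in one sentence."},
]
answer = ""
for chunk in llm.gen_stream(model="gpt-3.5-turbo", engine=None, messages=messages):
    answer += chunk   # chunks arrive incrementally as the model generates
print(answer)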
llm/sagemaker.py
ADDED
@@ -0,0 +1,139 @@
from application.llm.base import BaseLLM
from application.core.settings import settings
import json
import io



class LineIterator:
    """
    A helper class for parsing the byte stream input.

    The output of the model will be in the following format:
    ```
    b'{"outputs": [" a"]}\n'
    b'{"outputs": [" challenging"]}\n'
    b'{"outputs": [" problem"]}\n'
    ...
    ```

    While usually each PayloadPart event from the event stream will contain a byte array
    with a full json, this is not guaranteed and some of the json objects may be split across
    PayloadPart events. For example:
    ```
    {'PayloadPart': {'Bytes': b'{"outputs": '}}
    {'PayloadPart': {'Bytes': b'[" problem"]}\n'}}
    ```

    This class accounts for this by concatenating bytes written via the 'write' function
    and then exposing a method which will return lines (ending with a '\n' character) within
    the buffer via the 'scan_lines' function. It maintains the position of the last read
    position to ensure that previous bytes are not exposed again.
    """

    def __init__(self, stream):
        self.byte_iterator = iter(stream)
        self.buffer = io.BytesIO()
        self.read_pos = 0

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            self.buffer.seek(self.read_pos)
            line = self.buffer.readline()
            if line and line[-1] == ord('\n'):
                self.read_pos += len(line)
                return line[:-1]
            try:
                chunk = next(self.byte_iterator)
            except StopIteration:
                if self.read_pos < self.buffer.getbuffer().nbytes:
                    continue
                raise
            if 'PayloadPart' not in chunk:
                print('Unknown event type:' + chunk)
                continue
            self.buffer.seek(0, io.SEEK_END)
            self.buffer.write(chunk['PayloadPart']['Bytes'])

class SagemakerAPILLM(BaseLLM):

    def __init__(self, *args, **kwargs):
        import boto3
        runtime = boto3.client(
            'runtime.sagemaker',
            aws_access_key_id='xxx',
            aws_secret_access_key='xxx',
            region_name='us-west-2'
        )


        self.endpoint = settings.SAGEMAKER_ENDPOINT
        self.runtime = runtime


    def gen(self, model, engine, messages, stream=False, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"


        # Construct payload for endpoint
        payload = {
            "inputs": prompt,
            "stream": False,
            "parameters": {
                "do_sample": True,
                "temperature": 0.1,
                "max_new_tokens": 30,
                "repetition_penalty": 1.03,
                "stop": ["</s>", "###"]
            }
        }
        body_bytes = json.dumps(payload).encode('utf-8')

        # Invoke the endpoint
        response = self.runtime.invoke_endpoint(EndpointName=self.endpoint,
                                                ContentType='application/json',
                                                Body=body_bytes)
        result = json.loads(response['Body'].read().decode())
        import sys
        print(result[0]['generated_text'], file=sys.stderr)
        return result[0]['generated_text'][len(prompt):]

    def gen_stream(self, model, engine, messages, stream=True, **kwargs):
        context = messages[0]['content']
        user_question = messages[-1]['content']
        prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"


        # Construct payload for endpoint
        payload = {
            "inputs": prompt,
            "stream": True,
            "parameters": {
                "do_sample": True,
                "temperature": 0.1,
                "max_new_tokens": 512,
                "repetition_penalty": 1.03,
                "stop": ["</s>", "###"]
            }
        }
        body_bytes = json.dumps(payload).encode('utf-8')

        # Invoke the endpoint
        response = self.runtime.invoke_endpoint_with_response_stream(EndpointName=self.endpoint,
                                                                     ContentType='application/json',
                                                                     Body=body_bytes)
        #result = json.loads(response['Body'].read().decode())
        event_stream = response['Body']
        start_json = b'{'
        for line in LineIterator(event_stream):
            if line != b'' and start_json in line:
                #print(line)
                data = json.loads(line[line.find(start_json):].decode('utf-8'))
                if data['token']['text'] not in ["</s>", "###"]:
                    print(data['token']['text'],end='')
                    yield data['token']['text']
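An illustrative sketch (not part of this commit): LineIterator reassembles newline-terminated JSON records even when a record is split across PayloadPart events, shown here with a synthetic stream instead of a real SageMaker response.

import json
from application.llm.sagemaker import LineIterator

fake_event_stream = [
    {"PayloadPart": {"Bytes": b'{"outputs": '}},                                  # first record split mid-JSON
    {"PayloadPart": {"Bytes": b'[" problem"]}\n{"outputs": [" solved"]}\n'}},     # rest of record one plus record two
]

for line in LineIterator(fake_event_stream):
    record = json.loads(line.decode("utf-8"))
    print(record["outputs"][0])   # prints " problem" then " solved"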
parser/__init__.py
ADDED
@@ -0,0 +1 @@