minor fix
- app/main.py +0 -11
- app/rag.py +4 -60
app/main.py
CHANGED

@@ -23,29 +23,18 @@ app = FastAPI(middleware=middleware)
 
 files_dir = os.path.expanduser("~/wtp_be_files/")
 session_assistant = ChatPDF()
-# session_messages = []
-
-# async def stream_generator(agent_text_stream):
-#     for text in agent_text_stream:
-#         print(text)
-#         yield text
 
 @app.get("/query")
 async def process_input(text: str):
     if text and len(text.strip()) > 0:
         text = text.strip()
         print("PRINTING STREAM")
-        # agent_text_stream = session_assistant.ask(text)
-        # print(agent_text_stream)
-        # session_messages.append((text, True))
-        # session_messages.append((agent_text, False))
         return StreamingResponse(session_assistant.ask(text), media_type='text/event-stream')
 
 
 @app.post("/upload")
 def upload(files: list[UploadFile]):
     session_assistant.clear()
-    # session_messages = []
 
     try:
         os.makedirs(files_dir)
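For context on the endpoint this commit slims down: Starlette's StreamingResponse (re-exported by FastAPI) accepts an async generator and flushes each yielded chunk to the client, which is what lets /query stream tokens with the text/event-stream media type. A minimal sketch of the same pattern, with a hypothetical fake_token_stream standing in for ChatPDF.ask:

from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

# Hypothetical stand-in for ChatPDF.ask: any async generator of
# text chunks can back a StreamingResponse.
async def fake_token_stream(text: str):
    for token in ["you", " said: ", text]:
        yield token

@app.get("/query")
async def process_input(text: str):
    # Each yielded chunk is flushed to the client as soon as it is produced.
    return StreamingResponse(fake_token_stream(text.strip()),
                             media_type="text/event-stream")

Running this under uvicorn and requesting /query?text=hello streams the three chunks in order, mirroring how the real endpoint forwards tokens from the query engine.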
app/rag.py
CHANGED

@@ -1,25 +1,18 @@
 import os
 import logging
-
 from llama_index.core import (
     SimpleDirectoryReader,
     VectorStoreIndex,
     StorageContext,
     Settings,
     get_response_synthesizer)
-# from llama_index.core.query_engine import RetrieverQueryEngine, TransformQueryEngine
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.schema import TextNode, MetadataMode
-# from llama_index.core.retrievers import VectorIndexRetriever
-
-# from llama_index.core.response_synthesizers import ResponseMode
-# from transformers import AutoTokenizer
 from llama_index.core.vector_stores import VectorStoreQuery
-from llama_index.vector_stores.qdrant import QdrantVectorStore
-from qdrant_client import QdrantClient
-
 from llama_index.llms.llama_cpp import LlamaCPP
 from llama_index.embeddings.fastembed import FastEmbedEmbedding
+from llama_index.vector_stores.qdrant import QdrantVectorStore
+from qdrant_client import QdrantClient
 
 
 QDRANT_API_URL = os.getenv('QDRANT_API_URL')
@@ -31,38 +24,12 @@ logger = logging.getLogger(__name__)
 class ChatPDF:
     query_engine = None
 
-    model_url = "https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_k_m.gguf"
-    # model_url = "https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q8_0.gguf"
-    # model_url = "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf"
-
-    # def messages_to_prompt(messages):
-    #     prompt = ""
-    #     for message in messages:
-    #         if message.role == 'system':
-    #             prompt += f"<|system|>\n{message.content}</s>\n"
-    #         elif message.role == 'user':
-    #             prompt += f"<|user|>\n{message.content}</s>\n"
-    #         elif message.role == 'assistant':
-    #             prompt += f"<|assistant|>\n{message.content}</s>\n"
-
-    #     if not prompt.startswith("<|system|>\n"):
-    #         prompt = "<|system|>\n</s>\n" + prompt
-
-    #     prompt = prompt + "<|assistant|>\n"
-
-    #     return prompt
-
-    # def completion_to_prompt(completion):
-    #     return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
-
-
     def __init__(self):
         self.text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=24)
 
         logger.info("initializing the vector store related objects")
         # client = QdrantClient(host="localhost", port=6333)
         client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
-        # client = QdrantClient(":memory:")
         self.vector_store = QdrantVectorStore(
             client=client,
             collection_name="rag_documents",
@@ -75,7 +42,7 @@ class ChatPDF:
         )
 
         llm = LlamaCPP(
-            model_url=model_url,
+            model_url="https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_k_m.gguf",
             temperature=0.1,
             max_new_tokens=256,
             context_window=3900,
@@ -132,36 +99,13 @@ class ChatPDF:
             transformations=Settings.transformations,
         )
 
-        # logger.info("configure retriever")
-        # retriever = VectorIndexRetriever(
-        #     index=index,
-        #     similarity_top_k=6,
-        #     # vector_store_query_mode="hybrid"
-        # )
-
-        # logger.info("configure response synthesizer")
-        # response_synthesizer = get_response_synthesizer(
-        #     # streaming=True,
-        #     response_mode=ResponseMode.COMPACT,
-        # )
-
-        # logger.info("assemble query engine")
-        # self.query_engine = RetrieverQueryEngine(
-        #     retriever=retriever,
-        #     response_synthesizer=response_synthesizer,
-        # )
-
         self.query_engine = index.as_query_engine(
             streaming=True,
-
+            similarity_top_k=6,
         )
 
     async def ask(self, query: str):
-        if not self.query_engine:
-            return "Please, add a PDF document first."
-
         logger.info("retrieving the response to the query")
-        # response = self.query_engine.query(str_or_query_bundle=query)
         streaming_response = self.query_engine.query(query)
         print(streaming_response)
         # return streaming_response.response_gen
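Taken together, the app/rag.py changes fold the commented-out retriever and response-synthesizer assembly into the single as_query_engine call, carrying over similarity_top_k=6 from the old VectorIndexRetriever. A minimal sketch of how such a streaming query engine behaves once built; the in-memory document and the embedding model name are assumptions for the example, and the synthesizer here falls back to the default LLM, whereas the app plugs in LlamaCPP through Settings:

from llama_index.core import Document, Settings, VectorStoreIndex
from llama_index.embeddings.fastembed import FastEmbedEmbedding

# Embeddings wired through Settings, as the app does; the model name
# is an assumption (FastEmbed's documented default).
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Illustrative in-memory index; the app instead builds its index over
# the Qdrant-backed vector store ("rag_documents" collection).
index = VectorStoreIndex.from_documents(
    [Document(text="Qdrant is a vector database used for similarity search.")]
)

# Mirrors the engine assembled in the diff: streaming synthesis over
# the six most similar chunks.
query_engine = index.as_query_engine(streaming=True, similarity_top_k=6)

streaming_response = query_engine.query("What is Qdrant?")
for token in streaming_response.response_gen:  # lazy generator of text chunks
    print(token, end="", flush=True)

With streaming=True, query() returns as soon as retrieval finishes, and response_gen yields text deltas while the LLM generates; that generator is what ChatPDF.ask appears to hand back to the /query endpoint for its StreamingResponse.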