Spaces:
Runtime error
Runtime error
Fix logger: store it on the instance as self.logger instead of a local variable in __init__
Browse files
- app/rag.py +15 -14
app/rag.py
CHANGED
@@ -26,20 +26,21 @@ class ChatPDF:
|
|
26 |
doc_ids = []
|
27 |
nodes = []
|
28 |
hyde_query_engine = None
|
|
|
29 |
|
30 |
def __init__(self):
|
31 |
logging.basicConfig(level=logging.INFO)
|
32 |
-
logger = logging.getLogger(__name__)
|
33 |
|
34 |
text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=100)
|
35 |
|
36 |
-
logger.info("initializing the vector store related objects")
|
37 |
client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
|
38 |
vector_store = QdrantVectorStore(client=client, collection_name="rag_documents")
|
39 |
|
40 |
-
logger.info("initializing the OllamaEmbedding")
|
41 |
embed_model = OllamaEmbedding(model_name='mxbai-embed-large')
|
42 |
-
logger.info("initializing the global settings")
|
43 |
Settings.embed_model = embed_model
|
44 |
Settings.llm = Ollama(model="qwen:1.8b", request_timeout=1000000)
|
45 |
Settings.transformations = [text_parser]
|
@@ -47,44 +48,44 @@ class ChatPDF:
|
|
47 |
def ingest(self, dir_path: str):
|
48 |
docs = SimpleDirectoryReader(input_dir=dir_path).load_data()
|
49 |
|
50 |
-
logger.info("enumerating docs")
|
51 |
for doc_idx, doc in enumerate(docs):
|
52 |
curr_text_chunks = text_parser.split_text(doc.text)
|
53 |
text_chunks.extend(curr_text_chunks)
|
54 |
doc_ids.extend([doc_idx] * len(curr_text_chunks))
|
55 |
|
56 |
-
logger.info("enumerating text_chunks")
|
57 |
for idx, text_chunk in enumerate(text_chunks):
|
58 |
node = TextNode(text=text_chunk)
|
59 |
src_doc = docs[doc_ids[idx]]
|
60 |
node.metadata = src_doc.metadata
|
61 |
nodes.append(node)
|
62 |
|
63 |
-
logger.info("enumerating nodes")
|
64 |
for node in nodes:
|
65 |
node_embedding = embed_model.get_text_embedding(
|
66 |
node.get_content(metadata_mode=MetadataMode.ALL)
|
67 |
)
|
68 |
node.embedding = node_embedding
|
69 |
|
70 |
-
logger.info("initializing the storage context")
|
71 |
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
72 |
-
logger.info("indexing the nodes in VectorStoreIndex")
|
73 |
index = VectorStoreIndex(
|
74 |
nodes=nodes,
|
75 |
storage_context=storage_context,
|
76 |
transformations=Settings.transformations,
|
77 |
)
|
78 |
|
79 |
-
logger.info("initializing the VectorIndexRetriever with top_k as 5")
|
80 |
vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
|
81 |
response_synthesizer = get_response_synthesizer()
|
82 |
-
logger.info("creating the RetrieverQueryEngine instance")
|
83 |
vector_query_engine = RetrieverQueryEngine(
|
84 |
retriever=vector_retriever,
|
85 |
response_synthesizer=response_synthesizer,
|
86 |
)
|
87 |
-
logger.info("creating the HyDEQueryTransform instance")
|
88 |
hyde = HyDEQueryTransform(include_original=True)
|
89 |
self.hyde_query_engine = TransformQueryEngine(vector_query_engine, hyde)
|
90 |
|
@@ -92,9 +93,9 @@ class ChatPDF:
|
|
92 |
if not self.hyde_query_engine:
|
93 |
return "Please, add a PDF document first."
|
94 |
|
95 |
-
logger.info("retrieving the response to the query")
|
96 |
response = self.hyde_query_engine.query(str_or_query_bundle=query)
|
97 |
-
|
98 |
return response
|
99 |
|
100 |
def clear(self):
|
|
|
26 |
doc_ids = []
|
27 |
nodes = []
|
28 |
hyde_query_engine = None
|
29 |
+
logger = None
|
30 |
|
31 |
def __init__(self):
|
32 |
logging.basicConfig(level=logging.INFO)
|
33 |
+
self.logger = logging.getLogger(__name__)
|
34 |
|
35 |
text_parser = SentenceSplitter(chunk_size=512, chunk_overlap=100)
|
36 |
|
37 |
+
self.logger.info("initializing the vector store related objects")
|
38 |
client = QdrantClient(url=QDRANT_API_URL, api_key=QDRANT_API_KEY)
|
39 |
vector_store = QdrantVectorStore(client=client, collection_name="rag_documents")
|
40 |
|
41 |
+
self.logger.info("initializing the OllamaEmbedding")
|
42 |
embed_model = OllamaEmbedding(model_name='mxbai-embed-large')
|
43 |
+
self.logger.info("initializing the global settings")
|
44 |
Settings.embed_model = embed_model
|
45 |
Settings.llm = Ollama(model="qwen:1.8b", request_timeout=1000000)
|
46 |
Settings.transformations = [text_parser]
|
|
|
48 |
def ingest(self, dir_path: str):
|
49 |
docs = SimpleDirectoryReader(input_dir=dir_path).load_data()
|
50 |
|
51 |
+
self.logger.info("enumerating docs")
|
52 |
for doc_idx, doc in enumerate(docs):
|
53 |
curr_text_chunks = text_parser.split_text(doc.text)
|
54 |
text_chunks.extend(curr_text_chunks)
|
55 |
doc_ids.extend([doc_idx] * len(curr_text_chunks))
|
56 |
|
57 |
+
self.logger.info("enumerating text_chunks")
|
58 |
for idx, text_chunk in enumerate(text_chunks):
|
59 |
node = TextNode(text=text_chunk)
|
60 |
src_doc = docs[doc_ids[idx]]
|
61 |
node.metadata = src_doc.metadata
|
62 |
nodes.append(node)
|
63 |
|
64 |
+
self.logger.info("enumerating nodes")
|
65 |
for node in nodes:
|
66 |
node_embedding = embed_model.get_text_embedding(
|
67 |
node.get_content(metadata_mode=MetadataMode.ALL)
|
68 |
)
|
69 |
node.embedding = node_embedding
|
70 |
|
71 |
+
self.logger.info("initializing the storage context")
|
72 |
storage_context = StorageContext.from_defaults(vector_store=vector_store)
|
73 |
+
self.logger.info("indexing the nodes in VectorStoreIndex")
|
74 |
index = VectorStoreIndex(
|
75 |
nodes=nodes,
|
76 |
storage_context=storage_context,
|
77 |
transformations=Settings.transformations,
|
78 |
)
|
79 |
|
80 |
+
self.logger.info("initializing the VectorIndexRetriever with top_k as 5")
|
81 |
vector_retriever = VectorIndexRetriever(index=index, similarity_top_k=5)
|
82 |
response_synthesizer = get_response_synthesizer()
|
83 |
+
self.logger.info("creating the RetrieverQueryEngine instance")
|
84 |
vector_query_engine = RetrieverQueryEngine(
|
85 |
retriever=vector_retriever,
|
86 |
response_synthesizer=response_synthesizer,
|
87 |
)
|
88 |
+
self.logger.info("creating the HyDEQueryTransform instance")
|
89 |
hyde = HyDEQueryTransform(include_original=True)
|
90 |
self.hyde_query_engine = TransformQueryEngine(vector_query_engine, hyde)
|
91 |
|
|
|
93 |
if not self.hyde_query_engine:
|
94 |
return "Please, add a PDF document first."
|
95 |
|
96 |
+
self.logger.info("retrieving the response to the query")
|
97 |
response = self.hyde_query_engine.query(str_or_query_bundle=query)
|
98 |
+
self.logger.info(response)
|
99 |
return response
|
100 |
|
101 |
def clear(self):
|