app update post reader and utils
Once the reader, retriever and utils methods are moved to their respective scripts, the app is slimmed down.
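For orientation, the sketch below (not part of the commit) shows how the helpers that app.py now imports fit together. The module paths, function names and keyword arguments are taken from the diff that follows; the answer_once() wrapper and its non-streaming return value are illustrative assumptions only.

# Sketch only: the call flow app.py moves to in this commit, based on the
# imports and calls added in the diff below. answer_once() is a hypothetical,
# non-streaming wrapper; the real chat() handler streams tokens asynchronously.
from auditqa.process_chunks import getconfig, get_local_qdrant
from auditqa.retriever import get_context
from auditqa.reader import nvidia_client, dedicated_endpoint
from auditqa.utils import make_html_source, get_message_template

model_config = getconfig("model_params.cfg")

def answer_once(query, reports, sources, subtype, year, system_prompt, user_prompt):
    """Outline of chat() after the refactor, without the async streaming loop."""
    vectorstore = get_local_qdrant()["allreports"]
    # retrieval (metadata filtering + reranking) is now a single helper call
    docs = get_context(vectorstore=vectorstore, query=query, reports=reports,
                       sources=sources, subtype=subtype, year=year)
    docs_html = "".join(make_html_source(d, i) for i, d in enumerate(docs, 1))
    # prompt formatting and endpoint choice depend on the configured reader TYPE
    messages = get_message_template(model_config.get('reader', 'TYPE'), system_prompt, user_prompt)
    if model_config.get('reader', 'TYPE') == 'NVIDIA':
        chat_model = nvidia_client()
    else:
        chat_model = dedicated_endpoint()
    return messages, chat_model, docs_html

The actual chat() handler streams tokens from the chosen endpoint and yields partial answers to the UI, as shown in the diff.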
app.py
CHANGED
@@ -3,50 +3,37 @@ import pandas as pd
 import logging
 import asyncio
 import os
-import re
-import json
 from uuid import uuid4
 from datetime import datetime
 from pathlib import Path
 from huggingface_hub import CommitScheduler
 from auditqa.sample_questions import QUESTIONS
 from auditqa.reports import files, report_list
-from langchain.schema import (
-    HumanMessage,
-    SystemMessage,
-)
-from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
-from langchain_community.llms import HuggingFaceEndpoint
 from auditqa.process_chunks import load_chunks, getconfig, get_local_qdrant
-from
-from
-from
-
-from qdrant_client.http import models as rest
+from auditqa.retriever import get_context
+from auditqa.reader import nvidia_client, dedicated_endpoint
+from auditqa.utils import make_html_source, parse_output_llm_with_sources, save_logs, get_message_template
+
 from dotenv import load_dotenv
 load_dotenv()
-
-#
-HF_token = os.environ["LLAMA_3_1"]
+
+# fetch tokens and model config params
 SPACES_LOG = os.environ["SPACES_LOG"]
+model_config = getconfig("model_params.cfg")
+
 # create the local logs repo
 JSON_DATASET_DIR = Path("json_dataset")
 JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
 JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"
 
-# the logs are written to dataset repo
+# the logs are written to dataset repo periodically from local logs
 # https://huggingface.co/spaces/Wauplin/space_to_dataset_saver
 scheduler = CommitScheduler(
-
-
-
-
-
-)
-
-model_config = getconfig("model_params.cfg")
-
-
+    repo_id="GIZ/spaces_logs",
+    repo_type="dataset",
+    folder_path=JSON_DATASET_DIR,
+    path_in_repo="audit_chatbot",
+    token=SPACES_LOG )
 
 #### VECTOR STORE ####
 # reports contain the already created chunks from Markdown version of pdf reports
@@ -54,68 +41,11 @@ model_config = getconfig("model_params.cfg")
 # We need to create the local vectorstore collection once using load_chunks
 # vectorestore colection are stored on persistent storage so this needs to be run only once
 # hence, comment out line below when creating for first time
-#
+#vectorstores = load_chunks()
 # once the vectore embeddings are created we will use qdrant client to access these
 vectorstores = get_local_qdrant()
 
 
-
-#### FUNCTIONS ####
-# App UI and and its functionality is inspired and adapted from
-# https://huggingface.co/spaces/Ekimetrics/climate-question-answering
-
-
-def save_logs(logs) -> None:
-    """ Every interaction with app saves the log of question and answer,
-        this is to get the usage statistics of app and evaluate model performances
-    """
-    with scheduler.lock:
-        with JSON_DATASET_PATH.open("a") as f:
-            json.dump(logs, f)
-            f.write("\n")
-    logging.info("logging done")
-
-
-def make_html_source(source,i):
-    """
-    takes the text and converts it into html format for display in "source" side tab
-    """
-    meta = source.metadata
-    content = source.page_content.strip()
-
-    name = meta['filename']
-    card = f"""
-    <div class="card" id="doc{i}">
-        <div class="card-content">
-            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
-            <p>{content}</p>
-        </div>
-        <div class="card-footer">
-            <span>{name}</span>
-            <a href="{meta['filename']}#page={int(meta['page'])}" target="_blank" class="pdf-link">
-                <span role="img" aria-label="Open PDF">🔗</span>
-            </a>
-        </div>
-    </div>
-    """
-
-    return card
-
-def parse_output_llm_with_sources(output):
-    # Split the content into a list of text and "[Doc X]" references
-    content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output)
-    parts = []
-    for part in content_parts:
-        if part.startswith("Doc"):
-            subparts = part.split(",")
-            subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts]
-            subparts = [f"""<a href="#doc{subpart}" class="a-doc-ref" target="_self"><span class='doc-ref'><sup>{subpart}</sup></span></a>""" for subpart in subparts]
-            parts.append("".join(subparts))
-        else:
-            parts.append(part)
-    content_parts = "".join(parts)
-    return content_parts
-
 def start_chat(query,history):
     history = history + [(query,None)]
     history = [tuple(x) for x in history]
@@ -141,64 +71,18 @@ async def chat(query,history,sources,reports,subtype,year):
 
     ##------------------------fetch collection from vectorstore------------------------------
    vectorstore = vectorstores["allreports"]
-    ##---------------------construct filter for metdata filtering---------------------------
-    if len(reports) == 0:
-        ("defining filter for:{}:{}:{}".format(sources,subtype,year))
-        filter=rest.Filter(
-            must=[rest.FieldCondition(
-                key="metadata.source",
-                match=rest.MatchValue(value=sources)
-            ),
-            rest.FieldCondition(
-                key="metadata.subtype",
-                match=rest.MatchValue(value=subtype)
-            ),
-            rest.FieldCondition(
-                key="metadata.year",
-                match=rest.MatchAny(any=year)
-            ),])
-    else:
-        print("defining filter for allreports:",reports)
-        filter=rest.Filter(
-            must=[
-            rest.FieldCondition(
-                key="metadata.filename",
-                match=rest.MatchAny(any=reports)
-            )])
-
 
    ##------------------------------get context----------------------------------------------
-
-
-
-    for
-    # similarity score threshold can be used to make adjustments in quality and quantity for Retriever
-    # However need to make balancing, as retrieved results are again used by Ranker to fetch best among
-    # retreived results
-    retriever = vectorstore.as_retriever(
-        search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6,
-        "k": int(model_config.get('retriever','TOP_K')),
-        "filter":filter})
-    model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-base")
-    compressor = CrossEncoderReranker(model=model, top_n=3)
-    compression_retriever = ContextualCompressionRetriever(
-        base_compressor=compressor, base_retriever=retriever
-    )
-    context_retrieved = compression_retriever.invoke(question)
-    logging.info(len(context_retrieved))
-    for doc in context_retrieved:
-        logging.info(doc.metadata)
+    context_retrieved = get_context(vectorstore=vectorstore,query=query,reports=reports,
+                        sources=sources,subtype=subtype,year=year)
+    context_retrieved_formatted = "||".join(doc.page_content for doc in context_retrieved)
+    context_retrieved_lst = [doc.page_content for doc in context_retrieved]
 
-    def format_docs(docs):
-        return "\n\n".join(doc.page_content for doc in docs)
-
-    context_retrieved_formatted = format_docs(context_retrieved)
-    context_retrieved_lst.append(context_retrieved_formatted)
-
    ##------------------- -------------Prompt--------------------------------------------------
    SYSTEM_PROMPT = """
 You are AuditQ&A, an AI Assistant created by Auditors and Data Scientist. You are given a question and extracted passages of the consolidated/departmental/thematic focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
 Guidelines:
+- Passeges are provided as comma separated list of strings
 - If the passages have useful facts or numbers, use them in your answer.
 - When you use information from a passage, mention where it came from by using [Doc i] at the end of the sentence. i stands for the number of the document.
 - Do not use the sentence 'Doc i says ...' to say where information came from.
@@ -215,87 +99,59 @@ async def chat(query,history,sources,reports,subtype,year):
 Question: {question} - Explained to audit expert
 Answer in english with the passages citations:
 """.format(context = context_retrieved_lst, question=query)
+
+    ##-------------------- apply message template ------------------------------
+    messages = get_message_template(model_config.get('reader','TYPE'),SYSTEM_PROMPT,USER_PROMPT)
 
-
-        SystemMessage(content=SYSTEM_PROMPT),
-        HumanMessage(
-            content=USER_PROMPT
-        ),]
-
-    ##-----------------------getting inference endpoints------------------------------
-
-    # Set up the streaming callback handler
-    callback = StreamingStdOutCallbackHandler()
-
-    # Initialize the HuggingFaceEndpoint with streaming enabled
-    llm_qa = HuggingFaceEndpoint(
-        endpoint_url=model_config.get('reader', 'ENDPOINT'),
-        max_new_tokens=512,
-        repetition_penalty=1.03,
-        timeout=70,
-        huggingfacehub_api_token=HF_token,
-        streaming=True,  # Enable streaming for real-time token generation
-        callbacks=[callback]  # Add the streaming callback handler
-    )
-
-    # Create a ChatHuggingFace instance with the streaming-enabled endpoint
-    chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # Prepare the HTML for displaying source documents
+    ## -----------------Prepare HTML for displaying source documents --------------
    docs_html = []
    for i, d in enumerate(context_retrieved, 1):
        docs_html.append(make_html_source(d, i))
    docs_html = "".join(docs_html)
 
-
+    ##-----------------------get answer from endpoints------------------------------
    answer_yet = ""
+    if model_config.get('reader','TYPE') == 'NVIDIA':
+        chat_model = nvidia_client()
+        async def process_stream():
+            nonlocal answer_yet
+            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
+            #nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            response = chat_model.chat_completion(
+                model=model_config.get("reader","NVIDIA_MODEL"),
+                messages=messages,
+                stream=True,
+                max_tokens=int(model_config.get('reader','MAX_TOKENS')),
+            )
+            for message in response:
+                token = message.choices[0].delta.content
+                if token:
+                    answer_yet += token
+                    parsed_answer = parse_output_llm_with_sources(answer_yet)
+                    history[-1] = (query, parsed_answer)
+                    yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
 
-
-
+    else:
+        chat_model = dedicated_endpoint()
+        async def process_stream():
            # Without nonlocal, Python would create a new local variable answer_yet inside process_stream(), instead of modifying the one from the outer scope.
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # #callbacks = [StreamingStdOutCallbackHandler()]
-    # llm_qa = HuggingFaceEndpoint(
-    #     endpoint_url= model_config.get('reader','ENDPOINT'),
-    #     max_new_tokens=512,
-    #     repetition_penalty=1.03,
-    #     timeout=70,
-    #     huggingfacehub_api_token=HF_token,)
-
-    # # create RAG
-    # chat_model = ChatHuggingFace(llm=llm_qa)
-
-    # ##-------------------------- get answers ---------------------------------------
-    # answer_lst = []
-    # for question, context in zip(question_lst , context_retrieved_lst):
-    #     answer = chat_model.invoke(messages)
-    #     answer_lst.append(answer.content)
-    # docs_html = []
-    # for i, d in enumerate(context_retrieved, 1):
-    #     docs_html.append(make_html_source(d, i))
-    # docs_html = "".join(docs_html)
-
-    # previous_answer = history[-1][1]
-    # previous_answer = previous_answer if previous_answer is not None else ""
-    # answer_yet = previous_answer + answer_lst[0]
-    # answer_yet = parse_output_llm_with_sources(answer_yet)
-    # history[-1] = (query,answer_yet)
-
-    # history = [tuple(x) for x in history]
-
-    # yield history,docs_html
+            nonlocal answer_yet # Use the outer scope's answer_yet variable
+            # Iterate over the streaming response chunks
+            async for chunk in chat_model.astream(messages):
+                token = chunk.content
+                answer_yet += token
+                parsed_answer = parse_output_llm_with_sources(answer_yet)
+                history[-1] = (query, parsed_answer)
+                yield [tuple(x) for x in history], docs_html
+
+        # Stream the response updates
+        async for update in process_stream():
+            yield update
 
    # logging the event
    try:
@@ -309,7 +165,8 @@ async def chat(query,history,sources,reports,subtype,year):
        "question":query,
        "sources":sources,
        "retriever":model_config.get('retriever','MODEL'),
-        "
+        "endpoint_type":model_config.get('reader','TYPE')
+        "raeder":model_config.get('reader','NVIDIA_MODEL'),
        "docs":[doc.page_content for doc in context_retrieved],
        "answer": history[-1][1],
        "time": timestamp,
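For reference, a short sketch of the logging path this commit configures: the CommitScheduler arguments are the ones added in the first hunk, and the append-under-lock helper mirrors the save_logs body removed from app.py. The real helper now lives in auditqa.utils and its exact signature may differ.

# Sketch only: CommitScheduler pushes json_dataset/ to the GIZ/spaces_logs dataset
# repo periodically; each interaction appends one JSON line under the scheduler lock.
import json
from pathlib import Path
from uuid import uuid4
from huggingface_hub import CommitScheduler

JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"logs-{uuid4()}.json"

scheduler = CommitScheduler(
    repo_id="GIZ/spaces_logs",
    repo_type="dataset",
    folder_path=JSON_DATASET_DIR,
    path_in_repo="audit_chatbot",
    token="hf_xxx",  # placeholder; the app reads this from the SPACES_LOG secret
)

def save_logs(logs: dict) -> None:
    # same behaviour as the helper this commit removes from app.py
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(logs, f)
            f.write("\n")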