Spaces:
Sleeping
Sleeping
AFischer1985
committed on
Update run.py
Browse files
run.py
CHANGED
@@ -2,11 +2,10 @@
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
-
# Last update: October
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
9 |
-
|
10 |
import torch
|
11 |
from transformers import AutoTokenizer, AutoModel # chromaDB
|
12 |
from datetime import datetime, date #add_doc,
|
@@ -26,7 +25,9 @@ from huggingface_hub import InferenceClient #multimodal_response
|
|
26 |
|
27 |
myModel="mistralai/Mixtral-8x7b-instruct-v0.1"
|
28 |
#myModel="princeton-nlp/gemma-2-9b-it-SimPO"
|
29 |
-
#
|
|
|
|
|
30 |
#tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
|
31 |
#cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
|
32 |
#cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
|
@@ -82,8 +83,9 @@ def format_prompt0(message, history):
|
|
82 |
#-------------------------------------------------------------------------
|
83 |
|
84 |
def format_prompt(message, history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=False,
|
85 |
-
|
86 |
-
startOfString="<
|
|
|
87 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
88 |
prompt = ""
|
89 |
if RAGAddon is not None:
|
@@ -222,7 +224,7 @@ def add_doc(path, session):
|
|
222 |
if(len(doc[0])>5):
|
223 |
if(not "cuda" in device):
|
224 |
doc="\n\n".join(doc[0][0:5])
|
225 |
-
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (first 5 pages on CPU setups)!")
|
226 |
else:
|
227 |
doc="\n\n".join(doc[0])
|
228 |
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
|
@@ -254,105 +256,17 @@ def add_doc(path, session):
|
|
254 |
print(len(x))
|
255 |
if(len(x)==0):
|
256 |
chunkSize=40000
|
257 |
-
for i in range(round(len(corpus)/chunkSize+0.5)):
|
258 |
print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
|
259 |
ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
|
260 |
batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
|
261 |
textIDs=[str(id) for id in ids[0:len(batch)]]
|
262 |
-
ids=[str(id+len(x)+1) for id in ids[0:len(batch)]]
|
263 |
collection.add(documents=batch, ids=ids,
|
264 |
-
metadatas=[{"date": str("2024-10-10")} for b in batch])
|
265 |
print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
|
266 |
now = datetime.now()
|
267 |
gr.Info(f"Indexing complete!")
|
268 |
-
print(now-then)
|
269 |
return(collection)
|
270 |
|
271 |
-
|
272 |
-
#--------------------------------------------------------
|
273 |
-
# Function for response to user queries and pot. addenda
|
274 |
-
#--------------------------------------------------------
|
275 |
-
|
276 |
-
def multimodal_response(message, history, dropdown, hfToken, request: gr.Request):
|
277 |
-
print("def multimodal response!")
|
278 |
-
if(hfToken.startswith("hf_")): # use HF-hub with custom token if token is provided
|
279 |
-
inferenceClient = InferenceClient(model=myModel, token=hfToken)
|
280 |
-
else:
|
281 |
-
inferenceClient = InferenceClient(myModel)
|
282 |
-
global databases
|
283 |
-
if request:
|
284 |
-
session=request.session_hash
|
285 |
-
else:
|
286 |
-
session="0"
|
287 |
-
length=str(len(history))
|
288 |
-
print(databases)
|
289 |
-
if(not databases[-1][1]==session):
|
290 |
-
databases.append((date.today(),session))
|
291 |
-
#print(databases)
|
292 |
-
query=message["text"]
|
293 |
-
if(len(message["files"])>0): # is there at least one file attached?
|
294 |
-
collection=add_doc(message["files"][0], session)
|
295 |
-
else: # otherwise, you still want to get the collection with the session-based db
|
296 |
-
collection=add_doc(message["text"], session)
|
297 |
-
client = chromadb.PersistentClient(path=dbPath)
|
298 |
-
print(str(client.list_collections()))
|
299 |
-
x=collection.get(include=[])["ids"]
|
300 |
-
ragQuery=[format_prompt(query, history) if len(history)>0 else query]
|
301 |
-
context=collection.query(query_texts=ragQuery, n_results=3)
|
302 |
-
context=["<Kontext "+str(i)+"> "+str(c)+"</Kontext "+str(i)+">" for i,c in enumerate(context["documents"][0])]
|
303 |
-
gr.Info("Kontext:\n"+str(context))
|
304 |
-
generate_kwargs = dict(
|
305 |
-
temperature=float(0.9),
|
306 |
-
max_new_tokens=5000,
|
307 |
-
top_p=0.95,
|
308 |
-
repetition_penalty=1.0,
|
309 |
-
do_sample=True,
|
310 |
-
seed=42,
|
311 |
-
)
|
312 |
-
system="Mit Blick auf das folgende Gespräch und den relevanten Kontext, antworte auf die aktuelle Frage des Nutzers. "+\
|
313 |
-
"Antworte ausschließlich auf Basis der Informationen im Kontext.\n\nKontext:\n\n"+\
|
314 |
-
str("\n\n".join(context))
|
315 |
-
#"Given the following conversation, relevant context, and a follow up question, "+\
|
316 |
-
#"reply with an answer to the current question the user is asking. "+\
|
317 |
-
#"Return only your response to the question given the above information "+\
|
318 |
-
#"following the users instructions as needed.\n\nContext:"+\
|
319 |
-
print(system)
|
320 |
-
#formatted_prompt = format_prompt0(system+"\n"+query, history)
|
321 |
-
formatted_prompt = format_prompt(query, history,system=system)
|
322 |
-
print(formatted_prompt)
|
323 |
-
output = ""
|
324 |
-
try:
|
325 |
-
stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
326 |
-
for response in stream:
|
327 |
-
output += response.token.text
|
328 |
-
yield output
|
329 |
-
except Exception as e:
|
330 |
-
output = "Für weitere Antworten von der KI gebe bitte einen gültigen HuggingFace-Token an."
|
331 |
-
if(len(context)>0):
|
332 |
-
output += "\nBis dahin helfen dir hoffentlich die folgenden Quellen weiter:"
|
333 |
-
yield output
|
334 |
-
print(str(e))
|
335 |
-
if(len(context)>0):
|
336 |
-
output=output+"\n\n<br><details open><summary><strong>Quellen</strong></summary><br><ul>"+ "".join(["<li>" + c + "</li>" for c in context])+"</ul></details>"
|
337 |
-
yield output
|
338 |
-
|
339 |
-
#------------------------------
|
340 |
-
# Launch Gradio-ChatInterface
|
341 |
-
#------------------------------
|
342 |
-
|
343 |
-
i=gr.ChatInterface(multimodal_response,
|
344 |
-
title="Frag dein PDF",
|
345 |
-
multimodal=True,
|
346 |
-
additional_inputs=[
|
347 |
-
gr.Dropdown(
|
348 |
-
info="Wähle eine Variante",
|
349 |
-
choices=["1","2","3"],
|
350 |
-
value="1",
|
351 |
-
label="Variante"),
|
352 |
-
gr.Textbox(
|
353 |
-
value="",
|
354 |
-
label="HF_token"),
|
355 |
-
])
|
356 |
-
i.launch() #allowed_paths=["."])
|
357 |
-
|
358 |
-
|
|
|
2 |
# Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
|
3 |
# Author: Andreas Fischer
|
4 |
# Date: October 10th, 2024
|
5 |
+
# Last update: October 25th, 2024
|
6 |
##########################################################################################
|
7 |
|
8 |
import os
|
|
|
9 |
import torch
|
10 |
from transformers import AutoTokenizer, AutoModel # chromaDB
|
11 |
from datetime import datetime, date #add_doc,
|
|
|
25 |
|
26 |
myModel="mistralai/Mixtral-8x7b-instruct-v0.1"
|
27 |
#myModel="princeton-nlp/gemma-2-9b-it-SimPO"
|
28 |
+
#myModel="google/gemma-2-2b-it"
|
29 |
+
#myModel="meta-llama/Llama-3.1-8B-Instruct"
|
30 |
+
#mod=myModel
|
31 |
#tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
|
32 |
#cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
|
33 |
#cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
|
|
|
83 |
#-------------------------------------------------------------------------
|
84 |
|
85 |
def format_prompt(message, history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=False,
|
86 |
+
startOfString="<s>", template0=" [INST] {system} [/INST] </s>",template1=" [INST] {message} [/INST]",template2=" {response}</s>"): # mistralai/Mixtral-8x7B-Instruct-v0.1
|
87 |
+
#startOfString="<bos>",template0="<start_of_turn>user\n{system}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n",template1="<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n",template2="<end_of_turn>\n"): # google/gemma-2-2b-it
|
88 |
+
#startOfString="", template0="<|start_header_id|>system<|end_header_id|>\n\n{system}\n<|eot_id|>", template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>", template2="<|start_header_id|>assistant<|end_header_id|>\n\n{response}<|eot_id|>"): # meta-llama/Llama-3.1-8B-Instruct?
|
89 |
if zeichenlimit is None: zeichenlimit=1000000000 # :-)
|
90 |
prompt = ""
|
91 |
if RAGAddon is not None:
|
|
|
224 |
if(len(doc[0])>5):
|
225 |
if(not "cuda" in device):
|
226 |
doc="\n\n".join(doc[0][0:5])
|
227 |
+
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (demo-mode: first 5 pages on CPU setups)!")
|
228 |
else:
|
229 |
doc="\n\n".join(doc[0])
|
230 |
gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
|
|
|
256 |
print(len(x))
|
257 |
if(len(x)==0):
|
258 |
chunkSize=40000
|
259 |
+
for i in range(round(len(corpus)/chunkSize+0.5)):
|
260 |
print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
|
261 |
ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
|
262 |
batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
|
263 |
textIDs=[str(id) for id in ids[0:len(batch)]]
|
264 |
+
ids=[str(id+len(x)+1) for id in ids[0:len(batch)]]
|
265 |
collection.add(documents=batch, ids=ids,
|
266 |
+
metadatas=[{"date": str("2024-10-10")} for b in batch])
|
267 |
print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
|
268 |
now = datetime.now()
|
269 |
gr.Info(f"Indexing complete!")
|
270 |
+
print(now-then)
|
271 |
return(collection)
|
272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|