AFischer1985 committed on
Commit
a97d3f8
·
verified ·
1 Parent(s): 52823c9

Update run.py

Browse files
Files changed (1) hide show
  1. run.py +12 -98
run.py CHANGED
@@ -2,11 +2,10 @@
2
  # Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
3
  # Author: Andreas Fischer
4
  # Date: October 10th, 2024
5
- # Last update: October 24th, 2024
6
  ##########################################################################################
7
 
8
  import os
9
-
10
  import torch
11
  from transformers import AutoTokenizer, AutoModel # chromaDB
12
  from datetime import datetime, date #add_doc,
@@ -26,7 +25,9 @@ from huggingface_hub import InferenceClient #multimodal_response
26
 
27
  myModel="mistralai/Mixtral-8x7b-instruct-v0.1"
28
  #myModel="princeton-nlp/gemma-2-9b-it-SimPO"
29
- #mod="mistralai/Mixtral-8x7b-instruct-v0.1"
 
 
30
  #tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
31
  #cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
32
  #cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
@@ -82,8 +83,9 @@ def format_prompt0(message, history):
82
  #-------------------------------------------------------------------------
83
 
84
  def format_prompt(message, history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=False,
85
- #startOfString="",template0="<start_of_turn>user\n{system}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n",template1="<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n",template2="<end_of_turn>\n"):
86
- startOfString="<s>", template0=" [INST] {system} [/INST] </s>",template1=" [INST] {message} [/INST]",template2=" {response}</s>"):
 
87
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
88
  prompt = ""
89
  if RAGAddon is not None:
@@ -222,7 +224,7 @@ def add_doc(path, session):
222
  if(len(doc[0])>5):
223
  if(not "cuda" in device):
224
  doc="\n\n".join(doc[0][0:5])
225
- gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (first 5 pages on CPU setups)!")
226
  else:
227
  doc="\n\n".join(doc[0])
228
  gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
@@ -254,105 +256,17 @@ def add_doc(path, session):
254
  print(len(x))
255
  if(len(x)==0):
256
  chunkSize=40000
257
- for i in range(round(len(corpus)/chunkSize+0.5)): #0 is first batch, 3 is last (incomplete) batch given 133497 texts
258
  print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
259
  ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
260
  batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
261
  textIDs=[str(id) for id in ids[0:len(batch)]]
262
- ids=[str(id+len(x)+1) for id in ids[0:len(batch)]] # id refers to chromadb-unique ID
263
  collection.add(documents=batch, ids=ids,
264
- metadatas=[{"date": str("2024-10-10")} for b in batch]) #"textID":textIDs, "id":ids,
265
  print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
266
  now = datetime.now()
267
  gr.Info(f"Indexing complete!")
268
- print(now-then) #zu viel GB für sentences (GPU), bzw. 0:00:10.375087 für chunks
269
  return(collection)
270
 
271
-
272
- #--------------------------------------------------------
273
- # Function for response to user queries and pot. addenda
274
- #--------------------------------------------------------
275
-
276
- def multimodal_response(message, history, dropdown, hfToken, request: gr.Request):
277
- print("def multimodal response!")
278
- if(hfToken.startswith("hf_")): # use HF-hub with custom token if token is provided
279
- inferenceClient = InferenceClient(model=myModel, token=hfToken)
280
- else:
281
- inferenceClient = InferenceClient(myModel)
282
- global databases
283
- if request:
284
- session=request.session_hash
285
- else:
286
- session="0"
287
- length=str(len(history))
288
- print(databases)
289
- if(not databases[-1][1]==session):
290
- databases.append((date.today(),session))
291
- #print(databases)
292
- query=message["text"]
293
- if(len(message["files"])>0): # is there at least one file attached?
294
- collection=add_doc(message["files"][0], session)
295
- else: # otherwise, you still want to get the collection with the session-based db
296
- collection=add_doc(message["text"], session)
297
- client = chromadb.PersistentClient(path=dbPath)
298
- print(str(client.list_collections()))
299
- x=collection.get(include=[])["ids"]
300
- ragQuery=[format_prompt(query, history) if len(history)>0 else query]
301
- context=collection.query(query_texts=ragQuery, n_results=3)
302
- context=["<Kontext "+str(i)+"> "+str(c)+"</Kontext "+str(i)+">" for i,c in enumerate(context["documents"][0])]
303
- gr.Info("Kontext:\n"+str(context))
304
- generate_kwargs = dict(
305
- temperature=float(0.9),
306
- max_new_tokens=5000,
307
- top_p=0.95,
308
- repetition_penalty=1.0,
309
- do_sample=True,
310
- seed=42,
311
- )
312
- system="Mit Blick auf das folgende Gespräch und den relevanten Kontext, antworte auf die aktuelle Frage des Nutzers. "+\
313
- "Antworte ausschließlich auf Basis der Informationen im Kontext.\n\nKontext:\n\n"+\
314
- str("\n\n".join(context))
315
- #"Given the following conversation, relevant context, and a follow up question, "+\
316
- #"reply with an answer to the current question the user is asking. "+\
317
- #"Return only your response to the question given the above information "+\
318
- #"following the users instructions as needed.\n\nContext:"+\
319
- print(system)
320
- #formatted_prompt = format_prompt0(system+"\n"+query, history)
321
- formatted_prompt = format_prompt(query, history,system=system)
322
- print(formatted_prompt)
323
- output = ""
324
- try:
325
- stream = inferenceClient.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
326
- for response in stream:
327
- output += response.token.text
328
- yield output
329
- except Exception as e:
330
- output = "Für weitere Antworten von der KI gebe bitte einen gültigen HuggingFace-Token an."
331
- if(len(context)>0):
332
- output += "\nBis dahin helfen dir hoffentlich die folgenden Quellen weiter:"
333
- yield output
334
- print(str(e))
335
- if(len(context)>0):
336
- output=output+"\n\n<br><details open><summary><strong>Quellen</strong></summary><br><ul>"+ "".join(["<li>" + c + "</li>" for c in context])+"</ul></details>"
337
- yield output
338
-
339
- #------------------------------
340
- # Launch Gradio-ChatInterface
341
- #------------------------------
342
-
343
- i=gr.ChatInterface(multimodal_response,
344
- title="Frag dein PDF",
345
- multimodal=True,
346
- additional_inputs=[
347
- gr.Dropdown(
348
- info="Wähle eine Variante",
349
- choices=["1","2","3"],
350
- value="1",
351
- label="Variante"),
352
- gr.Textbox(
353
- value="",
354
- label="HF_token"),
355
- ])
356
- i.launch() #allowed_paths=["."])
357
-
358
-
 
2
  # Title: Gradio Interface to LLM-chatbot with dynamic RAG-functionality and ChromaDB
3
  # Author: Andreas Fischer
4
  # Date: October 10th, 2024
5
+ # Last update: October 25th, 2024
6
  ##########################################################################################
7
 
8
  import os
 
9
  import torch
10
  from transformers import AutoTokenizer, AutoModel # chromaDB
11
  from datetime import datetime, date #add_doc,
 
25
 
26
  myModel="mistralai/Mixtral-8x7b-instruct-v0.1"
27
  #myModel="princeton-nlp/gemma-2-9b-it-SimPO"
28
+ #myModel="google/gemma-2-2b-it"
29
+ #myModel="meta-llama/Llama-3.1-8B-Instruct"
30
+ #mod=myModel
31
  #tok=AutoTokenizer.from_pretrained(mod) #,token="hf_...")
32
  #cha=[{"role":"system","content":"A"},{"role":"user","content":"B"},{"role":"assistant","content":"C"}]
33
  #cha=[{"role":"user","content":"U1"},{"role":"assistant","content":"A1"},{"role":"user","content":"U2"},{"role":"assistant","content":"A2"}]
 
83
  #-------------------------------------------------------------------------
84
 
85
  def format_prompt(message, history=None, system=None, RAGAddon=None, system2=None, zeichenlimit=None,historylimit=4, removeHTML=False,
86
+ startOfString="<s>", template0=" [INST] {system} [/INST] </s>",template1=" [INST] {message} [/INST]",template2=" {response}</s>"): # mistralai/Mixtral-8x7B-Instruct-v0.1
87
+ #startOfString="<bos>",template0="<start_of_turn>user\n{system}<end_of_turn>\n<start_of_turn>model\n<end_of_turn>\n",template1="<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n",template2="<end_of_turn>\n"): # google/gemma-2-2b-it
88
+ #startOfString="", template0="<|start_header_id|>system<|end_header_id|>\n\n{system}\n<|eot_id|>", template1="<|start_header_id|>user<|end_header_id|>\n\n{message}<|eot_id|>", template2="<|start_header_id|>assistant<|end_header_id|>\n\n{response}<|eot_id|>"): # meta-llama/Llama-3.1-8B-Instruct?
89
  if zeichenlimit is None: zeichenlimit=1000000000 # :-)
90
  prompt = ""
91
  if RAGAddon is not None:
 
224
  if(len(doc[0])>5):
225
  if(not "cuda" in device):
226
  doc="\n\n".join(doc[0][0:5])
227
+ gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing excerpt (demo-mode: first 5 pages on CPU setups)!")
228
  else:
229
  doc="\n\n".join(doc[0])
230
  gr.Info("PDF uploaded to DB_"+str(session)+", start Indexing!")
 
256
  print(len(x))
257
  if(len(x)==0):
258
  chunkSize=40000
259
+ for i in range(round(len(corpus)/chunkSize+0.5)):
260
  print("embed batch "+str(i)+" of "+str(round(len(corpus)/chunkSize+0.5)))
261
  ids=list(range(i*chunkSize,(i*chunkSize+chunkSize)))
262
  batch=corpus[i*chunkSize:(i*chunkSize+chunkSize)]
263
  textIDs=[str(id) for id in ids[0:len(batch)]]
264
+ ids=[str(id+len(x)+1) for id in ids[0:len(batch)]]
265
  collection.add(documents=batch, ids=ids,
266
+ metadatas=[{"date": str("2024-10-10")} for b in batch])
267
  print("finished batch "+str(i)+" of "+str(round(len(corpus)/40000+0.5)))
268
  now = datetime.now()
269
  gr.Info(f"Indexing complete!")
270
+ print(now-then)
271
  return(collection)
272