Update app.py
app.py
CHANGED
@@ -329,6 +329,28 @@ def document_retrieval_chroma(llm, prompt):
     #ChromaDB to store the embeddings
     db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
     return db
+
+############################################
+#to be able to store documents vectorized in the Chroma DB - prepare the DB for that
+#second variant, matching rag_chain2 for generate_text_mit_bild - without having to fix the llm beforehand
+def document_retrieval_chroma2():
+    #OpenAI embeddings -------------------------------
+    embeddings = OpenAIEmbeddings()
+
+    #HF embeddings -----------------------------------
+    #alternative embedding - for the vector store, to produce similarity vectors - the ...InstructEmbedding is very compute-intensive
+    #embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
+    #somewhat less compute-intensive:
+    #embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
+    #or simply without LangChain:
+    #embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
+
+    #ChromaDB to store the embeddings
+    db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
+    print("Chroma DB ready ...................")
+
+    return db
+
 ###########################################
 #to be able to store documents vectorized in MongoDB - prepare the DB for that
 def document_retrieval_mongodb(llm, prompt):
@@ -368,7 +390,6 @@ def rag_chain2(prompt, db, k=3):
     neu_prompt = rag_template
     for i, chunk in enumerate(retrieved_chunks):
         neu_prompt += f"{i+1}. {chunk}\n"
-    print("neu_prompt:.................")
     print(neu_prompt)
     return neu_prompt
 
@@ -492,14 +513,12 @@ def generate_text_zu_bild(file, prompt, k, rag_option):
         document_storage_chroma(splits)
         db = document_retrieval_chroma()
         #with RAG:
-        print("hier!!!!!!!!!!!!!!!!!!!!")
         neu_text_mit_chunks = rag_chain2(prompt, db, k)
         #for a chat LLM:
         #prompt = generate_prompt_with_history_openai(neu_text_mit_chunks, history)
         #as a plain prompt:
         prompt_neu = generate_prompt_with_history(neu_text_mit_chunks, history)
-
-        print(prompt_neu)
+
         headers, payload = process_image(file, prompt_neu)
         response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
         #output as json
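
For orientation, here is a minimal, self-contained sketch of the flow this commit enables: document_retrieval_chroma2 reopens the persisted Chroma store without needing an LLM handle, and rag_chain2 folds the retrieved chunks into the prompt. PATH_WORK, CHROMA_DIR, and rag_template are assumed stand-ins for values defined elsewhere in app.py, and the similarity_search call is an assumption about how rag_chain2 obtains its chunks; treat this as a sketch, not the app's exact code.

from langchain.embeddings.openai import OpenAIEmbeddings  # import paths vary by LangChain version
from langchain.vectorstores import Chroma

PATH_WORK = "."         # assumed stand-ins for the constants used in app.py
CHROMA_DIR = "/chroma"

def document_retrieval_chroma2():
    # no llm argument needed: only the embedding function and the persisted store
    embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY in the environment
    return Chroma(embedding_function=embeddings, persist_directory=PATH_WORK + CHROMA_DIR)

def rag_chain2(prompt, db, k=3):
    # fetch the k most similar chunks and number them into the prompt,
    # mirroring the loop shown in the diff; rag_template is an assumed stand-in
    retrieved_chunks = [doc.page_content for doc in db.similarity_search(prompt, k=k)]
    rag_template = "Answer using only the numbered context below.\n"
    neu_prompt = rag_template
    for i, chunk in enumerate(retrieved_chunks):
        neu_prompt += f"{i+1}. {chunk}\n"
    return neu_prompt

db = document_retrieval_chroma2()
print(rag_chain2("Which embedding models does the app support?", db, k=3))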
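The vision call itself goes through a raw requests.post rather than a LangChain LLM wrapper, which is why the new retrieval variant does not need an llm parameter. The helper below is a hypothetical reconstruction of what process_image might assemble for that call: the headers and message shape follow OpenAI's chat-completions vision format, but the model name, max_tokens, and base64 handling are assumptions, not the commit's actual helper.

import base64
import os

def process_image(file, prompt):
    # hypothetical sketch - app.py's real process_image may differ
    with open(file, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
    }
    payload = {
        "model": "gpt-4-vision-preview",  # assumed model; not shown in the commit
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url",
                 "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
            ],
        }],
        "max_tokens": 300,  # assumed limit
    }
    return headers, payload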