rasmus1610 committed
Commit e22d4b7
1 Parent(s): d02f3a9

reranking and polishing

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+carotid_embeddings_sentence_transformers_061123.csv filter=lfs diff=lfs merge=lfs -text
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
__pycache__/llm.cpython-310.pyc ADDED
Binary file (1.22 kB).
 
__pycache__/qa.cpython-310.pyc ADDED
Binary file (4.4 kB).
 
app.py CHANGED
@@ -1,66 +1,32 @@
 import openai
 import numpy as np
 import pandas as pd
-from sentence_transformers import SentenceTransformer
+from sentence_transformers import SentenceTransformer, CrossEncoder
 import re
 import gradio as gr
 import json
-import requests
+import ast
+from llm import OpenAILLM
+from qa import QuestionAnswerer
 
-# Calculate the cosine similarity
-def cos_sim(vector1, vector2):
-    cosine_similarity = np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
-    return cosine_similarity
+df = pd.read_csv("/Users/mariusvach/Code/python/leitlinien_chatbot/carotis_chatbot/carotid_embeddings_sentence_transformers_061123.csv")
+df["embeddings"] = df.embeddings.apply(ast.literal_eval)
 
-def sim_search(df, query, n=3, dot=False):
-    embedding = model.encode(query)
-    if dot:
-        df['similarities'] = df.embeddings.apply(lambda x: x @ embedding)
-        print("using dot product")
-    else:
-        df['similarities'] = df.embeddings.apply(lambda x: cos_sim(x, embedding))
-        print("using cosine similarity")
-    res = df.sort_values('similarities', ascending=False).head(n)
-    return res
-
-def create_prompt(context, question):
-    return f"""
-    Context information is below.
-    ---------------------
-    {context}
-    ---------------------
-    Given the context information and not prior knowledge, answer the query.
-    Query: {question}
-    Answer: \
-    """
-
-
-def answer_question(question, model="gpt-3.5-turbo", n=3):
-    r = sim_search(df, question, n=n)
-    context = "\n\n".join(r.chunks)
-    prompt = create_prompt(context, question)
-    response = openai.ChatCompletion.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {"role": "system", "content": "You are a helpful assistant answering questions in german. You answer only in german. If you do not know an answer you say it. You do not fabricate answers."},
-            {"role": "user", "content": prompt},
-        ]
-    )
-    return response.choices[0].message.content
-
-df = pd.read_csv("carotid_embeddings_sentence_transformers.csv")
-df["embeddings"] = df.embeddings.apply(json.loads)
-model = SentenceTransformer('thenlper/gte-base')
+qa = QuestionAnswerer(df, SentenceTransformer('thenlper/gte-base'), OpenAILLM('gpt-3.5-turbo-16k'), CrossEncoder('cross-encoder/ms-marco-TinyBERT-L-2-v2'))
 
 def gradio_answer(input):
-    return answer_question(input, n=2)
+    return qa.answer_question(input, n=5)
 
 desc_string = """
-In dieser Demo kannst du einer KI Fragen zum Inhalt der ['S3-Leitlinie Diagnostik, Therapie und Nachsorge der extracraniellen Carotisstenose'](https://register.awmf.org/de/leitlinien/detail/004-028) stellen. Ein paar Beispiele findest du unten.
-
-### Wie funktioniert das?
-
-Mit Hilfe eines neuronalen Netzwerkes werden die Fragen in eine Vektor-Repräsentation ('Embedding') umgewandelt. Dann wird die Ähnlichkeit des 'Frage-Vektors' mit genauso erstellten Vektoren von Textpassagen der Leitlinie berechnet. Die Textpassagen, die am ähnlichsten sind, werden dann einem 'Large Language Model (LLM)' zur Beantwortung der Frage als Context gegeben. Diese Technik heißt ['retrieval-augmented Generation (RAG)'](https://research.ibm.com/blog/retrieval-augmented-generation-RAG).
+In dieser Demo kannst du einer KI Fragen zum Inhalt der ['S3-Leitlinie Diagnostik, Therapie und Nachsorge der extracraniellen Carotisstenose'](https://register.awmf.org/de/leitlinien/detail/004-028) stellen. Ein paar Beispiel-Fragen findest du unten.
+
+## Wie funktioniert das?
+
+1. Die Frage wird durch ein neuronales Netzwerk in eine Vektor-Repräsentation ('Embedding') umgewandelt.
+2. Die Ähnlichkeit des 'Frage-Vektors' wird mit den genauso erzeugten Vektoren von Textpassagen der Leitlinie verglichen.
+3. Ein 'Large Language Model (LLM)' beantwortet nun mit Hilfe der ähnlichsten Textpassagen die gestellte Frage.
+
+Diese Technik heißt ['Retrieval-augmented Generation (RAG)'](https://research.ibm.com/blog/retrieval-augmented-generation-RAG). Hierdurch kann verhindert werden, dass LLMs Fakten einfach erfinden.
 """
 
 demo = gr.Interface(
@@ -70,9 +36,11 @@ demo = gr.Interface(
     title="🧠 Q&A S3 Leitlinie Carotisstenose",
     description=desc_string,
     examples=[
-        "In welchen Intervallen ist eine Nachuntersuchung nach Carotis-Stent (CAS) angezeigt?",
+        "Welche Sensitivität hatte die Transcranielle Doppler-Sonographie (TCD) bei der Detektion eines perioperativen Schlaganfalls?",
         "Ist eine ambulante Therapie der Carotisstenose mittels CEA oder CAS möglich und sinnvoll?",
-        "Was sollte man als Bradykardie-Therapie bei Nachdilatation eines Stents einsetzen?"]
+        "Wie viele zerebrale Ischämien in Deutschland werden durch >50%ige Stenosen oder Verschlüsse der extracraniellen A. carotis verursacht?",
+        "Welche Symptome können durch Stenosen der A. carotis verursacht werden?"
+    ]
 )
 
 demo.launch()
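The desc_string above describes the retrieval step only in prose. As a rough illustration of what steps 1 and 2 of that description boil down to (the passage texts and variable names below are placeholders, not the app's actual code or data), one could write:

```python
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("thenlper/gte-base")

# Step 1: turn the question into an embedding vector.
question_vec = model.encode("Welche Symptome können durch Stenosen der A. carotis verursacht werden?")

# Step 2: compare it against pre-computed passage embeddings via cosine similarity
# and keep the most similar passages as context for the LLM.
passage_vecs = model.encode(["Textpassage 1 ...", "Textpassage 2 ...", "Textpassage 3 ..."])  # placeholders
sims = passage_vecs @ question_vec / (
    np.linalg.norm(passage_vecs, axis=1) * np.linalg.norm(question_vec)
)
top_indices = np.argsort(sims)[::-1][:2]  # indices of the best-matching passages
```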
carotid_embeddings_sentence_transformers.csv DELETED
The diff for this file is too large to render.
 
carotid_embeddings_sentence_transformers_061123.csv ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c16207bb8fbb5f55fdf33300e3d75043d36e8fb562854ebd585c20f2f595b0c0
+size 10699905
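The new CSV is tracked via Git LFS, so only the pointer appears in the diff. Based on how app.py consumes it (a `chunks` column plus an `embeddings` column parsed with `ast.literal_eval`), a minimal sketch of how such a file could be regenerated might look like the following; the chunk texts are placeholders, and the actual chunking of the guideline is not part of this commit:

```python
import pandas as pd
from sentence_transformers import SentenceTransformer

# Placeholder: in the real pipeline these would be text passages from the guideline.
chunks = ["Textpassage 1 ...", "Textpassage 2 ..."]

model = SentenceTransformer("thenlper/gte-base")
embeddings = model.encode(chunks)

df = pd.DataFrame({
    "chunks": chunks,
    # Store each vector as a plain Python list so it round-trips through the CSV
    # and can be read back with ast.literal_eval.
    "embeddings": [list(map(float, vec)) for vec in embeddings],
})
df.to_csv("carotid_embeddings_sentence_transformers_061123.csv", index=False)
```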
llm.py ADDED
@@ -0,0 +1,27 @@
+import openai
+import os
+
+openai.api_key = os.environ["OPENAI_API_KEY"]
+
+class BaseLLM:
+    def __init__(self, model):
+        self.model = model
+
+    def get_response(self, system_prompt, query):
+        raise NotImplementedError
+
+
+class OpenAILLM(BaseLLM):
+    def __init__(self, model):
+        self.model = model
+
+    def get_response(self, system_prompt, query, **kwargs):
+        response = openai.ChatCompletion.create(
+            model=self.model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": query},
+            ],
+            **kwargs,
+        )
+        return response.choices[0].message.content
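For reference, a minimal usage sketch of the new wrapper (assuming OPENAI_API_KEY is set in the environment; the model name and question are just examples, and extra keyword arguments are forwarded to ChatCompletion.create):

```python
from llm import OpenAILLM

llm = OpenAILLM("gpt-3.5-turbo")
answer = llm.get_response(
    "You are a helpful assistant answering questions in German.",
    "Was ist eine Carotisstenose?",
    temperature=0,  # passed through to openai.ChatCompletion.create via **kwargs
)
print(answer)
```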
qa.py ADDED
@@ -0,0 +1,91 @@
+import numpy as np
+import pandas as pd
+from rank_bm25 import BM25Okapi
+
+from llm import OpenAILLM
+
+def cosine_similarity(vector1, vector2):
+    return np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))
+
+class QuestionAnswerer:
+    def __init__(self, docs, embedding_model, llm=OpenAILLM('gpt-3.5-turbo'), cross_encoder=None):
+        self.docs = docs
+        self.bm25 = BM25Okapi([c.split(" ") for c in self.docs.chunks.values[1:]])
+        self.embedding_model = embedding_model
+        self.llm = llm
+        self.cross_encoder = cross_encoder
+
+    def sim_search(self, query, n=10, use_hyde=False, use_dot_product=False):
+        if use_hyde:
+            generated_doc = self._get_generated_doc(query)
+            print("generated document (hyde): \n", generated_doc)
+            embedding = self.embedding_model.encode(generated_doc)
+        else:
+            embedding = self.embedding_model.encode(query)
+
+        if use_dot_product:
+            similarities = self.docs['embeddings'].apply(lambda x: np.dot(x, embedding))
+        else:
+            similarities = self.docs['embeddings'].apply(lambda x: cosine_similarity(x, embedding))
+
+        self.docs['similarities'] = similarities
+        return self.docs.sort_values('similarities', ascending=False).head(n)
+
+    def sim_search_rerank(self, query, n=10, sim_search_n=100, **kwargs):
+        search_results = self.sim_search(query, n=sim_search_n, use_hyde=False, **kwargs)
+        reranked_results = self.rerank(search_results, query)
+        return reranked_results[:n]
+
+    def sim_search_bm25(self, query, n=10):
+        tokenized_query = query.split(" ")
+        doc_scores = self.bm25.get_scores(tokenized_query)
+        self.docs['bm25'] = np.insert(doc_scores, 0, 0)  # the first chunk is excluded from the BM25 index, so pad its score with 0
+        result = self.docs.sort_values('bm25', ascending=False)[:n]
+        return result
+
+    def _create_prompt(self, context, question):
+        return f"""
+        Context information is below.
+        ---------------------
+        {context}
+        ---------------------
+        Given the context information and not prior knowledge, answer the query.
+        Query: {question}
+        Answer: \
+        """
+
+    def _get_generated_doc(self, question):
+        prompt = f"""Write a guideline section in German answering the question below
+        ---------------------
+        Question: {question}
+        ---------------------
+        Answer: \
+        """
+        system_prompt = "You are an experienced radiologist answering medical questions. You answer only in German."
+        return self.llm.get_response(system_prompt, prompt)
+
+
+    def rerank(self, docs, query):
+        inp = [[query, doc.chunks] for doc in docs.itertuples()]
+        cross_scores = self.cross_encoder.predict(inp) if self.cross_encoder else []
+        docs['cross_score'] = cross_scores
+        return docs.sort_values('cross_score', ascending=False)
+
+    def answer_question(self, question, n=3, use_hyde=False, use_reranker=False, use_bm25=False):
+        if use_reranker and use_hyde:
+            print('reranking together with hyde is not supported yet')
+        if use_reranker:
+            search_results = self.sim_search_rerank(question, n=n)
+        elif use_bm25:
+            search_results = self.sim_search_bm25(question, n=n)
+        else:
+            search_results = self.sim_search(question, n=n, use_hyde=use_hyde)
+
+        context = "\n\n".join(search_results['chunks'])
+
+        prompt = self._create_prompt(context, question)
+
+        system_prompt = "You are a helpful assistant answering questions in German. You answer only in German. If you do not know an answer you say it. You do not fabricate answers."
+
+        return self.llm.get_response(system_prompt, prompt)
+
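A short sketch of how the different retrieval modes of QuestionAnswerer can be combined, mirroring the setup in app.py (the CSV path and question string are illustrative examples, not fixed by this commit):

```python
import ast
import pandas as pd
from sentence_transformers import SentenceTransformer, CrossEncoder

from llm import OpenAILLM
from qa import QuestionAnswerer

df = pd.read_csv("carotid_embeddings_sentence_transformers_061123.csv")
df["embeddings"] = df.embeddings.apply(ast.literal_eval)

qa = QuestionAnswerer(
    df,
    SentenceTransformer("thenlper/gte-base"),
    OpenAILLM("gpt-3.5-turbo-16k"),
    CrossEncoder("cross-encoder/ms-marco-TinyBERT-L-2-v2"),
)

question = "Welche Symptome können durch Stenosen der A. carotis verursacht werden?"

# Plain embedding retrieval over the top-5 chunks (what app.py uses).
print(qa.answer_question(question, n=5))

# Retrieve a wider candidate set first, then rerank it with the cross-encoder.
print(qa.answer_question(question, n=5, use_reranker=True))

# HyDE: embed a generated hypothetical guideline passage instead of the raw question.
print(qa.answer_question(question, n=5, use_hyde=True))
```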