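"""Gradio demo: retrieval-augmented Q&A (in German) over the S3 guideline on
carotid stenosis. Pre-computed sentence-transformers embeddings of the guideline
chunks are searched for each query, and the best matches are passed as context to
an OpenAI chat model. The OpenAI API key is read from the OPENAI_API_KEY
environment variable by the openai library."""
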
import json

import gradio as gr
import numpy as np
import openai
import pandas as pd
from sentence_transformers import SentenceTransformer

def cos_sim(vector1, vector2):
    """Calculate the cosine similarity between two vectors."""
    return np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))

def sim_search(df, query, n=3, dot=False):
    """Return the n chunks whose embeddings are most similar to the query."""
    embedding = model.encode(query)
    if dot:
        # Plain dot product; ranks identically to cosine similarity if the embeddings are normalized.
        df['similarities'] = df.embeddings.apply(lambda x: x @ embedding)
        print("using dot product")
    else:
        df['similarities'] = df.embeddings.apply(lambda x: cos_sim(x, embedding))
        print("using cosine similarity")
    return df.sort_values('similarities', ascending=False).head(n)
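
# Example (hypothetical query; assumes df and model are loaded as at the bottom of this file):
#   top = sim_search(df, "Nachsorge nach CAS", n=3)
#   print(top[["chunks", "similarities"]])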

def create_prompt(context, question):
    return f"""
    Context information is below.
    ---------------------
    {context}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {question}
    Answer: \
    """


def answer_question(question, chat_model="gpt-3.5-turbo", n=3):
    """Retrieve the n most similar chunks and ask the chat model to answer from them."""
    res = sim_search(df, question, n=n)
    context = "\n\n".join(res.chunks)
    prompt = create_prompt(context, question)
    # Pre-1.0 openai client interface.
    response = openai.ChatCompletion.create(
        model=chat_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant answering questions in German. You answer only in German. If you do not know an answer, you say so. You do not fabricate answers."},
            {"role": "user", "content": prompt},
        ]
    )
    return response.choices[0].message.content

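# The CSV loaded below is assumed to come from a separate preprocessing step that is
# not part of this file. A minimal sketch of such a step (file names, chunking rule,
# and the PDF source are assumptions, not the actual pipeline):
#
#   import json
#   import pandas as pd
#   import pdftotext
#   from sentence_transformers import SentenceTransformer
#
#   with open("leitlinie_carotisstenose.pdf", "rb") as f:   # hypothetical source PDF
#       text = "\n\n".join(pdftotext.PDF(f))
#   chunks = [c.strip() for c in text.split("\n\n") if c.strip()]
#   enc = SentenceTransformer("thenlper/gte-base")
#   embeddings = [json.dumps(enc.encode(c).tolist()) for c in chunks]
#   pd.DataFrame({"chunks": chunks, "embeddings": embeddings}).to_csv(
#       "stroke_embeddings_sentence_transformers.csv", index=False)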
# Pre-computed chunk embeddings: one row per text chunk, columns "chunks" (text)
# and "embeddings" (JSON-encoded vector).
df = pd.read_csv("stroke_embeddings_sentence_transformers.csv")
df["embeddings"] = df.embeddings.apply(lambda e: np.array(json.loads(e)))

# Query encoder; must match the model used to create the stored embeddings.
model = SentenceTransformer('thenlper/gte-base')

def gradio_answer(question):
    return answer_question(question)

# German UI: one question box in, one answer box out, with example questions about the guideline.
demo = gr.Interface(
    fn=gradio_answer,
    inputs=gr.Textbox(lines=1, placeholder="Frage hier...", label="Frage"),
    outputs=gr.Textbox(lines=4, placeholder="Antwort hier...", label="Antwort"),
    title="S3 Leitlinie Carotis Stenose",
    examples=[
        "In welchen Intervallen ist eine Nachuntersuchung nach CAS angezeigt?",
        "Ist eine ambulante Therapie der Carotisstenose mittels CEA oder CAS möglich und sinnvoll?",
        "Was sollte man als Bradykardie-Therapie bei Nachdilatation eines Stents einsetzen?",
    ],
)

demo.launch()
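
# Run locally with `python app.py`; launch() starts the Gradio web server.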