File size: 20,211 Bytes
0703e71
68c11f0
e504a30
daf3c40
68c11f0
 
d232ed1
5b6755d
901a87e
6efea88
daf3c40
 
 
d232ed1
0703e71
5b6755d
60ffe71
daf3c40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210b40c
 
 
 
 
090dd00
210b40c
 
 
090dd00
210b40c
 
 
090dd00
210b40c
 
 
 
 
 
f21f2ed
31afd70
 
a60c539
 
 
 
31afd70
 
a60c539
 
 
 
 
 
 
 
 
 
31afd70
 
a60c539
 
 
 
31afd70
 
a60c539
 
 
 
 
 
 
 
 
 
31afd70
 
a60c539
 
 
 
31afd70
a60c539
31afd70
 
b0e4079
 
a927854
 
8694806
b0e4079
8694806
b0e4079
8694806
 
 
 
 
 
 
 
 
 
 
 
 
b0e4079
8694806
 
0a402b2
f21f2ed
0a402b2
ad5b2ad
08a0c79
0108e87
ee765f5
08a0c79
3efbf76
f21f2ed
 
0bb5671
 
 
 
1230f78
 
6d1bb98
 
ed9ab49
8de2cbd
0edf770
ed9ab49
9427a68
6d1bb98
00a2578
 
6283f19
a2a0b4e
00a2578
6d1bb98
 
1418604
ec09f15
040d986
a1f7b63
2e380b0
a1f7b63
37528ea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e899803
1418604
 
 
 
 
eb7b56f
1aa6e35
090dd00
5b6755d
 
7f45e1c
 
76c4bfe
2cdd4cb
9bd0a5d
1861169
daf3c40
 
87d4d46
d232ed1
e34f0a0
 
 
 
 
 
e504a30
 
 
901a87e
 
e504a30
901a87e
 
e504a30
 
6efea88
7144bca
6efea88
 
 
 
 
f41fff5
e504a30
34e4cb8
 
e504a30
 
901a87e
 
6efea88
 
e34f0a0
 
71c0f59
 
e34f0a0
31afd70
34e4cb8
e504a30
82e1308
e504a30
f41fff5
31afd70
 
 
32e8378
 
f41fff5
50d1738
f2c7470
2632b0b
f2c7470
ae781e2
c43c3ad
70bc4c5
11d1d2d
 
 
 
4ac5777
00a2578
034edb9
4a543db
9ecbe8e
 
11d1d2d
 
d5478e2
 
19b3d90
db1bb98
70bc4c5
db1bb98
 
ed9ab49
0bb5671
 
2d01a69
 
 
 
 
 
 
 
f41fff5
1418604
e504a30
 
32e8378
bd26917
1418604
 
 
 
d232ed1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import os
import re
import io
import json
from typing import List, Tuple, Union
from pathlib import Path
import gradio as gr
import openai
import pymupdf
from docx import Document
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# API tokens, taken from the environment; each is None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")
LEPTON_API_TOKEN = os.environ.get("LEPTON_API_TOKEN")

# Google Sheets logging configuration.
# GOOGLE_SECRET_AUTH_JSON carries the service-account JSON document itself
# (it is fed to json.loads in addrow_googlesheets), not a path to a file.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
SERVICE_ACCOUNT_FILE = os.environ.get("GOOGLE_SECRET_AUTH_JSON")
SPREADSHEET_ID = os.environ.get("SPREADSHEET_ID")

def addrow_googlesheets(model, question, document, answer, reasoning, score):
    """Append one evaluation record as a new row of the logging spreadsheet.

    The six arguments are written, in order, to columns A-F of ``Sheet1`` in
    the spreadsheet identified by ``SPREADSHEET_ID``.

    Returns:
        True when the row was appended; False when logging is not configured,
        the service-account credentials cannot be parsed, or the Sheets API
        reports an HTTP error.
    """
    # Logging is optional: without configuration there is nothing to do, and
    # json.loads(None) would otherwise raise a TypeError into the caller.
    if not SERVICE_ACCOUNT_FILE or not SPREADSHEET_ID:
        print("Google Sheets logging skipped: credentials or spreadsheet ID not configured.")
        return False

    try:
        creds = Credentials.from_service_account_info(json.loads(SERVICE_ACCOUNT_FILE), scopes=SCOPES)
    except (TypeError, ValueError) as error:
        # json.JSONDecodeError is a ValueError; malformed key material also lands here.
        print(f"Invalid Google service account credentials: {error}")
        return False

    try:
        service = build('sheets', 'v4', credentials=creds)
        body = {
            'values': [[model, question, document, answer, reasoning, score]]
        }
        service.spreadsheets().values().append(
            spreadsheetId=SPREADSHEET_ID,
            range='Sheet1!A:F',
            valueInputOption='RAW',
            insertDataOption='INSERT_ROWS',
            body=body
        ).execute()
    except HttpError as error:
        print(f"An error occurred: {error}")
        return False

    return True

# Evaluation prompt template. The {question}, {document} and {answer}
# placeholders are filled in with str.format (see model_call); the doubled
# braces around the JSON example are str.format escapes for literal "{" / "}".
PROMPT = """
Given the following QUESTION, DOCUMENT and ANSWER you must analyze the provided answer and determine whether it is faithful to the contents of the DOCUMENT. The ANSWER must not offer new information beyond the context provided in the DOCUMENT. The ANSWER also must not contradict information provided in the DOCUMENT. Output your final verdict by strictly following this format: "PASS" if the answer is faithful to the DOCUMENT and "FAIL" if the answer is not faithful to the DOCUMENT. Show your reasoning.

--
QUESTION (THIS DOES NOT COUNT AS BACKGROUND INFORMATION):
{question}

--
DOCUMENT:
{document}

--
ANSWER:
{answer}

--

Your output should be in JSON FORMAT with the keys "REASONING" and "SCORE":
{{"REASONING": <your reasoning as bullet points>, "SCORE": <your final score>}}
"""

# Canned demo inputs. Each entry is rendered as a button labelled with its
# emoji and question; clicking it copies the entry's question, document and
# answer into the corresponding input fields (see select_template).
EXAMPLES = [
    {
        "emoji": "🏈",
        "question": "How many yards Hanson score with in the first?",
        "document": "To start the season, the Lions traveled south to Tampa, Florida to take on the Tampa Bay Buccaneers. The Lions scored first in the first quarter with a 23-yard field goal by Jason Hanson. The Buccaneers tied it up with a 38-yard field goal by Connor Barth, then took the lead when Aqib Talib intercepted a pass from Matthew Stafford and ran it in 28 yards. The Lions responded with a 28-yard field goal. In the second quarter, Detroit took the lead with a 36-yard touchdown catch by Calvin Johnson, and later added more points when Tony Scheffler caught an 11-yard TD pass. Tampa Bay responded with a 31-yard field goal just before halftime. The second half was relatively quiet, with each team only scoring one touchdown. First, Detroit's Calvin Johnson caught a 1-yard pass in the third quarter. The game's final points came when Mike Williams of Tampa Bay caught a 5-yard pass. The Lions won their regular season opener for the first time since 2007",
        "answer": "28"
    },
    {
        "emoji": "🫁",
        "question": "Does timing of initial surfactant treatment make a difference in rates of chronic lung disease or mortality in premature infants?",
        "document": "To compare two treatment strategies in preterm infants with or at risk of respiratory distress syndrome: early surfactant administration (within one hour of birth) versus late surfactant administration, in a geographically defined population. The primary outcome was chronic lung disease (CLD) and mortality before/at 36 weeks. Secondary outcomes included: duration of mechanical ventilation and continuous positive airway pressure (CPAP), post-natal steroids for CLD and major neonatal morbidities. Premature infants born at 22-32 weeks' gestation between January 2006 and December 2009. Ten neonatal intensive care units (NICUs) in New South Wales (NSW) and Australian Capital Territory (ACT), Australia. Retrospective analysis of prospectively collected data from the regional NICU database in NSW and ACT. Of the 2170 infants who received surfactant, 1182 (54.5%) and 988 (45.5%) received early and late surfactant, respectively. The early surfactant group was less mature (27.1 ± 2.1 versus 29.4 ± 2.1 weeks) and had more CLD and mortality (40.2% versus 20.0%). The multivariable analysis showed early surfactant to be associated with less duration of ventilation, longer duration of CPAP and longer hospital stay but had little or no impact on CLD/mortality.",
        "answer": "Yes. Early surfactant administration significantly reduces the rates of chronic lung disease and mortality among premature infants. This supports the routine use of early surfactant treatment over alternatives like CPAP, especially in very premature infants. Further large-scale studies are still needed to solidify these findings."
    },
    {
        "emoji": "🏭",
        "question": "Where does Vitol has its headquarter?",
        "document": "Information in the list relates to the most recent fiscal year (mostly FY 2022 or 2023). Rank Name Industry Revenue Profit Employees Headquarters[note 1] State-owned Ref. Revenue per worker USD millions 1 Walmart Retail Increase $611,289 $11,680 2,100,000 United States United States No [1] $291,090.00 2 Saudi Aramco Oil and gas Increase $603,651 $159,069 70,496 Saudi Arabia Saudi Arabia Yes [4] $8,562,911.37 3 Amazon Retail Increase $574,785 $30,425 1,525,000 United States United States No [5] $376,908 4 State Grid Corporation of China Electricity Increase $530,009 $8,192 870,287 China China Yes [6] $609,004.85 5 Vitol Commodities Increase $505,000 $15,000 1,560  Switzerland No [7][8] $323,717,948.72 6 China National Petroleum Corporation Oil and gas Increase $483,019 $21,080 1,087,049 China China Yes [9] $444,339.68 7 China Petrochemical Corporation Oil and gas Increase $471,154 $9,657 527,487 China China Yes [10] $893,204.95",
        "answer": "United States."
    },
    {
        "emoji": "🩺",
        "question": "What does the abbreviation of VD mean in the context of students?",
        "document": "A sexually transmitted infection (STI), also referred to as a sexually transmitted disease (STD) and the older term venereal disease (VD), is an infection that is spread by sexual activity, especially vaginal intercourse, anal sex, oral sex, or sometimes manual sex.",
        "answer": "In the context of student, VD stands usually for Valedictorian, an academic title for the highest-performing student of a graduating class of an academic institution."
    },
    {
        "emoji": "💻",
        "question": "How much market cap does Microsoft, Apple and Nvidia have together?",
        "document": "Here's a list of the top 10 companies by market cap in 2024 (as of February 13, 2024): Company Sector Market Cap (in USD) #1 Microsoft Technology $3.085 trillion #2 Apple Technology $2.889 trillion #3 Saudi Aramco Oil & Gas $1.997 trillion #4 Alphabet (Google) Technology $1.841 trillion #5 Amazon E-commerce $1.790 trillion #6 Nvidia Technology $1.784 trillion #7 Meta Platforms Social Media $1.195 trillion #8 Berkshire Hathaway Diversified Investments $861.74 billion #09 Eli Lilly Pharmaceuticals $699.88 billion #10 TSMC Semiconductors $676.61 billion",
        "answer": "They have a shared market cap of $ 7.758 trillion USD."
    },
    {
        "emoji": "📈",
        "question": "Which company has the largest market cap in 2024?",
        "document": "Here's a list of the top 10 companies by market cap in 2024 (as of February 13, 2024): Company Sector Market Cap (in USD) #1 Microsoft Technology $3.085 trillion #2 Apple Technology $2.889 trillion #3 Saudi Aramco Oil & Gas $1.997 trillion #4 Alphabet (Google) Technology $1.841 trillion #5 Amazon E-commerce $1.790 trillion #6 Nvidia Technology $1.784 trillion #7 Meta Platforms Social Media $1.195 trillion #8 Berkshire Hathaway Diversified Investments $861.74 billion #09 Eli Lilly Pharmaceuticals $699.88 billion #10 TSMC Semiconductors $676.61 billion",
        "answer": "Alphabet has the largest market cap with $1.841 trillion USD"
    },
    {
        "emoji": "🪖",
        "question": "When did World War II start?",
        "document": """World War II[b] or the Second World War was a global conflict that lasted from 1939 to 1945. The vast majority of the world's countries, including all the great powers, fought as part of two opposing military alliances: the Allies and the Axis. Many participating countries invested all available economic, industrial, and scientific capabilities into this total war, blurring the distinction between civilian and military resources. Aircraft played a major role, enabling the strategic bombing of population centres and delivery of the only two nuclear weapons ever used in war. It was by far the deadliest conflict in history, resulting in 70–85 million fatalities. Millions died due to genocides, including the Holocaust, as well as starvation, massacres, and disease. In the wake of Axis defeat, Germany, Austria, and Japan were occupied, and war crime tribunals were conducted against German and Japanese leaders. The causes of the war are debated; contributing factors included the rise of fascism in Europe, the Spanish Civil War, the Second Sino-Japanese War, Soviet–Japanese border conflicts, and tensions in the aftermath of World War I. World War II is generally considered to have begun on 1 September 1939, when Nazi Germany, under Adolf Hitler, invaded Poland. The United Kingdom and France declared war on Germany on 3 September. Under the Molotov–Ribbentrop Pact of August 1939, Germany and the Soviet Union had partitioned Poland and marked out their "spheres of influence" across Finland, Estonia, Latvia, Lithuania, and Romania. From late 1939 to early 1941, in a series of campaigns and treaties, Germany conquered or controlled much of continental Europe in a military alliance called the Axis with Italy, Japan, and other countries. 
Following the onset of campaigns in North and East Africa, and the fall of France in mid-1940, the war continued primarily between the European Axis powers and the British Empire, with the war in the Balkans, the aerial Battle of Britain, the Blitz of the UK, and the Battle of the Atlantic. In June 1941, Germany led the European Axis powers in an invasion of the Soviet Union, opening the Eastern Front, the largest land theatre of war in history.""",
        "answer": "It started on 3 September 1939 when France and United Kingdom declared war on Germany."
    }

]

# Page banner (logos and project badges) followed by the introductory
# Markdown; rendered once at the top of the app via gr.Markdown(HEADER).
HEADER = """
<div style="width: 100%; display: flex; flex-direction: column; gap: 24px; padding-top: 24px; position: relative">
    <img src="https://postimage.me/images/2024/07/30/lynx.png" width="175" style="position: absolute; top: 0; right: 48px">
    <div style="display: flex; justify-content: space-between; z-index: 1;">
        <a href="https://www.patronus.ai">
            <img src="https://postimage.me/images/2024/07/31/FullLogo_ColorDefault.png" width="250">
        </a>
        <div style="display: flex; gap: 12px;">
            <a href="https://huggingface.co/PatronusAI/Llama-3-Patronus-Lynx-8B-Instruct">
                <img src="https://img.shields.io/badge/%F0%9F%A4%97%20Model_Card-Huggingface-orange" height="20">
            </a>
            <a href="https://github.com/patronus-ai/Lynx-hallucination-detection">
                <img src="https://img.shields.io/badge/GitHub-Lynx--hallucination--detection-indigo" height="20">
            </a>
            <a href="https://arxiv.org/abs/2407.08488">
                <img src="https://img.shields.io/badge/arXiv-2407.08488-b31b1b.svg" height="20">
            </a>
        </div>
    </div>
    <div>
        <h1>Lynx 8B v1.1 & Lynx 70B</h1>
        <h2>Patronus Lynx Demo</h2>
    </div>
</div>

**Patronus Lynx** is a state-of-the-art open-source model for hallucination detection.

**Context Window**: Lynx 8B v1.1 has a **128k** context window & Lynx 70B has an **8k** context window.

**Getting Started**: Provide a question and document or context given to your model in addition to the answer given by the model and then click submit. The output panel will indicate whether the response is a hallucination (Fail) or if it is faithful to the given document or context (Pass) through the score Pass or Fail and provide reasoning behind the score.

**File Upload**: You can choose to upload a file for the document field. The file must be a PDF, TXT, or DOCX file and within the context window of the model.

"""

# Markdown heading displayed above the example buttons.
EXAMPLES_HEADER = "\n# Try it Yourself!\n"

# Extensions the upload button accepts. Legacy ".doc" is intentionally not
# listed: upload_file only extracts text from pdf, txt and docx files, so a
# .doc upload would yield an empty document.
UPLOADABLE_FILE_TYPES = [".pdf", ".txt", ".docx"]

# Custom stylesheet passed to gr.Blocks(css=...).
#   .example-button      - applied to the example/template buttons
#   .fixed-height-button - applied to the upload button and uploaded-file group
css = """
.example-button {
    width: fit-content;
    font-size: 1rem;
    font-weight: 400 !important;
    padding: .5rem 1rem;
    text-align: start;
}
.fixed-height-button {
    height: fit-content;
    word-break: break-all;
    font-size: .85rem;
}
"""

def update_client_base_url(model_name):
    """Return the inference endpoint URL for the selected model.

    Returns None when *model_name* is not one of the two known display names.
    """
    endpoints = (
        ("Patronus Lynx 8B v1.1", "https://yb15a7dy-lynx-v1-1-8b.tin.lepton.run/api/v1/"),
        ("Patronus Lynx 70B", "https://yb15a7dy-patronus-lynx-70b-v1-0.tin.lepton.run/api/v1/"),
    )
    for known_name, endpoint in endpoints:
        if model_name == known_name:
            return endpoint
    return None

def parse_patronus_lynx_response(
    response: str,
) -> Tuple[bool, Union[List[str], None]]:
    """
    Parses the response from the Patronus Lynx LLM and returns a tuple of:
    - Whether the response is hallucinated or not.
    - A reasoning trace explaining the decision.

    The verdict defaults to hallucinated (True) and only becomes False when a
    ``"SCORE"`` of ``PASS`` is found. The reasoning is None when no
    ``"REASONING": [...]`` list is found. Otherwise it is the list body split
    on the quote-comma-quote separators, so the first item keeps its opening
    quote and the last item keeps its closing quote.
    """
    # Default to hallucinated
    hallucination, reasoning = True, None
    # Preferred pattern: the closing bracket must be followed by the "SCORE"
    # key or by the end of the JSON object, so brackets inside the reasoning
    # text (e.g. citations like "[1]") do not end the capture early.
    anchored_reasoning_pattern = r'"REASONING":\s*\[(.*?)\]\s*(?:,\s*"SCORE"|\}\s*$)'
    # Fallback pattern: stop at the first closing bracket; used when the
    # response does not have the expected shape around the list.
    reasoning_pattern = r'"REASONING":\s*\[(.*?)\]'
    score_pattern = r'"SCORE":\s*"?\b(PASS|FAIL)\b"?'

    reasoning_match = (
        re.search(anchored_reasoning_pattern, response, re.DOTALL)
        or re.search(reasoning_pattern, response, re.DOTALL)
    )
    score_match = re.search(score_pattern, response)

    if score_match:
        score = score_match.group(1)
        if score == "PASS":
            hallucination = False
    if reasoning_match:
        reasoning_content = reasoning_match.group(1)
        reasoning = re.split(r"['\"],\s*['\"]", reasoning_content)

    return hallucination, reasoning

def model_call(question, document, answer, client_base_url):
    """Ask the selected Lynx endpoint whether *answer* is faithful to *document*.

    Args:
        question: The question that was asked.
        document: The reference text the answer must be grounded in.
        answer: The answer to evaluate.
        client_base_url: Base URL of the OpenAI-compatible endpoint to query.

    Returns:
        A ``(reasoning, score)`` pair of strings for the two output boxes.
        Both are empty when any input is empty. The reasoning is empty when
        the model output contains no parsable reasoning list.
    """
    # Nothing to evaluate: return before building a client or calling the API.
    if not (question and document and answer):
        return "", ""

    client = openai.OpenAI(
        base_url=client_base_url,
        api_key=LEPTON_API_TOKEN
    )
    prompt = PROMPT.format(question=question, document=document, answer=answer)
    response = client.completions.create(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt,
        temperature=0.0
    )
    hallucination, reasoning = parse_patronus_lynx_response(response.choices[0].text)
    score = "🔴 FAIL 🔴" if hallucination else "🟢 PASS 🟢"
    # The parser returns None when it finds no reasoning list. When it does
    # find one, the joined text still carries the list's outer quote
    # characters, which the [1:-1] slice removes.
    combined_reasoning = " ".join(reasoning)[1:-1] if reasoning else ""
    model = "Patronus Lynx 8B v1.1" if client_base_url=="https://yb15a7dy-lynx-v1-1-8b.tin.lepton.run/api/v1/" else "Patronus Lynx 70B"
    # Spreadsheet logging is best-effort: a logging failure must not discard
    # the verdict that was just computed for the user.
    try:
        addrow_googlesheets(model, question, document, answer, combined_reasoning, score)
    except Exception as error:
        print(f"Failed to log result to Google Sheets: {error}")
    return combined_reasoning, score

def return_approximate_token_size(text, max_token_length=8000):
    """Report whether *text* fits within an approximate token budget.

    The token count is estimated as one token per four characters.

    Args:
        text: The text to measure.
        max_token_length: Exclusive upper bound on the estimated token count.
            Defaults to 8000, the limit this function previously hard-coded.

    Returns:
        True when the estimated token count is strictly below
        *max_token_length*, otherwise False.
    """
    approximate_tokens = len(text) / 4
    return approximate_tokens < max_token_length

def get_filetype(filename):
    """Return the text after the last "." in *filename*, case unchanged.

    A name without any dot is returned whole.
    """
    pieces = filename.rsplit(".", 1)
    return pieces[-1]

def extract_text_pymupdf(file):
    """Open *file* with pymupdf and return the text of all its pages, concatenated in order."""
    with pymupdf.open(file) as opened_document:
        page_texts = [page.get_text() for page in opened_document]
    return "".join(page_texts)

def extract_text_python_docx(file):
    """Return the paragraph texts of a .docx file, one per line, with outer whitespace stripped."""
    paragraphs = Document(file).paragraphs
    lines = [f"{paragraph.text}\n" for paragraph in paragraphs]
    return "".join(lines).strip()

def upload_file(filepath):
    """Extract the text of an uploaded file and update the upload widgets.

    Args:
        filepath: Path of the uploaded file, or None when nothing was uploaded.

    Returns:
        A four-item list of updates: upload button, uploaded-file group,
        file-name label, and the extracted text for the document box.
        With no file, the button stays visible and the text is empty.

    Raises:
        gr.Error: If the file type is not pdf, txt or docx, or if the
            extracted text exceeds the approximate token limit.
    """
    if filepath is None:
        return [gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES), gr.Group(visible=False), gr.Markdown(""), ""]

    name = Path(filepath).name
    # Compare case-insensitively so that e.g. "REPORT.PDF" is recognised.
    filetype = get_filetype(name).lower()
    if filetype in ("pdf", "txt"):
        extracted_file_text = extract_text_pymupdf(filepath)
    elif filetype == "docx":
        extracted_file_text = extract_text_python_docx(filepath)
    else:
        # Previously an unsupported file was accepted and silently produced an
        # empty document; tell the user instead.
        raise gr.Error("Unsupported file type. Please upload a PDF, TXT, or DOCX file.")

    # return warning if file is too large
    if not return_approximate_token_size(extracted_file_text):
        raise gr.Error("File is too large to process. Please upload a smaller file.")

    return [gr.UploadButton(visible=False), gr.Group(visible=True), gr.Markdown(f"**Uploaded file:** {name}"), extracted_file_text]
def reset_buttons():
    """Return updates that restore the upload widgets to their initial state.

    The upload button is shown again, the uploaded-file group is hidden, the
    file-name label is blanked and the document box is emptied.
    """
    upload_button = gr.UploadButton(visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES)
    hidden_group = gr.Group(visible=False)
    blank_label = gr.Markdown("")
    empty_document = gr.Textbox(value="")
    return [upload_button, hidden_group, blank_label, empty_document]

def select_template(template):
    """Return the (question, document, answer) triple stored in an example entry."""
    wanted_keys = ("question", "document", "answer")
    return tuple(template[key] for key in wanted_keys)

# def download_file():
#     return [gr.UploadButton(visible=True), gr.DownloadButton(visible=False)]

# Build the Gradio interface: header, model selector, a two-column
# inputs/outputs layout, the example buttons, and the event wiring.
with gr.Blocks(css=css, theme=gr.themes.Default(spacing_size="sm", font=[gr.themes.GoogleFont("Plus Jakarta Sans"), "Arial", "sans-serif"], primary_hue="indigo", secondary_hue="purple")) as demo:
    # State holding the endpoint URL of the currently selected model;
    # initialised to the 8B model, matching the dropdown's default value.
    base_url_state = gr.State(update_client_base_url("Patronus Lynx 8B v1.1"))
    gr.Markdown(HEADER)
    model_dropdown = gr.Dropdown(choices=["Patronus Lynx 8B v1.1", "Patronus Lynx 70B"], value="Patronus Lynx 8B v1.1", label="Model", interactive=True)
    with gr.Row(equal_height=True):
        # Left column: user inputs.
        with gr.Column(scale=1):
            gr.Markdown("**Your Inputs**")
            # with gr.Row():
            question = gr.Textbox(label="Question")
            # with gr.Row():
            document = gr.Textbox(label="Document", scale=4)
            with gr.Row():
                # Upload button, plus an initially hidden group that shows the
                # uploaded file's name and a button to clear it.
                u = gr.UploadButton("Upload", visible=True, file_count="single", file_types=UPLOADABLE_FILE_TYPES, scale=1, elem_classes="fixed-height-button")
                file_group = gr.Group(elem_classes="fixed-height-button", visible=False)
                with file_group:
                    file_name = gr.Markdown("")
                    c = gr.ClearButton([u, file_name])
            # with gr.Row():
            answer = gr.Textbox(label="Answer")
            with gr.Row():
                clear_btn = gr.ClearButton([question, document, answer])
                submit_button = gr.Button("Submit", variant="primary")
        # Right column: model outputs.
        with gr.Column(scale=1):
            gr.Markdown("**Model Outputs**")
            reasoning = gr.Textbox(label="Reasoning")
            score = gr.Textbox(label="Score (FAIL if Hallucinated, PASS if not)")
    gr.Markdown("&nbsp;")
    gr.Markdown(EXAMPLES_HEADER)
    with gr.Row():
        with gr.Column():
            # One button per example; clicking it fills the three input boxes
            # from that example (passed to the handler through a gr.State).
            for _, example in enumerate(EXAMPLES):
                template_btn = gr.Button(f"{example['emoji']} {example['question']}", elem_classes="example-button")
                template_btn.click(
                    fn=select_template,
                    inputs=[gr.State(example)],
                    outputs=[question, document, answer]
                    )

    # Changing the model updates the stored endpoint URL.
    model_dropdown.change(fn=update_client_base_url, inputs=[model_dropdown], outputs=[base_url_state])
    # Uploading a file fills the document box; the file group's clear button resets it.
    u.upload(upload_file, u, [u, file_group, file_name, document])
    c.click(reset_buttons, None, [u, file_group, file_name, document])
    # d.click(download_file, None, [u, d])

    # The submit button and submitting from any of the three input boxes all
    # run the same evaluation.
    submit_button.click(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    question.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    document.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
    answer.submit(fn=model_call, inputs=[question, document, answer, base_url_state], outputs=[reasoning, score])
# Start the app as soon as this module is executed.
demo.launch()