Minakshee25 committed on
Commit
392cc43
·
verified ·
1 Parent(s): 4907140

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -0
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tqdm import tqdm
2
+ from selfcheckgpt.modeling_selfcheck import SelfCheckNLI, SelfCheckBERTScore, SelfCheckNgram
3
+ import torch
4
+ import spacy
5
+ import os
6
+ import gradio as gr
7
+
8
+
9
# --- Hallucination scorers ------------------------------------------------
# English spaCy pipeline; split_sent() uses it for sentence segmentation.
nlp = spacy.load("en_core_web_sm")
# Run the NLI scorer on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
selfcheck_nli = SelfCheckNLI(device=device)
selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
selfcheck_ngram = SelfCheckNgram(n=1)  # n=1 means Unigram, n=2 means Bigram, etc.

# --- Azure OpenAI configuration (all values come from the environment) ----
openai_key = os.getenv("OPENAI_API_KEY")
resource_url = os.getenv("OPENAI_API_RESOURCEURL")
api_version = os.getenv("OPENAI_API_VERSION")
# Both names point at the same endpoint setting; api_url is kept as a
# separate module-level name because the client construction below uses it.
api_url = resource_url

from openai import AzureOpenAI

# Single shared client. It is built once; constructing it a second time with
# the same arguments only rebinds the name and does redundant work.
client = AzureOpenAI(
    api_key=openai_key,
    api_version=api_version,
    azure_endpoint=api_url,
)

# Custom name chosen for the model deployment in Azure. It is passed as
# `model=` to client.chat.completions.create, so the deployment must serve
# chat completions.
deployment_name = os.getenv("model_name")
41
+
42
def generate_response(prompt, temperature=0.0):
    """Send *prompt* as a single user message and return the reply text.

    Args:
        prompt: Text placed in the one user message of the chat request.
        temperature: Sampling temperature forwarded to the API. The default
            of 0.0 keeps the original behaviour of this function.

    Returns:
        The content of the first choice, or an empty string when the API
        returns no content for it.
    """
    response = client.chat.completions.create(
        model=deployment_name,  # Azure deployment name, not a model family name
        temperature=temperature,
        messages=[{"role": "user", "content": prompt}],
    )
    # The message content is optional in the API response; normalise a missing
    # value to "" so callers always receive a string.
    return response.choices[0].message.content or ""


def generate_response_high_temp(prompt):
    """Return a reply to *prompt* sampled at temperature 1.0.

    Thin wrapper over generate_response so both entry points share one
    request implementation and differ only in the temperature they pass.
    """
    return generate_response(prompt, temperature=1.0)
63
+
64
def create_dataset(prompt):
    """Draw three high-temperature samples for *prompt*.

    Returns:
        A 3-tuple with the samples in the order they were requested.
    """
    # Three sequential calls to the high-temperature generator.
    return tuple(generate_response_high_temp(prompt) for _ in range(3))
69
+
70
def split_sent(sentence):
    """Split *sentence* into sentences with the module-level spaCy pipeline.

    Returns:
        A list holding the whitespace-stripped text of each detected sentence.
    """
    doc = nlp(sentence)
    pieces = []
    for span in doc.sents:
        pieces.append(span.text.strip())
    return pieces
72
+
73
def unwrap_response_text(sentence):
    """Recover the plain response text from the value of the response textbox.

    generating_samples() returns the response wrapped in a one-element list,
    so the textbox value normally looks like the repr of that list, e.g.
    "['some text']". Slicing two characters off each end leaves repr escapes
    (a literal backslash-n, escaped quotes) in the text and chops real
    characters off any value that is not in that form, so the repr is parsed
    instead.

    Args:
        sentence: Textbox value (normally a str), or a one-element list.

    Returns:
        The unwrapped text. A value that is not a list repr is returned
        unchanged.
    """
    import ast  # local import keeps this block self-contained

    if isinstance(sentence, (list, tuple)):
        if len(sentence) == 1 and isinstance(sentence[0], str):
            return sentence[0]
        return str(sentence)

    text = sentence if isinstance(sentence, str) else str(sentence)
    candidate = text.strip()
    if not (candidate.startswith("[") and candidate.endswith("]")):
        return text

    try:
        parsed = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        parsed = None
    if isinstance(parsed, list) and len(parsed) == 1 and isinstance(parsed[0], str):
        return parsed[0]

    # Looks like the wrapped form but could not be parsed: fall back to
    # dropping the two wrapper characters at each end.
    if candidate[:2] in ("['", '["') and candidate[-2:] in ("']", '"]'):
        return candidate[2:-2]
    return text


def func_selfcheck_nli(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-NLI.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting the score; scores above 0.35 are reported as
        hallucination.
    """
    sentence1 = [unwrap_response_text(sentence)]
    sample_dataset = [s1, s2, s3]

    print(sentence1, "\n", sample_dataset, "\n", type(sentence), type(sample_dataset))

    score = selfcheck_nli.predict(
        sentences=sentence1,  # list of sentences
        sampled_passages=sample_dataset,  # list of sampled passages
    )
    print(score)
    # NOTE(review): exactly one sentence is passed in, so the comparison relies
    # on `score` holding a single value -- confirm against predict()'s return.
    if score > 0.35:
        return f"The LLM is hallucinating with selfcheck nli score of {score}"
    return f"The LLM is generating true information with selfcheck nli score of {score}"


def func_selfcheckbert(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-BERTScore.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting the score; scores above 0.6 are reported as
        hallucination.
    """
    sentence1 = [unwrap_response_text(sentence)]
    sample_dataset = [s1, s2, s3]
    sent_scores_bertscore = selfcheck_bertscore.predict(
        sentences=sentence1,  # list of sentences
        sampled_passages=sample_dataset,  # list of sampled passages
    )
    print(sent_scores_bertscore)
    # NOTE(review): same single-value assumption as the NLI check -- confirm.
    if sent_scores_bertscore > 0.6:
        return f"The LLM is hallucinating with selfcheck BERT score of {sent_scores_bertscore}"
    return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"


def func_selfcheckngram(sentence, s1, s2, s3):
    """Score the response against three samples with SelfCheck-Ngram.

    The response is split into sentences with split_sent() and scored; the
    decision uses the document-level `avg_max_neg_logprob` value.

    Args:
        sentence: Value of the response textbox.
        s1, s2, s3: The three sampled passages.

    Returns:
        A message reporting the score; values above 6 are reported as
        hallucination.
    """
    passage = unwrap_response_text(sentence)
    sample_dataset = [s1, s2, s3]
    sentences_split = split_sent(passage)
    print(sample_dataset)
    print(sentences_split)
    sent_scores_ngram = selfcheck_ngram.predict(
        sentences=sentences_split,
        passage=passage,
        sampled_passages=sample_dataset,
    )
    print(sent_scores_ngram)
    avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
    if avg_max_neg_logprob > 6:
        return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
    return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
121
+
122
def generating_samples(prompt):
    """Produce the main response and three samples for a topic.

    The topic is embedded in a fixed Wikipedia-style instruction. The main
    response is requested first via generate_response(), then three samples
    via create_dataset().

    Returns:
        A 4-tuple: the main response wrapped in a one-element list, followed
        by the three samples.
    """
    wiki_prompt = f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
    main_answer = generate_response(wiki_prompt)
    sample_a, sample_b, sample_c = create_dataset(wiki_prompt)
    return [main_answer], sample_a, sample_b, sample_c
131
# Gradio UI: one prompt box, the generated response, three samples, one shared
# output box, and a button per scoring method.
# NOTE(review): the nesting of the components below is reconstructed; confirm
# the intended layout against the running app.
with gr.Blocks() as demo:
    # The heading needs a closing </h1>; a second opening <h1> leaves the
    # element unterminated.
    gr.Markdown(
        """
        <h1> LLM Hackathon : LLM Hallucination Detector </h1>
        """)
    with gr.Column():
        prompt = gr.Textbox(label="prompt")

    with gr.Column():
        sentence = gr.Textbox(label="response")

    with gr.Row():
        s1 = gr.Textbox(label="sample1")
        s2 = gr.Textbox(label="sample2")
        s3 = gr.Textbox(label="sample3")

    with gr.Column():
        score = gr.Textbox(label="output")

    # Fills the response box and the three sample boxes from the prompt.
    output_response = gr.Button("Generate response")
    output_response.click(
        fn=generating_samples,
        inputs=prompt,
        outputs=[sentence, s1, s2, s3],
    )
    with gr.Row(equal_height=True):
        # Every scoring button reads the same four boxes and writes its
        # verdict to the shared output box.
        self_check_nli_button = gr.Button("self check nli")
        self_check_nli_button.click(
            fn=func_selfcheck_nli,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

        selfcheckbert_button = gr.Button("self check Bert")
        selfcheckbert_button.click(
            fn=func_selfcheckbert,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

        self_check_ngram_button = gr.Button("self check ngram")
        self_check_ngram_button.click(
            fn=func_selfcheckngram,
            inputs=[sentence, s1, s2, s3],
            outputs=score,
        )

demo.launch()