Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,11 +7,11 @@ import gradio as gr
|
|
7 |
|
8 |
|
9 |
# Load the English language model
|
10 |
-
|
11 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
12 |
selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available
|
13 |
selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
|
14 |
-
selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
|
15 |
|
16 |
openai_key = os.getenv("OPENAI_API_KEY")
|
17 |
resource_url = os.getenv("OPENAI_API_RESOURCEURL")
|
@@ -67,8 +67,8 @@ def create_dataset(prompt):
|
|
67 |
s3 = generate_response_high_temp(prompt)
|
68 |
return s1, s2, s3
|
69 |
|
70 |
-
def split_sent(sentence):
|
71 |
-
|
72 |
|
73 |
def func_selfcheck_nli(sentence, s1, s2, s3):
|
74 |
sentence1 = [sentence[2:-2]]
|
@@ -99,25 +99,25 @@ def func_selfcheckbert(sentence, s1, s2, s3):
|
|
99 |
else:
|
100 |
return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"
|
101 |
|
102 |
-
def func_selfcheckngram(sentence, s1, s2, s3):
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
|
122 |
def generating_samples(prompt):
|
123 |
prompt_template=f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
|
@@ -157,12 +157,12 @@ with gr.Blocks() as demo:
|
|
157 |
with gr.Row(equal_height=True):
|
158 |
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
|
167 |
selfcheckbert_button = gr.Button("self check Bert")
|
168 |
selfcheckbert_button.click(
|
@@ -171,11 +171,11 @@ with gr.Blocks() as demo:
|
|
171 |
outputs=score
|
172 |
)
|
173 |
|
174 |
-
self_check_ngram_button = gr.Button("self check ngram")
|
175 |
-
self_check_ngram_button.click(
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
)
|
180 |
|
181 |
demo.launch()
|
|
|
7 |
|
8 |
|
9 |
# Load the English language model
|
10 |
+
nlp = spacy.load("en_core_web_sm")
|
11 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
12 |
selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available
|
13 |
selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
|
14 |
+
# selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
|
15 |
|
16 |
openai_key = os.getenv("OPENAI_API_KEY")
|
17 |
resource_url = os.getenv("OPENAI_API_RESOURCEURL")
|
|
|
67 |
s3 = generate_response_high_temp(prompt)
|
68 |
return s1, s2, s3
|
69 |
|
70 |
+
# def split_sent(sentence):
|
71 |
+
# return [sent.text.strip() for sent in nlp(sentence).sents]
|
72 |
|
73 |
def func_selfcheck_nli(sentence, s1, s2, s3):
|
74 |
sentence1 = [sentence[2:-2]]
|
|
|
99 |
else:
|
100 |
return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"
|
101 |
|
102 |
+
# def func_selfcheckngram(sentence, s1, s2, s3):
|
103 |
+
# sentence1 = [sentence[2:-2]]
|
104 |
+
# sample_dataset = [s1, s2, s3]
|
105 |
+
# sentences_split = split_sent(sentence1[0])
|
106 |
+
# print(sample_dataset)
|
107 |
+
# print(sentences_split)
|
108 |
+
# sent_scores_ngram = selfcheck_ngram.predict(
|
109 |
+
# sentences = sentences_split,
|
110 |
+
# passage = sentence1[0],
|
111 |
+
# sampled_passages = sample_dataset,
|
112 |
+
# )
|
113 |
+
# print(sent_scores_ngram)
|
114 |
+
# avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
|
115 |
+
# if(avg_max_neg_logprob > 6):
|
116 |
+
# return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
|
117 |
+
# else:
|
118 |
+
# return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
|
119 |
+
|
120 |
+
# return sent_scores_ngram
|
121 |
|
122 |
def generating_samples(prompt):
|
123 |
prompt_template=f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
|
|
|
157 |
with gr.Row(equal_height=True):
|
158 |
|
159 |
|
160 |
+
self_check_nli_button = gr.Button("self check nli")
|
161 |
+
self_check_nli_button.click(
|
162 |
+
fn=func_selfcheck_nli,
|
163 |
+
inputs=[sentence, s1, s2, s3],
|
164 |
+
outputs=score
|
165 |
+
)
|
166 |
|
167 |
selfcheckbert_button = gr.Button("self check Bert")
|
168 |
selfcheckbert_button.click(
|
|
|
171 |
outputs=score
|
172 |
)
|
173 |
|
174 |
+
# self_check_ngram_button = gr.Button("self check ngram")
|
175 |
+
# self_check_ngram_button.click(
|
176 |
+
# fn=func_selfcheckngram,
|
177 |
+
# inputs=[sentence, s1, s2, s3],
|
178 |
+
# outputs=score
|
179 |
+
# )
|
180 |
|
181 |
demo.launch()
|