Minakshee25 committed
Commit 9624119 · verified · 1 Parent(s): 943b8c0

Update app.py

Files changed (1)
  1. app.py +35 -35
app.py CHANGED
@@ -7,11 +7,11 @@ import gradio as gr
 
 
 # Load the English language model
-# nlp = spacy.load("en_core_web_sm")
+nlp = spacy.load("en_core_web_sm")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available
 selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
-selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
+# selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
 
 openai_key = os.getenv("OPENAI_API_KEY")
 resource_url = os.getenv("OPENAI_API_RESOURCEURL")
@@ -67,8 +67,8 @@ def create_dataset(prompt):
     s3 = generate_response_high_temp(prompt)
     return s1, s2, s3
 
-def split_sent(sentence):
-    return [sent.text.strip() for sent in nlp(sentence).sents]
+# def split_sent(sentence):
+#     return [sent.text.strip() for sent in nlp(sentence).sents]
 
 def func_selfcheck_nli(sentence, s1, s2, s3):
     sentence1 = [sentence[2:-2]]
@@ -99,25 +99,25 @@ def func_selfcheckbert(sentence, s1, s2, s3):
     else:
         return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"
 
-def func_selfcheckngram(sentence, s1, s2, s3):
-    sentence1 = [sentence[2:-2]]
-    sample_dataset = [s1, s2, s3]
-    sentences_split = split_sent(sentence1[0])
-    print(sample_dataset)
-    print(sentences_split)
-    sent_scores_ngram = selfcheck_ngram.predict(
-        sentences = sentences_split,
-        passage = sentence1[0],
-        sampled_passages = sample_dataset,
-    )
-    print(sent_scores_ngram)
-    avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
-    if(avg_max_neg_logprob > 6):
-        return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
-    else:
-        return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
-
-    return sent_scores_ngram
+# def func_selfcheckngram(sentence, s1, s2, s3):
+#     sentence1 = [sentence[2:-2]]
+#     sample_dataset = [s1, s2, s3]
+#     sentences_split = split_sent(sentence1[0])
+#     print(sample_dataset)
+#     print(sentences_split)
+#     sent_scores_ngram = selfcheck_ngram.predict(
+#         sentences = sentences_split,
+#         passage = sentence1[0],
+#         sampled_passages = sample_dataset,
+#     )
+#     print(sent_scores_ngram)
+#     avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
+#     if(avg_max_neg_logprob > 6):
+#         return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
+#     else:
+#         return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
+
+#     return sent_scores_ngram
 
 def generating_samples(prompt):
     prompt_template=f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
@@ -157,12 +157,12 @@ with gr.Blocks() as demo:
     with gr.Row(equal_height=True):
 
 
-        # self_check_nli_button = gr.Button("self check nli")
-        # self_check_nli_button.click(
-        #     fn=func_selfcheck_nli,
-        #     inputs=[sentence, s1, s2, s3],
-        #     outputs=score
-        # )
+        self_check_nli_button = gr.Button("self check nli")
+        self_check_nli_button.click(
+            fn=func_selfcheck_nli,
+            inputs=[sentence, s1, s2, s3],
+            outputs=score
+        )
 
         selfcheckbert_button = gr.Button("self check Bert")
         selfcheckbert_button.click(
@@ -171,11 +171,11 @@ with gr.Blocks() as demo:
             outputs=score
         )
 
-        self_check_ngram_button = gr.Button("self check ngram")
-        self_check_ngram_button.click(
-            fn=func_selfcheckngram,
-            inputs=[sentence, s1, s2, s3],
-            outputs=score
-        )
+        # self_check_ngram_button = gr.Button("self check ngram")
+        # self_check_ngram_button.click(
+        #     fn=func_selfcheckngram,
+        #     inputs=[sentence, s1, s2, s3],
+        #     outputs=score
+        # )
 
 demo.launch()
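Note on the change: this commit switches the demo from the unigram SelfCheckNgram scorer to the NLI-based checker — the spaCy model is loaded again, the n-gram model, its scoring function, and its button are commented out, and the NLI button is re-enabled. Below is a minimal sketch of the NLI scoring path the re-enabled button relies on, assuming selfcheckgpt's documented predict signature; the passage, samples, and variable names are illustrative, not taken from app.py.

```python
# Sketch of the NLI self-check flow this commit enables (illustrative inputs).
import torch
import spacy
from selfcheckgpt.modeling_selfcheck import SelfCheckNLI

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nlp = spacy.load("en_core_web_sm")           # sentence splitter, as loaded in app.py
selfcheck_nli = SelfCheckNLI(device=device)

passage = "Albert Einstein was born in Ulm in 1879."
samples = [                                  # stochastic re-samples of the same prompt
    "Einstein was born in 1879 in Ulm, Germany.",
    "Albert Einstein, born in Ulm in 1879, was a physicist.",
    "Einstein, the physicist, was born in Ulm in 1879.",
]

sentences = [s.text.strip() for s in nlp(passage).sents]
sent_scores_nli = selfcheck_nli.predict(
    sentences=sentences,          # sentences of the passage being checked
    sampled_passages=samples,     # consistency is judged against these samples
)
# Each score is a contradiction probability in [0, 1];
# values near 1 suggest the sentence is hallucinated.
print(list(zip(sentences, sent_scores_nli)))
```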
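For comparison, the now-disabled n-gram path scored the whole document with a negative log-probability heuristic. A sketch continuing from the one above (same passage, samples, and sentences); note that the threshold of 6 in app.py is an app-specific cutoff, not part of the library.

```python
# Sketch of the disabled unigram scorer (continues the previous sketch).
from selfcheckgpt.modeling_selfcheck import SelfCheckNgram

selfcheck_ngram = SelfCheckNgram(n=1)  # n=1 means unigram, n=2 bigram, etc.
sent_scores_ngram = selfcheck_ngram.predict(
    sentences=sentences,
    passage=passage,
    sampled_passages=samples,
)
# Returns a dict of sentence-level and document-level scores; app.py compared
# the doc-level 'avg_max_neg_logprob' against a hard-coded threshold of 6.
print(sent_scores_ngram["doc_level"]["avg_max_neg_logprob"])
```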