Spaces:

Minakshee25
/

hallucination_detection

Sleeping

App Files Community

Minakshee25 committed on Apr 4, 2024

Commit

9624119

verified ·

1 Parent(s): 943b8c0

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -35

app.py CHANGED Viewed

@@ -7,11 +7,11 @@ import gradio as gr
 # Load the English language model
-# nlp = spacy.load("en_core_web_sm")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available
 selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
-selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
 openai_key = os.getenv("OPENAI_API_KEY")
 resource_url = os.getenv("OPENAI_API_RESOURCEURL")
@@ -67,8 +67,8 @@ def create_dataset(prompt):
     s3 = generate_response_high_temp(prompt)
     return s1, s2, s3
-def split_sent(sentence):
-  return [sent.text.strip() for sent in nlp(sentence).sents]
 def func_selfcheck_nli(sentence, s1, s2, s3):
     sentence1 = [sentence[2:-2]]
@@ -99,25 +99,25 @@ def func_selfcheckbert(sentence, s1, s2, s3):
     else:
       return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"
-def func_selfcheckngram(sentence, s1, s2, s3):
-    sentence1 = [sentence[2:-2]]
-    sample_dataset = [s1, s2, s3]
-    sentences_split = split_sent(sentence1[0])
-    print(sample_dataset)
-    print(sentences_split)
-    sent_scores_ngram = selfcheck_ngram.predict(
-      sentences = sentences_split,
-      passage = sentence1[0],
-      sampled_passages = sample_dataset,
-    )
-    print(sent_scores_ngram)
-    avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
-    if(avg_max_neg_logprob > 6):
-       return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
-    else:
-        return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
-    return sent_scores_ngram
 def generating_samples(prompt):
     prompt_template=f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
@@ -157,12 +157,12 @@ with gr.Blocks() as demo:
   with gr.Row(equal_height=True):
-    # self_check_nli_button = gr.Button("self check nli")
-    # self_check_nli_button.click(
-    #     fn=func_selfcheck_nli,
-    #     inputs=[sentence, s1, s2, s3],
-    #     outputs=score
-    # )
     selfcheckbert_button = gr.Button("self check Bert")
     selfcheckbert_button.click(
@@ -171,11 +171,11 @@ with gr.Blocks() as demo:
         outputs=score
     )
-    self_check_ngram_button = gr.Button("self check ngram")
-    self_check_ngram_button.click(
-        fn=func_selfcheckngram,
-        inputs=[sentence, s1, s2, s3],
-        outputs=score
-    )
 demo.launch()

 # Load the English language model
+nlp = spacy.load("en_core_web_sm")
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 selfcheck_nli = SelfCheckNLI(device=device) # set device to 'cuda' if GPU is available
 selfcheck_bertscore = SelfCheckBERTScore(rescale_with_baseline=True)
+# selfcheck_ngram = SelfCheckNgram(n=1) # n=1 means Unigram, n=2 means Bigram, etc.
 openai_key = os.getenv("OPENAI_API_KEY")
 resource_url = os.getenv("OPENAI_API_RESOURCEURL")
     s3 = generate_response_high_temp(prompt)
     return s1, s2, s3
+# def split_sent(sentence):
+#   return [sent.text.strip() for sent in nlp(sentence).sents]
 def func_selfcheck_nli(sentence, s1, s2, s3):
     sentence1 = [sentence[2:-2]]
     else:
       return f"The LLM is generating true information with selfcheck BERT score of {sent_scores_bertscore}"
+# def func_selfcheckngram(sentence, s1, s2, s3):
+#     sentence1 = [sentence[2:-2]]
+#     sample_dataset = [s1, s2, s3]
+#     sentences_split = split_sent(sentence1[0])
+#     print(sample_dataset)
+#     print(sentences_split)
+#     sent_scores_ngram = selfcheck_ngram.predict(
+#       sentences = sentences_split,
+#       passage = sentence1[0],
+#       sampled_passages = sample_dataset,
+#     )
+#     print(sent_scores_ngram)
+#     avg_max_neg_logprob = sent_scores_ngram['doc_level']['avg_max_neg_logprob']
+#     if(avg_max_neg_logprob > 6):
+#        return f"The LLM is hallucinating with selfcheck ngram score of {avg_max_neg_logprob}"
+#     else:
+#         return f"The LLM is generating true information with selfcheck ngram score of {avg_max_neg_logprob}"
+#     return sent_scores_ngram
 def generating_samples(prompt):
     prompt_template=f"This is a Wikipedia passage on the topic of '{prompt}' in 100 words"
   with gr.Row(equal_height=True):
+    self_check_nli_button = gr.Button("self check nli")
+    self_check_nli_button.click(
+        fn=func_selfcheck_nli,
+        inputs=[sentence, s1, s2, s3],
+        outputs=score
+    )
     selfcheckbert_button = gr.Button("self check Bert")
     selfcheckbert_button.click(
         outputs=score
     )
+    # self_check_ngram_button = gr.Button("self check ngram")
+    # self_check_ngram_button.click(
+    #     fn=func_selfcheckngram,
+    #     inputs=[sentence, s1, s2, s3],
+    #     outputs=score
+    # )
 demo.launch()