File size: 2,947 Bytes
e108d52 26968e1 611eb64 e108d52 e2fe7c7 e108d52 97a58f0 e108d52 611eb64 e108d52 611eb64 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
import streamlit as st
from PIL import Image
import spacy
# Page chrome. set_page_config is issued before any other Streamlit command,
# followed by the headings and the explanatory figure.
PAGE_TITLE = "FACTOID: FACtual enTailment fOr hallucInation Detection"

st.set_page_config(
    page_title=PAGE_TITLE,
    layout="wide",
)
st.title("Welcome to :blue[FACTOID] ")
st.header(f"{PAGE_TITLE} :blue[Web Demo]")

# The figure is read from "image.png", resolved against the working directory.
image = Image.open("image.png")
st.image(
    image,
    caption="Traditional Entailment vs Factual Entailment",
)
# Candidate sentences offered by each dropdown. They stay lists because each
# one is handed to st.selectbox as its sequence of options.
sentence1 = ["U.S. President Barack Obama declared that the U.S. will refrain from deploying troops in Ukraine."]
sentence2 = ["Joe Biden said we’d not send U.S. troops to fight Russian troops in Ukraine, but we would provide robust military assistance and try to unify the Western world against Russia’s aggression."]

# One dropdown per sentence; each returns the chosen option as a plain string.
# The second label previously repeated "first", which mislabelled the widget.
selected_sentence1 = st.selectbox("Select first sentence:", sentence1)
selected_sentence2 = st.selectbox("Select second sentence:", sentence2)
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Sequence-classification NLI model that produces the percentage scores shown
# in the UI.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model_name = "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# The weights must sit on the same device as the inputs; leaving the model on
# the CPU while sending tensors to CUDA makes the forward pass fail.
model.to(device)

# Score the sentences the user actually picked, not the option lists.
premise = selected_sentence1
hypothesis = selected_sentence2

# Forward every tensor the tokenizer produced (not only input_ids), and skip
# gradient tracking because this is inference only.
encoded = tokenizer(premise, hypothesis, truncation=True, return_tensors="pt").to(device)
with torch.no_grad():
    output = model(**encoded)

# Softmax over the logits of the single pair, rescaled to percentages.
prediction = torch.softmax(output["logits"][0], -1).tolist()
# NOTE(review): assumes the model's class order is entailment / neutral /
# contradiction, relabelled here as support / neutral / refute — confirm
# against model.config.id2label.
label_names = ["support", "neutral", "refute"]
prediction = {name: round(float(pred) * 100, 1) for pred, name in zip(prediction, label_names)}
print(prediction)
from sentence_transformers import CrossEncoder

# Second NLI scorer: a cross-encoder that scores the (premise, hypothesis) pair.
model1 = CrossEncoder('cross-encoder/nli-deberta-v3-xsmall')

# predict() must be called on the CrossEncoder (model1); the Transformers
# classifier bound to `model` has no predict() method. The pair is built from
# the selected sentence strings rather than the option lists.
scores1 = model1.predict([(selected_sentence1, selected_sentence2)])

# Convert scores to labels: take the highest-scoring class for each pair.
# NOTE(review): assumes this index order matches the checkpoint's label order
# — confirm against the model card.
label_mapping = ['contradiction', 'entailment', 'neutral']
labels = [label_mapping[score_max] for score_max in scores1.argmax(axis=1)]
# Bare expression kept from the original; presumably rendered by Streamlit's
# "magic" display of standalone expressions when that feature is enabled.
labels
# Loaded spaCy pipelines keyed by model name, so repeated calls in one run
# share a single load instead of re-reading the model every time.
_SPACY_PIPELINES = {}


def _load_default_pipeline(model_name="en_core_web_sm"):
    """Return the spaCy pipeline for *model_name*, loading it on first use."""
    if model_name not in _SPACY_PIPELINES:
        _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return _SPACY_PIPELINES[model_name]


def extract_person_names(sentence, nlp=None):
    """
    Extract the first person name from a sentence using named entity recognition.

    Parameters:
    sentence (str | list[str] | tuple[str, ...]): Input sentence. A list or
        tuple of strings is joined with single spaces before processing.
    nlp (callable, optional): Pipeline called as ``nlp(text)`` whose result
        exposes ``.ents`` with ``.text`` and ``.label_`` on each entity.
        Defaults to spaCy's "en_core_web_sm" model, loaded once and reused.

    Returns:
    str | None: Text of the first entity labelled 'PERSON', or None when the
        sentence contains no such entity.
    """
    # Callers in this file hold their sentences in one-element lists; flatten
    # them to text before handing them to the pipeline.
    if isinstance(sentence, (list, tuple)):
        sentence = " ".join(sentence)

    if nlp is None:
        nlp = _load_default_pipeline()

    # Process the sentence and keep only the PERSON entities.
    doc = nlp(sentence)
    person_names = [entity.text for entity in doc.ents if entity.label_ == 'PERSON']

    # Indexing [0] unconditionally raised IndexError when nothing was found.
    return person_names[0] if person_names else None
# Pull the leading person name from each selected sentence (plain strings,
# not the option lists the dropdowns were built from).
person_name1 = extract_person_names(selected_sentence1)
person_name2 = extract_person_names(selected_sentence2)

st.write("Result:", prediction)

# st.beta_columns no longer exists in current Streamlit releases; prefer
# st.columns and fall back to the beta name only on old installations.
make_columns = getattr(st, "columns", None) or st.beta_columns
col1, col2 = make_columns(2)
with col1:
    st.write("Without Factual Entailment:", prediction)
with col2:
    st.write("Factual Entailment:", labels)

# NOTE(review): the original indentation was lost, so it is unclear whether
# this line belonged inside col2; it is rendered below the columns here.
st.write(f"{person_name1}::{person_name2}")
|