|
|
|
import pandas as pd
import streamlit as st
from transformers import (
    AutoModelForSequenceClassification,
    AutoModelForTokenClassification,
    AutoTokenizer,
    pipeline,
)
|
|
|
|
|
class BiasPipeline:
    """Bundle a bias text classifier and a bias token tagger behind one object.

    On construction the instance loads two Hugging Face checkpoints (a
    sequence-classification model and a token-classification model) together
    with their tokenizers, and wraps each pair in a ``transformers`` pipeline.

    Attributes set by :meth:`load_resources`:
        classifier_tokenizer, classifier_model: parts of the classifier.
        ner_tokenizer, ner_model: parts of the token tagger.
        classifier: the ``"text-classification"`` pipeline.
        ner: the ``"ner"`` pipeline.
    """

    # Hub checkpoint ids used when the constructor receives no override.
    DEFAULT_CLASSIFIER_MODEL = "newsmediabias/UnBIAS-classification-bert"
    DEFAULT_NER_MODEL = "newsmediabias/UnBIAS-Named-Entity-Recognition"

    def __init__(self, classifier_model_name=None, ner_model_name=None):
        """Remember which checkpoints to use, then load them.

        Args:
            classifier_model_name: Optional checkpoint id or path for the
                sequence classifier; falls back to
                ``DEFAULT_CLASSIFIER_MODEL`` when omitted.
            ner_model_name: Optional checkpoint id or path for the token
                tagger; falls back to ``DEFAULT_NER_MODEL`` when omitted.
        """
        self.classifier_model_name = classifier_model_name or self.DEFAULT_CLASSIFIER_MODEL
        self.ner_model_name = ner_model_name or self.DEFAULT_NER_MODEL
        self.load_resources()

    def load_resources(self):
        """Load models and tokenizers and build the two inference pipelines."""
        # Imported here under a private alias: the surrounding module rebinds
        # the global name ``pipeline`` to a BiasPipeline instance, so looking
        # that global up at call time would break any later reload.
        from transformers import pipeline as build_pipeline

        self.classifier_tokenizer = AutoTokenizer.from_pretrained(self.classifier_model_name)
        self.classifier_model = AutoModelForSequenceClassification.from_pretrained(
            self.classifier_model_name
        )

        self.ner_tokenizer = AutoTokenizer.from_pretrained(self.ner_model_name)
        self.ner_model = AutoModelForTokenClassification.from_pretrained(self.ner_model_name)

        self.classifier = build_pipeline(
            "text-classification",
            model=self.classifier_model,
            tokenizer=self.classifier_tokenizer,
        )
        self.ner = build_pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)

    def clean_text(self, text):
        """Clean up the text by removing any redundant spaces.

        Runs of whitespace (including newlines and tabs) collapse to a single
        space, and leading/trailing whitespace is dropped.
        """
        return ' '.join(text.split())

    def process(self, texts):
        """Process texts to classify and find named entities.

        Args:
            texts: Input forwarded unchanged to both pipelines.

        Returns:
            A ``(classification_results, ner_results)`` tuple holding whatever
            the classifier pipeline and the NER pipeline returned for
            ``texts``, in that order.
        """
        classification_results = self.classifier(texts)
        ner_results = self.ner(texts)
        return classification_results, ner_results
|
|
|
|
|
@st.cache_resource
def get_bias_pipeline():
    """Build the BiasPipeline once and reuse it across script reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the instance avoids reloading both models each time.
    """
    return BiasPipeline()


# Deliberately not named ``pipeline``: that would shadow the
# ``transformers.pipeline`` factory imported at the top of the file.
bias_pipeline = get_bias_pipeline()

st.title('UnBIAS App')

example_sentences = [
    "Women are just too emotional to be leaders.",
]

# The chosen example (or the empty first option) pre-fills the text area.
selected_sentence = st.selectbox("Choose an example or type your own below:", [""] + example_sentences)
input_text = st.text_area("Enter text:", selected_sentence, height=150)

if st.button("Process Text"):
    # Test the cleaned text so whitespace-only input is rejected instead of
    # reaching the models as an empty string.
    cleaned_text = bias_pipeline.clean_text(input_text) if input_text else ""
    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)

        top_prediction = classification_results[0]
        label = top_prediction['label']
        score = top_prediction['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")

        # Keep only tokens whose tag begins a biased span.
        biased_words = [
            result['word']
            for result in ner_results
            if result['entity'].startswith('B-BIAS')
        ]
        st.write("**Biased Words Identified:**")
        st.write(", ".join(biased_words) if biased_words else "None identified.")
    else:
        st.write("Please enter some text to process.")

# Adjacent literals are concatenated by the parser, which keeps the message
# on readable source lines without backslash continuations inside a string.
st.info(
    "Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. "
    "The detection of bias depends on various factors, including the context, the training data used for the models, "
    "and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, "
    "and all results should be reviewed critically by human users."
)
|
|