File size: 3,094 Bytes
be61171
9476a50
7eca2e2
2b3dec6
7eca2e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9476a50
 
36d3b4c
380b45b
 
 
2b3dec6
380b45b
 
 
 
 
 
7eca2e2
9476a50
7eca2e2
 
2b3dec6
 
 
 
 
 
 
 
9476a50
7eca2e2
f199e05
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#%%writefile debias_app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
import pandas as pd

# Define the BiasPipeline class with text processing methods
class BiasPipeline:
    """Bundle the UnBIAS sequence classifier and token tagger.

    Constructing an instance loads both checkpoints and exposes them as two
    Hugging Face inference pipelines:

    * ``self.classifier`` -- a ``"text-classification"`` pipeline.
    * ``self.ner`` -- a ``"ner"`` (token-classification) pipeline.
    """

    # Hugging Face Hub identifiers of the checkpoints loaded by
    # ``load_resources``. Kept as class attributes so a subclass can point
    # at different checkpoints without re-implementing the loader.
    CLASSIFIER_MODEL_NAME = "newsmediabias/UnBIAS-classification-bert"
    NER_MODEL_NAME = "newsmediabias/UnBIAS-Named-Entity-Recognition"

    def __init__(self):
        # Load models and tokenizers
        self.load_resources()

    def load_resources(self) -> None:
        """Load both tokenizers and models and build the two pipelines.

        Populates ``classifier_tokenizer``, ``classifier_model``,
        ``ner_tokenizer``, ``ner_model``, ``classifier`` and ``ner`` on the
        instance.
        """
        # Import the factory under a private alias. The surrounding script
        # rebinds the module-level name ``pipeline`` to a BiasPipeline
        # instance, so looking up the bare global here would make any later
        # (re)load fail with "object is not callable".
        from transformers import pipeline as hf_pipeline

        self.classifier_tokenizer = AutoTokenizer.from_pretrained(self.CLASSIFIER_MODEL_NAME)
        self.classifier_model = AutoModelForSequenceClassification.from_pretrained(self.CLASSIFIER_MODEL_NAME)

        self.ner_tokenizer = AutoTokenizer.from_pretrained(self.NER_MODEL_NAME)
        self.ner_model = AutoModelForTokenClassification.from_pretrained(self.NER_MODEL_NAME)

        self.classifier = hf_pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
        self.ner = hf_pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)

    def clean_text(self, text: str) -> str:
        """Collapse every run of whitespace in *text* to a single space.

        Leading and trailing whitespace is removed as a side effect of
        splitting on whitespace and re-joining with single spaces.
        """
        return ' '.join(text.split())

    def process(self, texts) -> tuple:
        """Run the classifier and the token tagger over *texts*.

        Args:
            texts: Input handed unchanged to both pipelines. The caller in
                this file passes one cleaned string.

        Returns:
            A ``(classification_results, ner_results)`` tuple holding
            whatever the two pipelines returned, in that order.
        """
        classification_results = self.classifier(texts)
        ner_results = self.ner(texts)
        return classification_results, ner_results

# Initialize the BiasPipeline
# Streamlit re-executes this script on every widget interaction.
# st.cache_resource keeps a single BiasPipeline alive across those reruns so
# the models are loaded once instead of on every button click.
@st.cache_resource
def load_bias_pipeline():
    """Build the BiasPipeline (loads both models) and return it."""
    return BiasPipeline()


# Named ``bias_pipeline`` rather than ``pipeline`` so the ``pipeline`` factory
# imported from transformers is not shadowed at module level.
bias_pipeline = load_bias_pipeline()

# Streamlit interface
st.title('UnBIAS App')
# List of preloaded example sentences
example_sentences = [
    "Women are just too emotional to be leaders.",
    # More examples...
]

# Dropdown for selecting an example sentence or entering your own.
# The selected example pre-fills the text area; the leading "" entry leaves
# the text area empty for free-form input.
selected_sentence = st.selectbox("Choose an example or type your own below:", [""] + example_sentences)
input_text = st.text_area("Enter text:", selected_sentence, height=150)

if st.button("Process Text"):
    # Normalise first and test the cleaned text, so whitespace-only input is
    # treated as empty instead of being sent to the models.
    cleaned_text = bias_pipeline.clean_text(input_text or "")
    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)
        # One input string was submitted, so the first entry is its result.
        top_result = classification_results[0]
        label = top_result['label']
        score = top_result['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")

        # Extract biased words from NER results
        # NOTE(review): only tokens tagged 'B-BIAS...' are kept; tokens with
        # other tags are not merged in -- confirm that is intended.
        biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
        st.write("**Biased Words Identified:**")
        # Show an explicit message rather than a blank line when nothing matched.
        st.write(", ".join(biased_words) if biased_words else "None identified.")
    else:
        st.write("Please enter some text to process.")

# Disclaimer
# The message is assembled from adjacent string literals inside the call's
# parentheses; Python concatenates them at compile time into one single-line
# string. (A "\\" at the end of a line inside a plain quoted string is an
# escaped backslash, not a line continuation, and leaves the literal
# unterminated.)
st.info(
    "Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. "
    "The detection of bias depends on various factors, including the context, the training data used for the models, "
    "and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, "
    "and all results should be reviewed critically by human users."
)