File size: 4,151 Bytes
4bb8289
9476a50
3fa07b4
7cda966
3fa07b4
 
 
0451811
7cda966
 
 
 
 
 
 
 
 
 
 
 
0451811
7cda966
 
 
 
 
 
 
0451811
7cda966
 
 
7eca2e2
7cda966
 
 
 
 
 
6be538d
 
7eca2e2
 
 
 
 
 
 
 
 
 
 
7cda966
 
73cfee4
0451811
 
d1ac74e
0451811
 
 
 
 
380b45b
0451811
 
 
 
 
2b3dec6
 
 
 
460f5fd
0451811
 
 
 
9476a50
0451811
f199e05
 
460f5fd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#%%writefile debias_app.py
import streamlit as st
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline as tf_pipeline
import torch
import pandas as pd

# Choose the compute device once: use CUDA when torch reports it available, otherwise the CPU
if torch.cuda.is_available():
    torch_device = 'cuda'
else:
    torch_device = 'cpu'

# Model Loader Class for Lazy Loading and Resource Sharing
class ModelLoader:
    """Create tokenizers and models on first request and reuse them afterwards.

    Both caches are plain dicts keyed by model name only, so a second request
    for the same name returns the object created by the first request.
    """

    def __init__(self):
        # model name -> tokenizer / model instance, filled in on first request
        self.tokenizers = {}
        self.models = {}

    def load_tokenizer(self, model_name):
        """Return the tokenizer for ``model_name``, creating it on first use."""
        if model_name not in self.tokenizers:
            self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
        return self.tokenizers[model_name]

    def load_model(self, model_type, model_name):
        """Return the model for ``model_name``, creating it on first use.

        Args:
            model_type: ``"classification"`` or ``"token_classification"``;
                selects which auto-model class is instantiated. It is only
                consulted when ``model_name`` is not cached yet.
            model_name: identifier passed to ``from_pretrained``; also the
                cache key.

        Returns:
            The cached model, moved to ``torch_device`` when it was created.

        Raises:
            ValueError: if ``model_name`` is not cached and ``model_type`` is
                not one of the supported values.
        """
        if model_name not in self.models:
            if model_type == "classification":
                model_cls = AutoModelForSequenceClassification
            elif model_type == "token_classification":
                model_cls = AutoModelForTokenClassification
            else:
                # Fail with a clear message instead of falling through to a
                # KeyError on the model name in the return statement below.
                raise ValueError(
                    f"Unsupported model_type {model_type!r}; expected "
                    "'classification' or 'token_classification'"
                )
            self.models[model_name] = model_cls.from_pretrained(model_name).to(torch_device)
        return self.models[model_name]

# Module-level loader instance; it is passed to BiasPipeline further down in this file.
model_loader = ModelLoader()

# BiasPipeline for handling bias detection and cleaning
class BiasPipeline:
    """Bundle the UnBIAS text-classification and NER pipelines behind one object."""

    def __init__(self, model_loader):
        """Fetch tokenizers and models through ``model_loader`` and build both pipelines."""
        classifier_name = "newsmediabias/UnBIAS-classification-bert"
        ner_name = "newsmediabias/UnBIAS-NER"

        self.model_loader = model_loader

        # Same load order as before: classifier tokenizer/model, then NER tokenizer/model.
        self.classifier_tokenizer = model_loader.load_tokenizer(classifier_name)
        self.classifier_model = model_loader.load_model("classification", classifier_name)
        self.ner_tokenizer = model_loader.load_tokenizer(ner_name)
        self.ner_model = model_loader.load_model("token_classification", ner_name)

        self.classifier = tf_pipeline(
            "text-classification",
            model=self.classifier_model,
            tokenizer=self.classifier_tokenizer,
            device=torch_device,
        )
        self.ner = tf_pipeline(
            "ner",
            model=self.ner_model,
            tokenizer=self.ner_tokenizer,
            device=torch_device,
        )

    def clean_text(self, text):
        """Collapse every run of whitespace to one space and trim both ends."""
        words = text.split()
        return ' '.join(words)

    def process(self, texts):
        """Run the classifier, then the NER pipeline, on ``texts``.

        Returns a ``(classification_results, ner_results)`` tuple holding
        whatever each pipeline returned.
        """
        classification_output = self.classifier(texts)
        ner_output = self.ner(texts)
        return classification_output, ner_output

# Initialize the BiasPipeline
@st.cache_resource
def _load_bias_pipeline():
    """Build the BiasPipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; without the
    resource cache each rerun would construct a fresh pipeline and load both
    models again.
    """
    return BiasPipeline(model_loader)


bias_pipeline = _load_bias_pipeline()

# Streamlit interface setup
st.title('UnBIAS App')

# Dropdown to select a test sentence
# NOTE(review): `test_sentences` is not defined or imported in the visible source;
# unless it is provided elsewhere this line raises NameError. Confirm its origin.
selected_sentence = st.selectbox("Choose a pre-loaded sentence to analyze and debias:", [""] + test_sentences)

# Text area for custom input
input_text = st.text_area("Or enter your own text to analyze and debias:", height=150)

if st.button("Analyze and Debias Text"):
    # A selected pre-loaded sentence takes precedence over the free-text box.
    text_to_process = selected_sentence if selected_sentence else input_text
    # Normalize before the emptiness check so whitespace-only input is rejected
    # instead of being sent to the models as an empty string.
    cleaned_text = bias_pipeline.clean_text(text_to_process) if text_to_process else ""
    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)
        # Only the first classification result is displayed.
        top_result = classification_results[0]
        label = top_result['label']
        score = top_result['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")
        # Keep the tokens whose NER tag starts with 'B-BIAS'.
        biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
        if biased_words:
            st.write("**Biased Words Identified:**", ", ".join(biased_words))
        else:
            # Show an explicit placeholder rather than an empty value.
            st.write("**Biased Words Identified:**", "None")
        # Debias the text
        # NOTE(review): `get_debiased_sequence` is not defined or imported in the
        # visible source; confirm where it is provided before relying on this call.
        debiased_text = get_debiased_sequence(cleaned_text)
        st.write("## Debiased Text:")
        st.write(debiased_text)
    else:
        st.write("Please enter some text to analyze and debias or select a pre-loaded sentence.")

# Disclaimer shown in an info box on every run of the script
disclaimer_message = "Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. The detection of bias depends on various factors, including the context, the training data used for the models, and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, and all results should be reviewed critically by human users."
st.info(disclaimer_message)