shainaraza
committed on
Commit
•
0451811
1
Parent(s):
3fa07b4
Update app.py
Browse files
app.py
CHANGED
@@ -6,24 +6,19 @@ import torch
|
|
6 |
import pandas as pd
|
7 |
|
8 |
|
9 |
-
#
|
|
|
|
|
10 |
class BiasPipeline:
|
11 |
def __init__(self):
|
12 |
-
# Load models and tokenizers
|
13 |
-
self.load_resources()
|
14 |
-
|
15 |
-
def load_resources(self):
|
16 |
-
"""Load models and tokenizers."""
|
17 |
self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
18 |
self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
19 |
-
|
20 |
-
#self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
21 |
-
#self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
22 |
self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
|
23 |
self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
|
24 |
-
|
25 |
-
self.
|
26 |
-
|
27 |
|
28 |
def clean_text(self, text):
|
29 |
"""Clean up the text by removing any redundant spaces."""
|
@@ -35,7 +30,6 @@ class BiasPipeline:
|
|
35 |
ner_results = self.ner(texts)
|
36 |
return classification_results, ner_results
|
37 |
|
38 |
-
|
39 |
# Model setup for debiasing
|
40 |
debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
|
41 |
debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)
|
@@ -89,13 +83,13 @@ def get_debiased_sequence(prompt):
|
|
89 |
|
90 |
return "No output generated. Check model configuration or input."
|
91 |
|
92 |
-
|
|
|
93 |
# Initialize the BiasPipeline
|
94 |
-
|
95 |
|
96 |
-
#
|
97 |
-
|
98 |
-
example_sentences = [
|
99 |
"Women are just too emotional to be leaders.",
|
100 |
"All young people are lazy and addicted to their phones.",
|
101 |
"People from that country are always dishonest and corrupt.",
|
@@ -116,20 +110,28 @@ example_sentences = [
|
|
116 |
"People without jobs are simply not trying hard enough."
|
117 |
]
|
118 |
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
121 |
|
122 |
-
if st.button("
|
123 |
-
if input_text
|
124 |
-
|
125 |
-
|
|
|
126 |
label = classification_results[0]['label']
|
127 |
score = classification_results[0]['score']
|
128 |
st.write(f"**Classification:** {label} (Confidence: {score:.2f})")
|
129 |
biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
|
130 |
st.write("**Biased Words Identified:**", ", ".join(biased_words))
|
|
|
|
|
|
|
|
|
131 |
else:
|
132 |
-
st.write("Please enter some text to
|
133 |
|
134 |
# Disclaimer
|
135 |
st.info("Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. The detection of bias depends on various factors, including the context, the training data used for the models, and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, and all results should be reviewed critically by human users.")
|
|
|
6 |
import pandas as pd
|
7 |
|
8 |
|
9 |
+
# Set the default device
|
10 |
+
|
11 |
+
|
12 |
class BiasPipeline:
    """Bias-detection pipeline: a BERT sequence classifier plus a token-level NER model.

    NOTE(review): reconstructed from a mangled diff paste. Relies on
    `AutoTokenizer`, `AutoModelForSequenceClassification`,
    `AutoModelForTokenClassification` and `tf_pipeline` being imported at
    module level (the import block is outside this view — confirm).
    """

    def __init__(self):
        # Load models and tokenizers for bias detection.
        self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
        self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
        self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
        self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
        # Wrap the raw models in inference pipelines used elsewhere
        # (e.g. `self.classifier(...)` and `self.ner(...)` in `process`).
        self.classifier = tf_pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
        self.ner = tf_pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
22 |
|
23 |
def clean_text(self, text):
|
24 |
"""Clean up the text by removing any redundant spaces."""
|
|
|
30 |
ner_results = self.ner(texts)
|
31 |
return classification_results, ner_results
|
32 |
|
|
|
33 |
# Model setup for debiasing: load the tokenizer for the LLaMA-2 based
# debiaser checkpoint (used by `get_debiased_sequence`, defined below).
debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)
|
|
|
83 |
|
84 |
return "No output generated. Check model configuration or input."
|
85 |
|
86 |
+
# Streamlit interface setup
st.title('UnBIAS App')

# Initialize the BiasPipeline once at startup; this downloads/loads all
# detection models, so it is deliberately done before any UI interaction.
bias_pipeline = BiasPipeline()
|
90 |
|
91 |
+
# List of predefined test sentences
|
92 |
+
test_sentences = [
|
|
|
93 |
"Women are just too emotional to be leaders.",
|
94 |
"All young people are lazy and addicted to their phones.",
|
95 |
"People from that country are always dishonest and corrupt.",
|
|
|
110 |
"People without jobs are simply not trying hard enough."
|
111 |
]
|
112 |
|
113 |
+
# Dropdown to select a test sentence; the leading "" entry means
# "no pre-loaded sentence selected".
selected_sentence = st.selectbox("Choose a pre-loaded sentence to analyze and debias:", [""] + test_sentences)

# Text area for custom input
input_text = st.text_area("Or enter your own text to analyze and debias:", height=150)

if st.button("Analyze and Debias Text"):
    # A selected pre-loaded sentence takes precedence over free-form input.
    text_to_process = selected_sentence if selected_sentence else input_text
    if text_to_process:
        cleaned_text = bias_pipeline.clean_text(text_to_process)
        classification_results, ner_results = bias_pipeline.process(cleaned_text)
        # transformers text-classification output: list of {'label', 'score'}.
        label = classification_results[0]['label']
        score = classification_results[0]['score']
        st.write(f"**Classification:** {label} (Confidence: {score:.2f})")
        # Keep only tokens tagged as the beginning of a biased span.
        # NOTE(review): I- continuation tokens are intentionally excluded here —
        # confirm this matches the NER model's tagging scheme.
        biased_words = [result['word'] for result in ner_results if result['entity'].startswith('B-BIAS')]
        st.write("**Biased Words Identified:**", ", ".join(biased_words))
        # Debias the text
        debiased_text = get_debiased_sequence(cleaned_text)
        st.write("## Debiased Text:")
        st.write(debiased_text)
    else:
        st.write("Please enter some text to analyze and debias or select a pre-loaded sentence.")

# Disclaimer
st.info("Disclaimer: Please note that while this tool aims to identify and highlight biased language, no automated system is perfect. The detection of bias depends on various factors, including the context, the training data used for the models, and the inherent limitations of natural language processing technologies. As such, some biases may not be detected, and all results should be reviewed critically by human users.")
|