shainaraza committed on
Commit
7cda966
1 Parent(s): c385187

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -74
app.py CHANGED
@@ -1,24 +1,44 @@
1
  #%%writefile debias_app.py
2
  import streamlit as st
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline as tf_pipeline
4
  import transformers
 
5
  import torch
6
  import pandas as pd
7
 
8
-
9
  # Set the default device
 
 
 
 
 
 
 
 
 
 
 
 
10
 
 
 
 
 
 
 
 
11
 
 
 
 
12
  class BiasPipeline:
13
- def __init__(self):
14
- # Load models and tokenizers for bias detection
15
- self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
16
- self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
17
- self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
18
- self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
19
- self.classifier = tf_pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
20
- self.ner = tf_pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
21
-
22
 
23
  def clean_text(self, text):
24
  """Clean up the text by removing any redundant spaces."""
@@ -30,72 +50,11 @@ class BiasPipeline:
30
  ner_results = self.ner(texts)
31
  return classification_results, ner_results
32
 
33
-
34
- # Constants and Global Variables
35
- sys_message = "Task: Please just generate a bias-free version of the text provided, ensuring it's free from biases related to age, gender, politics, social nuances, or economic background, while keeping it roughly the same length as the original:"
36
- instruction = "Instruction: As a helpful, respectful and trustworthy debiasing assistant, your task is to receive a text and return its unbiased version, Don't add additional comment. Just return the un biased version of the input text:"
37
- # Model setup for debiasing
38
- debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
39
- debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)
40
- debias_pipeline = transformers.pipeline(
41
- "text-generation",
42
- model=debias_model,
43
- torch_dtype=torch.float16,
44
- device_map="auto",
45
- )
46
-
47
-
48
-
49
- def get_debiased_sequence(prompt):
50
- """
51
- Generate a debiased version of the provided text using the debiasing pipeline.
52
- Args:
53
- - prompt (str): Text to be debiased.
54
- Returns:
55
- - str: Debiased text.
56
- """
57
- input_text = f"<s> <<SYS>>{instruction}. {sys_message} <</SYS>> [INST]{prompt} [/INST]"
58
- sequences = debias_pipeline(
59
- input_text,
60
- do_sample=True,
61
- top_k=10,
62
- num_return_sequences=1,
63
- eos_token_id=debias_tokenizer.eos_token_id,
64
- max_length=len(prompt.split(" ")) + len(input_text.split(" ")) + 100, # Increased max_length
65
- )
66
- res = sequences[0]['generated_text']
67
- result_part = res.split('[/INST]')[-1]
68
- clean_result = ''.join(c for c in result_part if c.isprintable())
69
- cleaned_text = re_incomplete_sentence(clean_result.strip())
70
- return cleaned_text.strip()
71
-
72
 
73
  # Streamlit interface setup
74
  st.title('UnBIAS App')
75
- # Initialize the BiasPipeline
76
- bias_pipeline = BiasPipeline()
77
-
78
- # List of predefined test sentences
79
- test_sentences = [
80
- "Women are just too emotional to be leaders.",
81
- "All young people are lazy and addicted to their phones.",
82
- "People from that country are always dishonest and corrupt.",
83
- "Men are inherently better at science and math than women.",
84
- "The elderly are just burdens to society; they contribute nothing.",
85
- "All poor people are lazy and don't want to work.",
86
- "People who follow that religion are all terrorists.",
87
- "Immigrants are taking all our jobs and ruining the country.",
88
- "All wealthy people achieved success only through manipulation and deceit.",
89
- "People with tattoos are not professional.",
90
- "All politicians are liars and cannot be trusted.",
91
- "Blue-collar workers don't have any real skills.",
92
- "Anyone who doesn't attend college is unintelligent.",
93
- "Only people from the city are sophisticated.",
94
- "Rural folks have backward mindsets and don't understand progress.",
95
- "All artists are always broke and unreliable.",
96
- "Anyone from that region is uneducated and narrow-minded.",
97
- "People without jobs are simply not trying hard enough."
98
- ]
99
 
100
  # Dropdown to select a test sentence
101
  selected_sentence = st.selectbox("Choose a pre-loaded sentence to analyze and debias:", [""] + test_sentences)
 
1
  #%%writefile debias_app.py
2
  import streamlit as st
 
3
  import transformers
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline as tf_pipeline
5
  import torch
6
  import pandas as pd
7
 
 
8
  # Set the default device
9
+ torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
10
+
11
+ # Model Loader Class for Lazy Loading and Resource Sharing
12
class ModelLoader:
    """Load tokenizers and models on first request and reuse them afterwards.

    Loaded objects are kept in two dictionaries keyed by model name so that
    repeated requests for the same name return the same instance.
    """

    def __init__(self):
        # model name -> tokenizer instance
        self.tokenizers = {}
        # model name -> model instance (already moved to ``torch_device``)
        self.models = {}

    def load_tokenizer(self, model_name):
        """Return the tokenizer for *model_name*, loading it on first use."""
        if model_name not in self.tokenizers:
            self.tokenizers[model_name] = AutoTokenizer.from_pretrained(model_name)
        return self.tokenizers[model_name]

    def load_model(self, model_type, model_name):
        """Return the model for *model_name*, loading it on first use.

        Args:
            model_type: ``"classification"`` or ``"token_classification"``;
                selects which auto-model class is used on a cache miss.
            model_name: Identifier passed to ``from_pretrained``; also the
                cache key.

        Returns:
            The cached model, moved to ``torch_device`` when first loaded.

        Raises:
            ValueError: If the model is not cached yet and *model_type* is
                not one of the supported values.
        """
        # NOTE: the cache is keyed by name only, so a name that is already
        # cached is returned as-is regardless of *model_type*.
        if model_name in self.models:
            return self.models[model_name]

        if model_type == "classification":
            model = AutoModelForSequenceClassification.from_pretrained(model_name)
        elif model_type == "token_classification":
            model = AutoModelForTokenClassification.from_pretrained(model_name)
        else:
            # Previously this fell through and failed with an opaque KeyError
            # on the model name; report the actual problem instead.
            raise ValueError(
                f"Unsupported model_type {model_type!r}; expected "
                "'classification' or 'token_classification'"
            )

        self.models[model_name] = model.to(torch_device)
        return self.models[model_name]
29
 
30
+ model_loader = ModelLoader()
31
+
32
+ # BiasPipeline for handling bias detection and cleaning
33
  class BiasPipeline:
34
def __init__(self, model_loader):
    """Build the text-classification and NER pipelines from shared models.

    Args:
        model_loader: Object providing ``load_tokenizer(model_name)`` and
            ``load_model(model_type, model_name)``. It is kept on the
            instance as ``self.model_loader``.
    """
    classifier_name = "newsmediabias/UnBIAS-classification-bert"
    ner_name = "newsmediabias/UnBIAS-NER"

    # A hard-coded ``device=0`` requests the first CUDA GPU even on hosts
    # without one. Select the GPU only when CUDA is available and fall back
    # to the CPU (-1 in the pipeline API) otherwise.
    pipeline_device = 0 if torch.cuda.is_available() else -1

    self.model_loader = model_loader
    self.classifier_tokenizer = model_loader.load_tokenizer(classifier_name)
    self.classifier_model = model_loader.load_model("classification", classifier_name)
    self.ner_tokenizer = model_loader.load_tokenizer(ner_name)
    self.ner_model = model_loader.load_model("token_classification", ner_name)
    self.classifier = tf_pipeline(
        "text-classification",
        model=self.classifier_model,
        tokenizer=self.classifier_tokenizer,
        device=pipeline_device,
    )
    self.ner = tf_pipeline(
        "ner",
        model=self.ner_model,
        tokenizer=self.ner_tokenizer,
        device=pipeline_device,
    )
 
42
 
43
  def clean_text(self, text):
44
  """Clean up the text by removing any redundant spaces."""
 
50
  ner_results = self.ner(texts)
51
  return classification_results, ner_results
52
 
53
+ # Initialize the BiasPipeline
54
+ bias_pipeline = BiasPipeline(model_loader)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Streamlit interface setup
57
  st.title('UnBIAS App')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  # Dropdown to select a test sentence
60
  selected_sentence = st.selectbox("Choose a pre-loaded sentence to analyze and debias:", [""] + test_sentences)