shainaraza
committed on
Commit
•
73cfee4
1
Parent(s):
c5aa009
Update app.py
Browse files
app.py
CHANGED
@@ -13,10 +13,10 @@ class BiasPipeline:
|
|
13 |
self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
14 |
self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
15 |
|
16 |
-
self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
17 |
-
self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
18 |
-
|
19 |
-
|
20 |
|
21 |
self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
|
22 |
self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
@@ -31,6 +31,61 @@ class BiasPipeline:
|
|
31 |
ner_results = self.ner(texts)
|
32 |
return classification_results, ner_results
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# Initialize the BiasPipeline
|
35 |
pipeline = BiasPipeline()
|
36 |
|
|
|
13 |
self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
14 |
self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
|
15 |
|
16 |
+
#self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
17 |
+
#self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
|
18 |
+
self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
|
19 |
+
self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
|
20 |
|
21 |
self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
|
22 |
self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
|
|
|
31 |
ner_results = self.ner(texts)
|
32 |
return classification_results, ner_results
|
33 |
|
34 |
+
|
35 |
+
# Model setup for debiasing.
# The tokenizer is loaded once and handed to the pipeline so that it is not
# loaded a second time, and so the token counts computed in
# get_debiased_sequence come from the same tokenizer the pipeline generates with.
debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)
debias_pipeline = transformers.pipeline(
    "text-generation",
    model=debias_model,
    tokenizer=debias_tokenizer,
    torch_dtype=torch.float16,  # half precision weights
    device_map="auto",
)

# Instruction for debiasing: the system prompt that get_debiased_sequence
# places between the <<SYS>> ... <</SYS>> markers of every request.
instruction = ("Instruction: As a helpful, respectful and trustworthy debiasing assistant, your "
               "task is to receive a text and return its unbiased version, without adding any unrelated content "
               "or additional outputs.")
|
49 |
+
|
50 |
+
|
51 |
+
def get_debiased_sequence(prompt):
    """Generate a debiased version of the provided text using the debiasing pipeline.

    The text is wrapped in the ``<<SYS>> ... <</SYS>> [INST] ... [/INST]``
    prompt template, sent to the module-level ``debias_pipeline``, and the
    text following the last ``[/INST]`` tag of the output is returned.

    Args:
        prompt: The text to debias.

    Returns:
        The cleaned model output (non-printable characters removed, outer
        whitespace stripped), or a human-readable message when nothing was
        generated or CUDA ran out of memory.

    Raises:
        RuntimeError: Any runtime error other than a CUDA out-of-memory error.
    """
    eos_marker = "</s>"
    instruction_prefix = f"<s> <<SYS>> {instruction} <</SYS>> [INST]"
    instruction_suffix = "[/INST]" + eos_marker
    full_input_text = f"{instruction_prefix}{prompt}{instruction_suffix}"

    # Tokenize the full input text to calculate its length in tokens
    input_tokens = debias_tokenizer.encode(full_input_text)

    # max_length counts prompt tokens too, so it must exceed the prompt length;
    # the +50 is the budget left for newly generated tokens.
    max_length = len(input_tokens) + 50

    try:
        sequences = debias_pipeline(
            full_input_text,
            do_sample=True,
            top_k=10,
            num_return_sequences=1,
            eos_token_id=debias_tokenizer.eos_token_id,
            max_length=max_length,
        )

        if sequences:
            res = sequences[0]['generated_text']
            # When the output echoes the prompt, keep only what follows the
            # last [/INST] tag.
            result_part = res.split('[/INST]')[-1]
            clean_result = ''.join(c for c in result_part if c.isprintable()).strip()
            # The prompt ends with "[/INST]</s>", so an echoed prompt leaves a
            # literal "</s>" in front of the answer; drop it.
            if clean_result.startswith(eos_marker):
                clean_result = clean_result[len(eos_marker):].strip()
            return clean_result
    except RuntimeError as e:
        if 'CUDA out of memory' in str(e):
            torch.cuda.empty_cache()  # Try clearing cache to free up memory
            return "Error: Out of memory. Please try again with shorter input or less complex instructions."
        # Not a memory error: propagate with the original traceback.
        raise

    return "No output generated. Check model configuration or input."
|
87 |
+
|
88 |
+
|
89 |
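# Initialize the BiasPipeline. NOTE: this rebinds the module-level name `pipeline`, which BiasPipeline.__init__ itself calls, so the class can only be instantiated before this assignment runs.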
|
90 |
pipeline = BiasPipeline()
|
91 |
|