Spaces:

newsmediabias
/

UnBIAS

App Files Files Community

shainaraza committed on Apr 27

Commit

73cfee4

•

1 Parent(s): c5aa009

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -4

app.py CHANGED Viewed

@@ -13,10 +13,10 @@ class BiasPipeline:
         self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
         self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
-        self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
-        self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
-        #self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
-        #self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
         self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
         self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
@@ -31,6 +31,61 @@ class BiasPipeline:
         ner_results = self.ner(texts)
         return classification_results, ner_results
 # Initialize the BiasPipeline. NOTE(review): this rebinds the module-level name `pipeline`, which BiasPipeline.__init__ calls as a factory (pipeline("text-classification", ...)); a second BiasPipeline() would presumably call this instance instead - confirm and consider renaming.
 pipeline = BiasPipeline()

         self.classifier_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-classification-bert")
         self.classifier_model = AutoModelForSequenceClassification.from_pretrained("newsmediabias/UnBIAS-classification-bert")
+        #self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
+        #self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Named-Entity-Recognition")
+        self.ner_tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-NER")
+        self.ner_model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-NER")
         self.classifier = pipeline("text-classification", model=self.classifier_model, tokenizer=self.classifier_tokenizer)
         self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)
         ner_results = self.ner(texts)
         return classification_results, ner_results
+# Model setup for debiasing: one checkpoint id feeds both the tokenizer and the text-generation pipeline.
+debias_model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
+debias_tokenizer = AutoTokenizer.from_pretrained(debias_model)  # used by get_debiased_sequence to count tokens and supply eos_token_id
+debias_pipeline = transformers.pipeline(
+    "text-generation",
+    model=debias_model,
+    torch_dtype=torch.float16,  # request half-precision weights
+    device_map="auto",  # NOTE(review): presumably delegates device placement to the library - confirm
+)
+# Instruction for debiasing: system-prompt text that get_debiased_sequence wraps in <<SYS>> ... <</SYS>> tags.
+instruction = ("Instruction: As a helpful, respectful and trustworthy debiasing assistant, your "
+               "task is to receive a text and return its unbiased version, without adding any unrelated content "
+               "or additional outputs.")
+def get_debiased_sequence(prompt):
+    """Generate a debiased version of the provided text using the debiasing pipeline."""
+    instruction_prefix = "<s> <<SYS>> {instruction} <</SYS>> [INST]".format(instruction=instruction)
+    instruction_suffix = "[/INST]</s>"
+    full_input_text = f"{instruction_prefix}{prompt}{instruction_suffix}"
+    # Tokenize the full input text to calculate its length in tokens
+    input_tokens = debias_tokenizer.encode(full_input_text)
+    # Ensure max_length is greater than the number of input tokens
+    max_length = len(input_tokens) + 50  # Add a buffer to accommodate generation without truncation
+    try:
+        sequences = debias_pipeline(
+            full_input_text,
+            do_sample=True,
+            top_k=10,
+            num_return_sequences=1,
+            eos_token_id=debias_tokenizer.eos_token_id,
+            max_length=max_length,  # Updated to use calculated max_length
+        )
+        if sequences:
+            res = sequences[0]['generated_text']
+            # Assuming the response also includes the [/INST] tag, split and extract after this tag
+            result_part = res.split('[/INST]')[-1]
+            clean_result = ''.join(c for c in result_part if c.isprintable())
+            return clean_result.strip()
+    except RuntimeError as e:
+        if 'CUDA out of memory' in str(e):
+            torch.cuda.empty_cache()  # Try clearing cache to free up memory
+            return "Error: Out of memory. Please try again with shorter input or less complex instructions."
+        else:
+            raise e  # Re-raise the exception if it's not a memory error
+    return "No output generated. Check model configuration or input."
 # Initialize the BiasPipeline. NOTE(review): this rebinds the module-level name `pipeline`, which BiasPipeline.__init__ calls as a factory (pipeline("text-classification", ...)); a second BiasPipeline() would presumably call this instance instead - confirm and consider renaming.
 pipeline = BiasPipeline()