shainaraza committed on
Commit
7eca2e2
1 Parent(s): 36d3b4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -41
app.py CHANGED
@@ -1,47 +1,57 @@
 
1
  import streamlit as st
2
- from transformers import AutoTokenizer, pipeline
3
- import transformers
4
- import torch
5
- import pandas as pd
6
-
7
# Model setup: one Hub checkpoint id is shared by the tokenizer and the
# text-generation pipeline.
model = "newsmediabias/UnBIAS-LLama2-Debiaser-Chat-QLoRA"
tokenizer = AutoTokenizer.from_pretrained(model)

# The pipeline is asked for float16 weights and automatic device placement.
debias_pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)

# System instruction that is embedded in every prompt sent to the model.
instruction = (
    "Instruction: As a helpful, respectful and trustworthy debiasing assistant, "
    "your task is to receive a text and return its unbiased version, "
    "without adding any unrelated content or additional outputs."
)
21
-
22
def get_debiased_sequence(prompt):
    """Return the model's debiased rewrite of ``prompt``.

    The prompt is wrapped together with the module-level ``instruction`` in
    ``<<SYS>>`` / ``[INST]`` markers, one sampled sequence is generated, and
    only the text after the last ``[/INST]`` marker is kept. Non-printable
    characters are removed and surrounding whitespace is stripped.
    """
    llama_prompt = f"<s> <<SYS>> {instruction} <</SYS>> [INST]{prompt} [/INST]"

    generation_kwargs = dict(
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # The budget is derived from the character length of the raw prompt.
        max_length=len(prompt) + 100,
    )
    outputs = debias_pipeline(llama_prompt, **generation_kwargs)

    generated = outputs[0]['generated_text']
    # Keep everything after the final marker; if the marker is absent the
    # whole generated text is kept.
    _, _, answer = generated.rpartition('[/INST]')
    printable_only = ''.join(filter(str.isprintable, answer))
    return printable_only.strip()
 
 
 
 
 
 
 
 
37
 
38
  # Streamlit interface
39
  st.title('UnBIAS App')
40
# Collect the text, then react to the button: debias when there is input,
# otherwise ask the user to provide some.
input_text = st.text_area("Enter text to debias:", height=150)
debias_clicked = st.button("Debias Text")
if debias_clicked and not input_text:
    st.write("Please enter some text to debias.")
elif debias_clicked:
    st.write("Debiased Text:", get_debiased_sequence(input_text))
47
-
 
1
+ # %%writefile debias_app.py  (Jupyter cell magic left over from a notebook; not valid in a plain .py module)
2
  import streamlit as st
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForTokenClassification, pipeline
4
+
5
+ # Define the BiasPipeline class with text processing methods
6
class BiasPipeline:
    """Bundle the UnBIAS classification and NER pipelines with text helpers."""

    # Characters that mark the end of a complete sentence.
    _SENTENCE_ENDINGS = (".", "!", "?")

    def __init__(self):
        """Create the pipeline and immediately load all models and tokenizers."""
        self.load_resources()

    def load_resources(self):
        """Load models and tokenizers and build the two inference pipelines."""
        self.classifier_tokenizer = AutoTokenizer.from_pretrained(
            "newsmediabias/UnBIAS-classification-bert"
        )
        self.classifier_model = AutoModelForSequenceClassification.from_pretrained(
            "newsmediabias/UnBIAS-classification-bert"
        )

        self.ner_tokenizer = AutoTokenizer.from_pretrained(
            "newsmediabias/UnBIAS-Named-Entity-Recognition"
        )
        self.ner_model = AutoModelForTokenClassification.from_pretrained(
            "newsmediabias/UnBIAS-Named-Entity-Recognition"
        )

        self.classifier = pipeline(
            "text-classification",
            model=self.classifier_model,
            tokenizer=self.classifier_tokenizer,
        )
        self.ner = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)

    def clean_text(self, text: str) -> str:
        """Collapse every run of whitespace to one space and trim both ends."""
        return ' '.join(text.split())

    def complete_sentence(self, text: str) -> str:
        """Drop a trailing sentence fragment, if there is one.

        Text that already ends with ``.``, ``!`` or ``?`` (ignoring trailing
        whitespace) is returned unchanged. Otherwise everything after the
        last ``". "`` separator is removed and a closing full stop is added.
        Text without any ``". "`` separator is returned unchanged.
        """
        stripped = text.rstrip()
        # A question or exclamation is a finished sentence too, and trailing
        # whitespace or newlines must not make a finished text look cut off.
        if stripped.endswith(self._SENTENCE_ENDINGS):
            return text

        sentences = stripped.split(". ")
        if len(sentences) > 1:
            return ". ".join(sentences[:-1]) + "."
        return text

    def create_token_limit(self, text: str) -> int:
        """Return 2.5 times the whitespace-separated word count, rounded.

        Rounding uses the built-in ``round``, so exact halves go to the
        nearest even integer.
        """
        words = text.split()
        max_length = round(len(words) + 1.5 * len(words))
        return max_length

    def process(self, texts):
        """Run the classifier and the NER pipeline on ``texts``.

        Returns:
            A ``(classification_results, ner_results)`` tuple holding the raw
            output of each pipeline.
        """
        classification_results = self.classifier(texts)
        ner_results = self.ner(texts)
        return classification_results, ner_results
43
+
44
# Initialize the BiasPipeline.
# The instance is deliberately not called `pipeline`: that name is the factory
# imported from transformers, and rebinding it at module level would hide the
# factory from any later code in this module.
bias_pipeline = BiasPipeline()

# Streamlit interface
st.title('UnBIAS App')
input_text = st.text_area("Enter text:", height=150)
if st.button("Process Text"):
    # Normalize first so whitespace-only input is treated as empty instead of
    # being sent to the models.
    cleaned_text = bias_pipeline.clean_text(input_text) if input_text else ""
    if cleaned_text:
        classification_results, ner_results = bias_pipeline.process(cleaned_text)
        st.write("Classification Results:", classification_results)
        st.write("Named Entity Recognition Results:", ner_results)
    else:
        st.write("Please enter some text to process.")