mskov committed on
Commit
d7bfcf2
·
1 Parent(s): 7dd4190

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -88
app.py CHANGED
@@ -37,8 +37,7 @@ emo_dict = {
37
  class_options = {
38
  "racism": ["racism", "hate speech", "bigotry", "racially targeted", "racial slur", "ethnic slur", "ethnic hate", "pro-white nationalism"],
39
  "LGBTQ+ hate": ["gay slur", "trans slur", "homophobic slur", "transphobia", "anti-LBGTQ+", "hate speech"],
40
- "sexually explicit": ["sexually explicit", "sexually coercive", "sexual exploitation", "vulgar", "raunchy", "sexist", "sexually demeaning", "sexual violence", "victim blaming"],
41
- "misophonia": ["chewing", "breathing", "mouthsounds", "popping", "sneezing", "yawning", "smacking", "sniffling", "panting"]
42
  }
43
 
44
  pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
@@ -72,99 +71,59 @@ def classify_toxicity(audio_file, text_input, classify_anxiety, emo_class, expli
72
  transcribed_text = pipe(audio_file)["text"]
73
  else:
74
  transcribed_text = text_input
75
- if classify_anxiety != "misophonia":
76
- print("emo_class ", emo_class, "explitive select", explitive_selection)
77
 
78
- ## SLIDER ##
79
- threshold = slider_logic(slider)
80
-
81
- #------- explitive call ---------------
82
-
83
- if replace_explitives != None and emo_class == None:
84
- transcribed_text = replace_explitives.sub_explitives(transcribed_text, explitive_selection)
85
-
86
- #### Toxicity Classifier ####
87
-
88
- toxicity_module = evaluate.load("toxicity", "facebook/roberta-hate-speech-dynabench-r4-target")
89
- #toxicity_module = evaluate.load("toxicity", 'DaNLP/da-electra-hatespeech-detection', module_type="measurement")
90
-
91
- toxicity_results = toxicity_module.compute(predictions=[transcribed_text])
92
-
93
- toxicity_score = toxicity_results["toxicity"][0]
94
- print(toxicity_score)
95
- # emo call
96
- if emo_class != None:
97
- classify_emotion(audio_file)
98
-
99
- #### Text classification #####
100
 
101
- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
102
 
103
- text_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
 
104
 
105
- sequence_to_classify = transcribed_text
106
- print(classify_anxiety, class_options)
107
- candidate_labels = class_options.get(classify_anxiety, [])
108
- # classification_output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
109
- classification_output = text_classifier(sequence_to_classify, candidate_labels, multi_label=True)
110
- print("class output ", type(classification_output))
111
- # classification_df = pd.DataFrame.from_dict(classification_output)
112
- print("keys ", classification_output.keys())
113
 
114
- # formatted_classification_output = "\n".join([f"{key}: {value}" for key, value in classification_output.items()])
115
- # label_score_pairs = [(label, score) for label, score in zip(classification_output['labels'], classification_output['scores'])]
116
- label_score_dict = {label: score for label, score in zip(classification_output['labels'], classification_output['scores'])}
117
-
118
-
119
 
120
-
121
-
122
- # plot.update(x=classification_df["labels"], y=classification_df["scores"])
123
- if toxicity_score > threshold:
124
- print("threshold exceeded!! Launch intervention")
125
- affirm = positive_affirmations()
126
- else:
127
- affirm = ""
128
 
129
- return toxicity_score, label_score_dict, transcribed_text, affirm
130
- # return f"Toxicity Score ({available_models[selected_model]}): {toxicity_score:.4f}"
131
- else:
132
- threshold = slider_logic(slider)
133
- model = whisper.load_model("large")
134
- # model = model_cache[model_name]
135
- # class_names = classify_anxiety.split(",")
136
- class_names_list = class_options.get(classify_anxiety, [])
137
- class_str = ""
138
- for elm in class_names_list:
139
- class_str += elm + ","
140
- #class_names = class_names_temp.split(",")
141
- class_names = class_str.split(",")
142
- print("class names ", class_names, "classify_anxiety ", classify_anxiety)
143
-
144
- tokenizer = get_tokenizer("large")
145
- # tokenizer= WhisperTokenizer.from_pretrained("openai/whisper-large")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
- internal_lm_average_logprobs = classify.calculate_internal_lm_average_logprobs(
148
- model=model,
149
- class_names=class_names,
150
- # class_names=classify_anxiety,
151
- tokenizer=tokenizer,
152
- )
153
- audio_features = classify.calculate_audio_features(audio_file, model)
154
- average_logprobs = classify.calculate_average_logprobs(
155
- model=model,
156
- audio_features=audio_features,
157
- class_names=class_names,
158
- tokenizer=tokenizer,
159
- )
160
- average_logprobs -= internal_lm_average_logprobs
161
- scores = average_logprobs.softmax(-1).tolist()
162
- holder1 = {class_name: score for class_name, score in zip(class_names, scores)}
163
- # miso_label_dict = {label: score for label, score in classify_anxiety[0].items()}
164
- holder2 = ""
165
- holder3= " "
166
- return holder1, holder1, holder2, holder3
167
-
168
  def positive_affirmations():
169
  affirmations = [
170
  "I have survived my anxiety before and I will survive again now",
@@ -178,7 +137,7 @@ def positive_affirmations():
178
  with gr.Blocks() as iface:
179
  show_state = gr.State([])
180
  with gr.Column():
181
- anxiety_class = gr.Radio(["racism", "LGBTQ+ hate", "sexually explicit", "misophonia"])
182
  explit_preference = gr.Radio(choices=["N-Word", "B-Word", "All Explitives"], label="Words to omit from general anxiety classes", info="certain words may be acceptible within certain contects for given groups of people, and some people may be unbothered by explitives broadly speaking.")
183
  emo_class = gr.Radio(choices=["negaitve emotionality"], label="label", info="Select if you would like explitives to be considered anxiety-indiucing in the case of anger/ negative emotionality.")
184
  sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")
 
37
  class_options = {
38
  "racism": ["racism", "hate speech", "bigotry", "racially targeted", "racial slur", "ethnic slur", "ethnic hate", "pro-white nationalism"],
39
  "LGBTQ+ hate": ["gay slur", "trans slur", "homophobic slur", "transphobia", "anti-LBGTQ+", "hate speech"],
40
+ "sexually explicit": ["sexually explicit", "sexually coercive", "sexual exploitation", "vulgar", "raunchy", "sexist", "sexually demeaning", "sexual violence", "victim blaming"]
 
41
  }
42
 
43
  pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large")
 
71
  transcribed_text = pipe(audio_file)["text"]
72
  else:
73
  transcribed_text = text_input
 
 
74
 
75
+ print("emo_class ", emo_class, "explitive select", explitive_selection)
76
+
77
+ ## SLIDER ##
78
+ threshold = slider_logic(slider)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
 
80
+ #------- explitive call ---------------
81
 
82
+ if replace_explitives != None and emo_class == None:
83
+ transcribed_text = replace_explitives.sub_explitives(transcribed_text, explitive_selection)
84
 
85
+ #### Toxicity Classifier ####
 
 
 
 
 
 
 
86
 
87
+ toxicity_module = evaluate.load("toxicity", "facebook/roberta-hate-speech-dynabench-r4-target")
88
+ #toxicity_module = evaluate.load("toxicity", 'DaNLP/da-electra-hatespeech-detection', module_type="measurement")
 
 
 
89
 
90
+ toxicity_results = toxicity_module.compute(predictions=[transcribed_text])
 
 
 
 
 
 
 
91
 
92
+ toxicity_score = toxicity_results["toxicity"][0]
93
+ print(toxicity_score)
94
+ # emo call
95
+ if emo_class != None:
96
+ classify_emotion(audio_file)
97
+
98
+ #### Text classification #####
99
+
100
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
101
+
102
+ text_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
103
+
104
+ sequence_to_classify = transcribed_text
105
+ print(classify_anxiety, class_options)
106
+ candidate_labels = class_options.get(classify_anxiety, [])
107
+ # classification_output = classifier(sequence_to_classify, candidate_labels, multi_label=False)
108
+ classification_output = text_classifier(sequence_to_classify, candidate_labels, multi_label=True)
109
+ print("class output ", type(classification_output))
110
+ # classification_df = pd.DataFrame.from_dict(classification_output)
111
+ print("keys ", classification_output.keys())
112
+
113
+ # formatted_classification_output = "\n".join([f"{key}: {value}" for key, value in classification_output.items()])
114
+ # label_score_pairs = [(label, score) for label, score in zip(classification_output['labels'], classification_output['scores'])]
115
+ label_score_dict = {label: score for label, score in zip(classification_output['labels'], classification_output['scores'])}
116
+
117
+ # plot.update(x=classification_df["labels"], y=classification_df["scores"])
118
+ if toxicity_score > threshold:
119
+ print("threshold exceeded!! Launch intervention")
120
+ affirm = positive_affirmations()
121
+ else:
122
+ affirm = ""
123
+
124
+ return toxicity_score, label_score_dict, transcribed_text, affirm
125
+ # return f"Toxicity Score ({available_models[selected_model]}): {toxicity_score:.4f}"
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def positive_affirmations():
128
  affirmations = [
129
  "I have survived my anxiety before and I will survive again now",
 
137
  with gr.Blocks() as iface:
138
  show_state = gr.State([])
139
  with gr.Column():
140
+ anxiety_class = gr.Radio(["racism", "LGBTQ+ hate", "sexually explicit"])
141
  explit_preference = gr.Radio(choices=["N-Word", "B-Word", "All Explitives"], label="Words to omit from general anxiety classes", info="certain words may be acceptible within certain contects for given groups of people, and some people may be unbothered by explitives broadly speaking.")
142
  emo_class = gr.Radio(choices=["negaitve emotionality"], label="label", info="Select if you would like explitives to be considered anxiety-indiucing in the case of anger/ negative emotionality.")
143
  sense_slider = gr.Slider(minimum=1, maximum=5, step=1.0, label="How readily do you want the tool to intervene? 1 = in extreme cases and 5 = at every opportunity")