File size: 2,088 Bytes
7f8de92
0fe2dfa
 
0934d02
d077642
70f481c
0acc1ca
3754612
0f64e73
5d5484f
d077642
0f64e73
17bb76c
854d0de
a2a9b24
 
0934d02
17bb76c
 
be6580d
a2a9b24
 
 
34b533c
17bb76c
 
be6580d
5d5484f
61af1a8
5d5484f
 
 
de6a24f
c5ececb
 
 
 
 
b6d255b
c5ececb
a169dec
 
 
2fda257
a169dec
 
 
bed48c7
c5ececb
 
 
de6a24f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import streamlit as st
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification

st.title("Milestone #2 offensive statement prediction with pre-trained models")
st.write("in this basic demo you can select a model to judge whether or not the text below is offensive")

# Fixed demo sentence that every backend classifies.
text = "I Hate cocksuckers"
st.write(text)

# Available backends: a generic zero-shot pipeline, a RoBERTa model fine-tuned
# for offensive-language detection, and a custom fine-tuned DistilBERT model.
options = ["zero-shot-classification", "cardiffnlp/twitter-roberta-base-offensive", "Greys/milestonemodel"]
# NOTE: renamed from `model` so the Hugging Face model object loaded in the
# last branch no longer shadows the user's selection.
choice = st.selectbox("Select a  pre-trained model", options)

con = st.button("Submit")
if con:
    # The three branches are mutually exclusive, so use elif rather than
    # independent ifs.
    if choice == "zero-shot-classification":
        # Zero-shot classification scores the text against arbitrary candidate
        # labels without task-specific fine-tuning.
        classifier = pipeline(choice)
        res = classifier(text, candidate_labels=["offensive"])
        label = res['labels'][0]
        score = res['scores'][0]
        st.write(f"Prediction: {label}, Score: {score*100}% chance")

    elif choice == "cardiffnlp/twitter-roberta-base-offensive":
        # Text-classification pipeline with an explicit matching tokenizer.
        classifier = pipeline('text-classification', model='cardiffnlp/twitter-roberta-base-offensive', tokenizer='cardiffnlp/twitter-roberta-base-offensive')
        result = classifier(text)
        label = result[0]['label']
        score = result[0]['score']
        st.write(f"Prediction: {label}, Score: {score*100}% chance")

    elif choice == "Greys/milestonemodel":
        # BUG FIX: everything below was previously dedented out of this branch,
        # so it ran for *every* model selection and raised NameError on
        # `tokenizer` whenever another model was chosen. It is now confined
        # to this branch.
        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
        model = AutoModelForSequenceClassification.from_pretrained("Greys/milestonemodel")
        # Output-label order of the fine-tuned toxicity classifier
        # (id,toxic,severe_toxic,obscene,threat,insult,identity_hate schema).
        my_list = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']

        def classify_sentence(sentence):
            """Return per-label softmax probabilities for `sentence` as a 1-D numpy array."""
            inputs = tokenizer(sentence, return_tensors="pt")
            outputs = model(**inputs)
            probs = outputs.logits.softmax(dim=1)
            return probs.detach().numpy()[0]

        probs = classify_sentence(text)
        # Argmax over the probabilities replaces the hand-rolled
        # find_largest_number loop; removed the stray debug print().
        index = max(range(len(probs)), key=probs.__getitem__)
        st.write(my_list[index])