SaviAnna committed on
Commit 801d065
1 Parent(s): b611336

Update pages/✨second.py

Files changed (1)
  1. pages/✨second.py +28 -4
pages/✨second.py CHANGED
@@ -1,5 +1,8 @@
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
+ from transformers import AutoModelForSequenceClassification
+ from transformers import BertTokenizerFast
+ import torch
import re
import string
import pickle
@@ -14,12 +17,12 @@ def clean(text):
    text = re.sub(r'\d+', ' ', text)  # remove numbers
    return text

- # Load the model weights
+ # Load the ML model weights

model_filename = 'model_comments_weights.pkl'
with open(model_filename, 'rb') as file:
    model = pickle.load(file)
-
+
# Load the vectorizer weights
vectorizer = CountVectorizer()
vectorizer_filename = 'vectorizer_comments_weights.pkl'
@@ -28,6 +31,17 @@ with open(vectorizer_filename, 'rb') as file:

# The app itself

+
+ # Pre-trained ruBert model
+
+ tokenizer_bert = BertTokenizerFast.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment')
+ model_bert = AutoModelForSequenceClassification.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment', return_dict=True)
+
+
+ def predict(text):
+     inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
+     return torch.argmax(model_bert(**inputs).logits, dim=1).numpy()
+
st.title("SafeTalk")
st.write("Your Personal Comment Filter is an innovative application that harnesses the power of AI to distinguish toxic comments from the rest.")
st.write("Empowering users to navigate online discussions with confidence, SafeTalk ensures a more constructive and respectful online community by identifying and flagging harmful content.")
@@ -35,12 +49,22 @@ user_review = st.text_input("Enter your comment:", "")
user_review_clean = clean(user_review)
user_features = vectorizer.transform([user_review_clean])
prediction = model.predict(user_features)
-
- st.write("Comment:", user_review)
+ inputs = tokenizer_bert(user_review_clean, max_length=512, padding=True, truncation=True, return_tensors='pt')
+ outputs = model_bert(**inputs)
+ prediction_bert = torch.nn.functional.softmax(outputs.logits, dim=1)
+ prediction_bert = torch.argmax(prediction_bert, dim=1).numpy()
+ st.write("Comment by ML model:", user_review)

if prediction == 0:
    st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
else:
    st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
+ st.write("Comment by RuBERT:", user_review)

+ if prediction_bert == 0:
+     st.markdown("<p style='color: green;'>Controversial comment</p>", unsafe_allow_html=True)
+ elif prediction_bert == 1:
+     st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
+ else:
+     st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
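
For readers who want to try the RuBERT path from this commit outside of Streamlit, here is a minimal standalone sketch. The checkpoint name and the id-to-message mapping (0 → controversial, 1 → non-toxic, 2 → toxic) are taken from the diff above; the @torch.no_grad() decorator, the labels dict, the MODEL_NAME constant and the example string are illustrative additions, not part of the committed file.

import torch
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

MODEL_NAME = 'blanchefort/rubert-base-cased-sentiment-rusentiment'
tokenizer_bert = BertTokenizerFast.from_pretrained(MODEL_NAME)
model_bert = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, return_dict=True)

@torch.no_grad()
def predict(text):
    # Tokenize, run the classifier, and return the argmax class id as a numpy array.
    inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
    probs = torch.nn.functional.softmax(model_bert(**inputs).logits, dim=1)
    return torch.argmax(probs, dim=1).numpy()

# Same id-to-message mapping the Streamlit page uses.
labels = {0: 'Controversial comment', 1: 'Non-toxic comment', 2: 'Toxic comment'}
print(labels[int(predict('Отличный сервис, спасибо!')[0])])

As in the diff, the class is picked by argmax; the softmax step only matters if the probabilities themselves are to be shown in the UI.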