Update pages/✨second.py
pages/✨second.py  CHANGED  +28 -4
@@ -1,5 +1,8 @@
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.linear_model import LogisticRegression
+from transformers import AutoModelForSequenceClassification
+from transformers import BertTokenizerFast
+import torch
 import re
 import string
 import pickle
@@ -14,12 +17,12 @@ def clean(text):
     text = re.sub(r'\d+', ' ', text)  # remove numbers
     return text
 
-# Load the model weights
+# Load the ML model weights
 
 model_filename = 'model_comments_weights.pkl'
 with open(model_filename, 'rb') as file:
     model = pickle.load(file)
-
+
 # Load the vectorizer weights
 vectorizer = CountVectorizer()
 vectorizer_filename = 'vectorizer_comments_weights.pkl'
@@ -28,6 +31,17 @@ with open(vectorizer_filename, 'rb') as file:
 
 # The app itself
 
+
+# Pre-trained ruBert model
+
+tokenizer_bert = BertTokenizerFast.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment')
+model_bert = AutoModelForSequenceClassification.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment', return_dict=True)
+
+
+def predict(text):
+    inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
+    outputs = model_bert(**inputs)
+    predicted = torch.argmax(torch.nn.functional.softmax(outputs.logits, dim=1), dim=1).numpy()
+    return predicted
+
 st.title("SafeTalk")
 st.write("Your Personal Comment Filter is an innovative application that harnesses the power of AI to distinguish toxic comments from the rest.")
 st.write("Empowering users to navigate online discussions with confidence, SafeTalk ensures a more constructive and respectful online community by identifying and flagging harmful content.")
@@ -35,12 +49,22 @@ user_review = st.text_input("Enter your comment:", "")
 user_review_clean = clean(user_review)
 user_features = vectorizer.transform([user_review_clean])
 prediction = model.predict(user_features)
-
-
+inputs = tokenizer_bert(user_review_clean, max_length=512, padding=True, truncation=True, return_tensors='pt')
+outputs = model_bert(**inputs)
+prediction_bert = torch.nn.functional.softmax(outputs.logits, dim=1)
+prediction_bert = torch.argmax(prediction_bert, dim=1).numpy()
+st.write("Comment by ML model:", user_review)
 
 if prediction == 0:
     st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
 else:
     st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
+st.write("Comment by RuBERT:", user_review)
 
+if prediction_bert == 0:
+    st.markdown("<p style='color: green;'>Controversial comment</p>", unsafe_allow_html=True)
+elif prediction_bert == 1:
+    st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
+else:
+    st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
 
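For reference, below is a minimal standalone sketch of the RuBERT branch this commit adds, assuming the label convention documented on the blanchefort/rubert-base-cased-sentiment-rusentiment model card (0 = NEUTRAL, 1 = POSITIVE, 2 = NEGATIVE), which the app renders as "Controversial", "Non-toxic" and "Toxic". The label_to_verdict helper, MODEL_NAME constant and the example sentence are illustrative only and are not part of the committed file.

import torch
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

MODEL_NAME = 'blanchefort/rubert-base-cased-sentiment-rusentiment'  # same checkpoint as in the commit
tokenizer_bert = BertTokenizerFast.from_pretrained(MODEL_NAME)
model_bert = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, return_dict=True)

def label_to_verdict(text: str) -> str:
    # Hypothetical helper name; mirrors the inline code from the diff.
    inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        logits = model_bert(**inputs).logits
    label = int(torch.argmax(torch.nn.functional.softmax(logits, dim=1), dim=1))
    # Assumed mapping: 0 = NEUTRAL, 1 = POSITIVE, 2 = NEGATIVE (per the model card).
    return {0: 'Controversial comment', 1: 'Non-toxic comment', 2: 'Toxic comment'}[label]

print(label_to_verdict('Отличный сервис, спасибо!'))  # a positive review should typically map to 'Non-toxic comment'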