Update pages/✨second.py
pages/✨second.py  CHANGED  +28 -4
@@ -1,5 +1,8 @@
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.linear_model import LogisticRegression
+from transformers import AutoModelForSequenceClassification
+from transformers import BertTokenizerFast
+import torch
 import re
 import string
 import pickle
@@ -14,12 +17,12 @@ def clean(text):
     text = re.sub(r'\d+', ' ', text)  # remove numbers
     return text
 
-# Load the model weights
+# Load the ML model weights
 
 model_filename = 'model_comments_weights.pkl'
 with open(model_filename, 'rb') as file:
     model = pickle.load(file)
-
+
 # Load the vectorizer weights
 vectorizer = CountVectorizer()
 vectorizer_filename = 'vectorizer_comments_weights.pkl'
@@ -28,6 +31,17 @@ with open(vectorizer_filename, 'rb') as file:
 
 # The app itself
 
+
+# Pre-trained ruBert model
+
+tokenizer_bert = BertTokenizerFast.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment')
+model_bert = AutoModelForSequenceClassification.from_pretrained('blanchefort/rubert-base-cased-sentiment-rusentiment', return_dict=True)
+
+
+def predict(text):
+    inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
+    outputs = model_bert(**inputs)
+    predicted = torch.argmax(torch.nn.functional.softmax(outputs.logits, dim=1), dim=1).numpy()
+    return predicted
+
 st.title("SafeTalk")
 st.write("Your Personal Comment Filter is an innovative application that harnesses the power of AI to distinguish toxic comments from the rest.")
 st.write("Empowering users to navigate online discussions with confidence, SafeTalk ensures a more constructive and respectful online community by identifying and flagging harmful content.")
@@ -35,12 +49,22 @@ user_review = st.text_input("Enter your comment:", "")
 user_review_clean = clean(user_review)
 user_features = vectorizer.transform([user_review_clean])
 prediction = model.predict(user_features)
-
-
+inputs = tokenizer_bert(user_review_clean, max_length=512, padding=True, truncation=True, return_tensors='pt')
+outputs = model_bert(**inputs)
+prediction_bert = torch.nn.functional.softmax(outputs.logits, dim=1)
+prediction_bert = torch.argmax(prediction_bert, dim=1).numpy()
+st.write("Comment by ML model:", user_review)
 
 if prediction == 0:
     st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
 else:
     st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
+st.write("Comment by RuBERT:", user_review)
 
+if prediction_bert == 0:
+    st.markdown("<p style='color: green;'>Controversial comment</p>", unsafe_allow_html=True)
+elif prediction_bert == 1:
+    st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
+else:
+    st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
 
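For reference, below is a minimal standalone sketch of the RuBERT branch this commit adds, assuming the label convention documented on the blanchefort/rubert-base-cased-sentiment-rusentiment model card (0 = NEUTRAL, 1 = POSITIVE, 2 = NEGATIVE), which the app renders as "Controversial", "Non-toxic" and "Toxic". The label_to_verdict helper, MODEL_NAME constant and the example sentence are illustrative only and are not part of the committed file.

import torch
from transformers import AutoModelForSequenceClassification, BertTokenizerFast

MODEL_NAME = 'blanchefort/rubert-base-cased-sentiment-rusentiment'  # same checkpoint as in the commit
tokenizer_bert = BertTokenizerFast.from_pretrained(MODEL_NAME)
model_bert = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, return_dict=True)

def label_to_verdict(text: str) -> str:
    # Hypothetical helper name; mirrors the inline code from the diff.
    inputs = tokenizer_bert(text, max_length=512, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        logits = model_bert(**inputs).logits
    label = int(torch.argmax(torch.nn.functional.softmax(logits, dim=1), dim=1))
    # Assumed mapping: 0 = NEUTRAL, 1 = POSITIVE, 2 = NEGATIVE (per the model card).
    return {0: 'Controversial comment', 1: 'Non-toxic comment', 2: 'Toxic comment'}[label]

print(label_to_verdict('Отличный сервис, спасибо!'))  # a positive review should typically map to 'Non-toxic comment'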