Spaces:
Sleeping
Sleeping
File size: 4,019 Bytes
4bf6cb6 801d065 4bf6cb6 7a5f863 fac3dc1 4bf6cb6 801d065 4bf6cb6 f7ce05d 4bf6cb6 801d065 4bf6cb6 f7ce05d 4bf6cb6 801d065 ab99c79 801d065 f7ce05d 4bf6cb6 2b0aaf5 801d065 2b0aaf5 801d065 4bf6cb6 f7ce05d 4bf6cb6 f7ce05d 801d065 f7ce05d 2b0aaf5 801d065 2b0aaf5 801d065 f7ce05d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from transformers import AutoModelForSequenceClassification
from transformers import BertTokenizerFast
import torch
import re
import string
import pickle
import streamlit as st
import base64
import plotly.express as px
# NOTE(review): `df` is not referenced anywhere else in the visible part of
# this file -- presumably left over from a Plotly example; confirm before
# removing it.
df = px.data.iris()
@st.cache_data
def get_img_as_base64(file):
    """Return the contents of *file* as a Base64-encoded text string.

    Args:
        file: Path of the file to read; it is opened in binary mode.

    Returns:
        The Base64 encoding of the file's bytes, decoded to ``str``.
    """
    with open(file, "rb") as source:
        encoded = base64.b64encode(source.read())
    return encoded.decode()
# Page-wide stylesheet: background images for the main container and the
# sidebar, a transparent header, a shifted toolbar and a translucent panel.
#
# NOTE(review): the disabled call below passes a URL to get_img_as_base64,
# which opens its argument with open(); it looks like an abandoned attempt to
# inline the background image.
#img = get_img_as_base64("https://catherineasquithgallery.com/uploads/posts/2021-02/1612739741_65-p-goluboi-fon-tsifri-110.jpg")
#
# NOTE(review): the sidebar URL looks like an image-hosting page link rather
# than a direct image file -- confirm that it actually renders.
# NOTE(review): `div.css-1n76uvr.e1tzin5v0` looks like a generated class name
# and may not survive a Streamlit upgrade -- verify.
#
# A plain string is used (no f-prefix): nothing is interpolated, so the CSS
# braces do not need to be doubled.
page_bg_img = """
<style>
[data-testid="stAppViewContainer"] > .main {
background-image: url("https://wallpapercave.com/wp/wp11966930.jpg");
background-size: 115%;
background-position: top left;
background-repeat: no-repeat;
background-attachment: local;
}
[data-testid="stSidebar"] > div:first-child {
background-image: url("https://ibb.co/ZBkdJRg");
background-size: 115%;
background-position: center;
background-repeat: no-repeat;
background-attachment: fixed;
}
[data-testid="stHeader"] {
background: rgba(0,0,0,0);
}
[data-testid="stToolbar"] {
right: 2rem;
}
div.css-1n76uvr.e1tzin5v0 {
background-color: rgba(238, 238, 238, 0.5);
border: 10px solid #EEEEEE;
padding: 5% 5% 5% 10%;
border-radius: 5px;
}
</style>
"""
# Inject the stylesheet; unsafe_allow_html=True asks Streamlit to emit the
# markup as HTML.
st.markdown(page_bg_img, unsafe_allow_html=True)
# Text-cleaning helper
def clean(text):
    """Normalise a raw comment string.

    The text is lower-cased; then links, @mentions, #hashtags and digit runs
    are each replaced by a single space, in that order.

    Args:
        text: The raw comment.

    Returns:
        The cleaned string. Whitespace is not collapsed, so removed tokens
        leave extra spaces behind.
    """
    noise_patterns = (
        r'http\S+',  # links
        r'@\w+',     # user mentions
        r'#\w+',     # hashtags
        r'\d+',      # numbers
    )
    cleaned = text.lower()
    for pattern in noise_patterns:
        cleaned = re.sub(pattern, " ", cleaned)
    return cleaned
# Load the pickled classical-ML artefacts: the classifier and its vectorizer.
# SECURITY: pickle.load can execute code embedded in the file, so both files
# must come from a trusted source.
@st.cache_resource
def _load_pickle(path):
    """Unpickle and return the object stored at *path*.

    Wrapped in ``st.cache_resource`` so that Streamlit script reruns reuse the
    already-loaded object instead of reading the file again.

    Args:
        path: Path of the pickle file to read.

    Returns:
        The unpickled object.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Classifier weights
model_filename = 'model_comments_weights.pkl'
model = _load_pickle(model_filename)

# Vectorizer weights (the fitted object comes straight from the pickle, so no
# fresh CountVectorizer needs to be constructed first)
vectorizer_filename = 'vectorizer_comments_weights.pkl'
vectorizer = _load_pickle(vectorizer_filename)
# The application itself
# Pre-trained RuBERT model
_RUBERT_CHECKPOINT = 'blanchefort/rubert-base-cased-sentiment-rusentiment'


@st.cache_resource
def _load_rubert():
    """Return the ``(tokenizer, model)`` pair for the RuBERT checkpoint.

    Wrapped in ``st.cache_resource`` so that the checkpoint is loaded once and
    reused across Streamlit script reruns instead of on every interaction.
    """
    tokenizer = BertTokenizerFast.from_pretrained(_RUBERT_CHECKPOINT)
    classifier = AutoModelForSequenceClassification.from_pretrained(
        _RUBERT_CHECKPOINT, return_dict=True
    )
    return tokenizer, classifier


tokenizer_bert, model_bert = _load_rubert()
# --- Page content -----------------------------------------------------------
st.title("SafeTalk")
st.write("Your Personal Comment Filter is an innovative application that harnesses the power of AI to distinguish toxic comments from the rest.")
st.write("Empowering users to navigate online discussions with confidence, SafeTalk ensures a more constructive and respectful online community by identifying and flagging harmful content.")
user_review = st.text_input("Enter your comment:", "")

# Classify only once something has been typed: the text input starts out
# empty, and running both models on an empty string would show a verdict for
# a comment that does not exist.
if user_review.strip():
    # NOTE(review): clean() lower-cases the text, yet the checkpoint name says
    # "cased" -- confirm that feeding it lower-cased input is intended.
    user_review_clean = clean(user_review)

    # Classical model: vectorize the single comment and take its one label.
    user_features = vectorizer.transform([user_review_clean])
    prediction = model.predict(user_features)[0]

    # RuBERT: inference only, so gradient tracking is switched off. The class
    # is the argmax of the logits (softmax would not change the argmax).
    inputs = tokenizer_bert(user_review_clean, max_length=512, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model_bert(**inputs)
    prediction_bert = torch.argmax(outputs.logits, dim=1).item()

    st.write("Comment by ML model:", user_review)
    if prediction == 0:
        st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
    else:
        st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)

    st.write("Comment by RuBERT:", user_review)
    # NOTE(review): the wording for classes 0/1/2 is kept from the original;
    # verify it against the checkpoint's id2label mapping.
    if prediction_bert == 0:
        st.markdown("<p style='color: green;'>Controversial comment</p>", unsafe_allow_html=True)
    elif prediction_bert == 1:
        # Green, matching the "Non-toxic" verdict of the ML model above.
        st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
    else:
        st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)
|