Spaces:

SaviAnna
/

history_mistery

Sleeping

App Files Files Community

history_mistery / pages /🤢 You_are_toxic.py

MARI-posa

Update pages/🤢 You_are_toxic.py

fac3dc1 over 1 year ago

raw

history blame

4.02 kB

	from sklearn.feature_extraction.text import CountVectorizer
	from sklearn.linear_model import LogisticRegression
	from transformers import AutoModelForSequenceClassification
	from transformers import BertTokenizerFast
	import torch
	import re
	import string
	import pickle
	import streamlit as st

	import base64
	import plotly.express as px

	# Loads Plotly's bundled iris sample dataset.
	# NOTE(review): `df` is not referenced anywhere else in the visible file; this
	# looks like a leftover from a Plotly example - confirm before removing.
	df = px.data.iris()

	@st.cache_data
	def get_img_as_base64(file):
	    """Return the contents of *file* encoded as a base64 text string.

	    The file is opened in binary mode and read in full; the encoded bytes
	    are decoded to ``str`` before being returned. Calls are memoised
	    through Streamlit's ``cache_data`` decorator.
	    """
	    with open(file, "rb") as handle:
	        raw_bytes = handle.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode()


	# The backgrounds below are referenced by remote URL. get_img_as_base64()
	# reads a local path with open(), so it cannot be fed an http(s) URL and is
	# not used here.

	# Custom CSS injected into the page: background images for the main view and
	# the sidebar, a transparent header, a shifted toolbar and a framed container.
	# This is a plain (non-f) string: nothing is interpolated, so the CSS braces
	# are written once instead of being doubled as f-string escapes.
	# NOTE(review): the sidebar url points at https://ibb.co/..., which looks like
	# an image-hosting page rather than a direct image file - confirm it renders.
	page_bg_img = """
	<style>
	[data-testid="stAppViewContainer"] > .main {
	background-image: url("https://wallpapercave.com/wp/wp11966930.jpg");
	background-size: 115%;
	background-position: top left;
	background-repeat: no-repeat;
	background-attachment: local;
	}

	[data-testid="stSidebar"] > div:first-child {
	background-image: url("https://ibb.co/ZBkdJRg");
	background-size: 115%;
	background-position: center;
	background-repeat: no-repeat;
	background-attachment: fixed;
	}

	[data-testid="stHeader"] {
	background: rgba(0,0,0,0);
	}

	[data-testid="stToolbar"] {
	right: 2rem;
	}

	div.css-1n76uvr.e1tzin5v0 {
	background-color: rgba(238, 238, 238, 0.5);
	border: 10px solid #EEEEEE;
	padding: 5% 5% 5% 10%;
	border-radius: 5px;
	}

	</style>
	"""
	# unsafe_allow_html is required so the <style> tag is emitted as raw HTML.
	st.markdown(page_bg_img, unsafe_allow_html=True)

	# Text-cleaning helper
	def clean(text):
	    """Return *text* lower-cased with noisy tokens blanked out.

	    After lower-casing, each of the following is replaced by a single
	    space, in this order: links (``http...``), ``@mentions``,
	    ``#hashtags`` and runs of digits.
	    """
	    substitutions = (
	        (r'http\S+', " "),  # links
	        (r'@\w+', ' '),     # user mentions
	        (r'#\w+', ' '),     # hashtags
	        (r'\d+', ' '),      # numbers
	    )
	    cleaned = text.lower()
	    for pattern, filler in substitutions:
	        cleaned = re.sub(pattern, filler, cleaned)
	    return cleaned

	# Load the pickled classical-ML artefacts (classifier and fitted vectorizer).
	@st.cache_resource
	def _load_pickle(path):
	    """Unpickle and return the object stored at *path*.

	    Wrapped in ``st.cache_resource`` so the file is read once and the same
	    object is reused on later script reruns instead of being unpickled
	    again on every interaction.

	    NOTE: ``pickle.load`` can execute arbitrary code; only use it on files
	    that ship with the app, never on user-supplied data.
	    """
	    with open(path, 'rb') as file:
	        return pickle.load(file)

	# ML model weights
	model_filename = 'model_comments_weights.pkl'
	model = _load_pickle(model_filename)

	# Vectorizer weights (the fitted object comes straight from the pickle, so no
	# fresh CountVectorizer needs to be constructed first)
	vectorizer_filename = 'vectorizer_comments_weights.pkl'
	vectorizer = _load_pickle(vectorizer_filename)

	# The app itself


	# Pretrained RuBERT sentiment model
	_RUBERT_CHECKPOINT = 'blanchefort/rubert-base-cased-sentiment-rusentiment'

	@st.cache_resource
	def _load_rubert(checkpoint):
	    """Return ``(tokenizer, model)`` loaded from the pretrained *checkpoint*.

	    Wrapped in ``st.cache_resource`` so the tokenizer and the classifier are
	    instantiated once and reused on later script reruns rather than being
	    rebuilt on every interaction.
	    """
	    tokenizer = BertTokenizerFast.from_pretrained(checkpoint)
	    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint, return_dict=True)
	    return tokenizer, classifier

	tokenizer_bert, model_bert = _load_rubert(_RUBERT_CHECKPOINT)




	# Page header and the comment input box.
	st.title("SafeTalk")
	st.write("Your Personal Comment Filter is an innovative application that harnesses the power of AI to distinguish toxic comments from the rest.")
	st.write("Empowering users to navigate online discussions with confidence, SafeTalk ensures a more constructive and respectful online community by identifying and flagging harmful content.")
	user_review = st.text_input("Enter your comment:", "")

	# The text box starts out empty; skip inference and the verdicts until the
	# user has actually typed something.
	if user_review.strip():
	    user_review_clean = clean(user_review)

	    # Classical model: vectorize the cleaned text and classify it. predict()
	    # is given a one-item batch, so take the single label out of the result.
	    user_features = vectorizer.transform([user_review_clean])
	    prediction = model.predict(user_features)[0]

	    # RuBERT: tokenize and run a forward pass. no_grad() avoids building an
	    # autograd graph that inference never uses.
	    inputs = tokenizer_bert(user_review_clean, max_length=512, padding=True, truncation=True, return_tensors='pt')
	    with torch.no_grad():
	        outputs = model_bert(**inputs)
	    # argmax over the logits picks the same class as argmax over softmax
	    # probabilities, so the softmax step is not needed.
	    prediction_bert = int(torch.argmax(outputs.logits, dim=1).item())

	    st.write("Comment by ML model:", user_review)
	    if prediction == 0:
	        st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
	    else:
	        st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)

	    st.write("Comment by RuBERT:", user_review)
	    # NOTE(review): this mapping presumes the checkpoint's class ids are
	    # 0 = neutral, 1 = positive, 2 = negative - confirm against the model card.
	    if prediction_bert == 0:
	        # NOTE(review): green is kept from the original; a neutral colour may
	        # suit "Controversial" better.
	        st.markdown("<p style='color: green;'>Controversial comment</p>", unsafe_allow_html=True)
	    elif prediction_bert == 1:
	        # Green, matching the non-toxic verdict of the ML model above.
	        st.markdown("<p style='color: green;'>Non-toxic comment</p>", unsafe_allow_html=True)
	    else:
	        st.markdown("<p style='color: red;'>Toxic comment</p>", unsafe_allow_html=True)