Spaces:
Sleeping
Sleeping
Commit
·
8380741
1
Parent(s):
27b5282
Update app.py
Browse files
app.py
CHANGED
@@ -7,11 +7,14 @@ import tensorflow as tf
|
|
7 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
import time
|
|
|
10 |
|
|
|
|
|
|
|
11 |
clf = load('my_model_filename.pkl')
|
12 |
vectorizer = load('tfidf_vectorizer.pkl')
|
13 |
scaler = load('scaler.joblib')
|
14 |
-
|
15 |
tukinazor = load('tokenizer.pkl')
|
16 |
rnn_model = load_model('path_to_my_model.h5')
|
17 |
bert_model = BertForSequenceClassification.from_pretrained('my_bert_model')
|
@@ -19,6 +22,26 @@ tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
|
|
19 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
20 |
bert_model = bert_model.to(device)
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def predict_text(text):
|
23 |
sequences = tukinazor.texts_to_sequences([text])
|
24 |
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=200, padding='post', truncating='post')
|
@@ -27,8 +50,19 @@ def predict_text(text):
|
|
27 |
return predicted_class
|
28 |
|
29 |
|
30 |
-
# Запуск приложения
|
31 |
def main():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
st.title("Модель классификации отзывов")
|
33 |
|
34 |
# Ввод текста
|
@@ -77,5 +111,14 @@ def main():
|
|
77 |
st.write(f"Прогнозируемый класс (BERT): {predictions.item() + 1}")
|
78 |
st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
|
79 |
|
80 |
-
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from tensorflow.keras.preprocessing.text import Tokenizer
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
import time
|
10 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
11 |
|
12 |
+
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
|
13 |
+
toxicity_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
14 |
+
toxicity_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
|
15 |
clf = load('my_model_filename.pkl')
|
16 |
vectorizer = load('tfidf_vectorizer.pkl')
|
17 |
scaler = load('scaler.joblib')
|
|
|
18 |
tukinazor = load('tokenizer.pkl')
|
19 |
rnn_model = load_model('path_to_my_model.h5')
|
20 |
bert_model = BertForSequenceClassification.from_pretrained('my_bert_model')
|
|
|
22 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
23 |
bert_model = bert_model.to(device)
|
24 |
|
25 |
+
# Display names for the five toxicity scores; text2toxicity zips this list
# against the model's per-class probabilities, so position matters.
# NOTE(review): the order presumably mirrors the output order of the
# cointegrated/rubert-tiny-toxicity classification head — confirm against
# the model card before relying on it.
labels = ["не токсичный", "оскорбляющий", "непристойный", "угрожающий", "опасный"]
|
26 |
+
def text2toxicity(text, aggregate=True):
    """Score the toxicity of a text with the rubert-tiny-toxicity model.

    Args:
        text: A single string or a list of strings to score.
        aggregate: When True, collapse the per-class probabilities into one
            toxicity score; when False, return the per-class probabilities
            keyed by the names in the module-level ``labels`` list.

    Returns:
        aggregate=True: ``1 - p[0] * (1 - p[-1])`` where ``p[0]`` is the first
        class probability (named "не токсичный" in ``labels``) and ``p[-1]`` is
        the last one ("опасный"). A scalar for a single string, an array with
        one entry per text for a list.
        aggregate=False: a ``{label: probability}`` dict for a single string,
        or a list of such dicts (one per text) for a list of strings.
    """
    with torch.no_grad():
        # Use the tokenizer/model loaded from the toxicity checkpoint. The
        # module-level `tokenizer` belongs to the multilingual BERT classifier
        # and has a different vocabulary.
        inputs = toxicity_tokenizer(
            text, return_tensors='pt', truncation=True, padding=True
        ).to(toxicity_model.device)
        # The head is multi-label, hence an independent sigmoid per class.
        proba = torch.sigmoid(toxicity_model(**inputs).logits).cpu().numpy()

    is_single = isinstance(text, str)
    if is_single:
        # Drop the batch axis so a single text yields a flat vector.
        proba = proba[0]

    if aggregate:
        return 1 - proba.T[0] * (1 - proba.T[-1])

    if is_single:
        return dict(zip(labels, proba))
    # For a batch, pair the labels with each row's scores instead of pairing
    # labels with whole rows.
    return [dict(zip(labels, row)) for row in proba]
|
43 |
+
|
44 |
+
|
45 |
def predict_text(text):
|
46 |
sequences = tukinazor.texts_to_sequences([text])
|
47 |
padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(sequences, maxlen=200, padding='post', truncating='post')
|
|
|
50 |
return predicted_class
|
51 |
|
52 |
|
|
|
53 |
def main():
    """Render the sidebar page picker and run the page the user selected."""
    chosen_page = st.sidebar.selectbox(
        "Выберите страницу:",
        ["Классификация отзывов", "Анализ токсичности"],
    )

    # Page functions are looked up only when their branch is taken.
    if chosen_page == "Классификация отзывов":
        page_reviews_classification()
        return

    if chosen_page == "Анализ токсичности":
        page_toxicity_analysis()
|
60 |
+
|
61 |
+
# Script entry point.
# NOTE(review): main() is invoked here, but page_reviews_classification and
# page_toxicity_analysis are defined further down in this file. When the
# module runs as a script those names do not exist yet at this point, so the
# call inside main() raises NameError. This guard should be moved to the very
# end of the module.
if __name__ == "__main__":
    main()
|
63 |
+
|
64 |
+
|
65 |
+
def page_reviews_classification():
|
66 |
st.title("Модель классификации отзывов")
|
67 |
|
68 |
# Ввод текста
|
|
|
111 |
st.write(f"Прогнозируемый класс (BERT): {predictions.item() + 1}")
|
112 |
st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
|
113 |
|
114 |
+
def page_toxicity_analysis():
    """Render the toxicity page: text input, per-class scores and timing.

    Reads a text from a Streamlit text area and, when the button is pressed,
    calls ``text2toxicity(..., aggregate=False)`` and writes one line per
    label with its probability, followed by the time the scoring took.
    """
    # Toxicity analysis backed by the cointegrated/rubert-tiny-toxicity model.
    user_input_toxicity = st.text_area("Введите текст для оценки токсичности:")

    if st.button("Оценить токсичность"):
        start_time = time.time()
        probs = text2toxicity(user_input_toxicity, aggregate=False)
        elapsed_time = time.time() - start_time

        for label, prob in probs.items():
            st.write(f"Вероятность {label}: {prob:.4f}")

        # The timing was measured but never shown; report it the same way the
        # review-classification page does.
        st.write(f"Время вычисления: {elapsed_time:.2f} сек.")
|