Spaces:
Sleeping
Sleeping
Commit
·
c31032a
1
Parent(s):
f8f2913
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,9 @@ from tensorflow.keras.preprocessing.text import Tokenizer
|
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
import time
|
10 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
11 |
|
|
|
12 |
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
|
13 |
toxicity_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
14 |
toxicity_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
|
@@ -21,6 +23,8 @@ bert_model = BertForSequenceClassification.from_pretrained('my_bert_model')
|
|
21 |
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
|
22 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
23 |
bert_model = bert_model.to(device)
|
|
|
|
|
24 |
|
25 |
labels = ["не токсичный", "оскорбляющий", "непристойный", "угрожающий", "опасный"]
|
26 |
def text2toxicity(text, aggregate=True):
|
@@ -49,7 +53,17 @@ def predict_text(text):
|
|
49 |
return predicted_class
|
50 |
|
51 |
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
|
55 |
def page_reviews_classification():
|
@@ -111,6 +125,19 @@ def page_toxicity_analysis():
|
|
111 |
st.write(f"Вероятность того что комментарий {label}: {prob:.4f}")
|
112 |
|
113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
def main():
|
115 |
page_selection = st.sidebar.selectbox("Выберите страницу:", ["Классификация отзывов", "Анализ токсичности"])
|
116 |
|
|
|
8 |
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
9 |
import time
|
10 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
11 |
+
from transformers import GPT2Tokenizer, GPT2LMHeadModel
|
12 |
|
13 |
+
tok = GPT2Tokenizer()
|
14 |
model_checkpoint = 'cointegrated/rubert-tiny-toxicity'
|
15 |
toxicity_tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
|
16 |
toxicity_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint)
|
|
|
23 |
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
|
24 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
25 |
bert_model = bert_model.to(device)
|
26 |
+
model_finetuned = load_full_model_from_pt(model_directory)
|
27 |
+
model_finetuned.eval()
|
28 |
|
29 |
labels = ["не токсичный", "оскорбляющий", "непристойный", "угрожающий", "опасный"]
|
30 |
def text2toxicity(text, aggregate=True):
|
|
|
53 |
return predicted_class
|
54 |
|
55 |
|
56 |
+
def generate_text(model, prompt, max_length=150, temperature=1.0):
    """Generate a continuation of *prompt* with the given causal language model.

    Args:
        model: Model object exposing a ``generate`` method (the caller in this
            file passes ``model_finetuned``).
        prompt: Text to continue. Encoded with the module-level GPT-2
            tokenizer ``tok``.
        max_length: Number of tokens to generate on top of the prompt; the
            prompt's own token count is added before calling ``generate``.
        temperature: Sampling temperature forwarded to ``generate``.

    Returns:
        The decoded text of the first returned sequence, with special tokens
        stripped.
    """
    input_ids = tok.encode(prompt, return_tensors='pt')
    # Use the model that was passed in rather than an unrelated global, so the
    # `model` argument is actually honoured.
    output = model.generate(
        input_ids=input_ids,
        # `max_length` in `generate` counts the prompt tokens too, so extend it
        # by the prompt length to keep the caller's value meaning "new tokens".
        max_length=max_length + len(input_ids[0]),
        # Temperature only influences the result when sampling is enabled;
        # without it decoding is greedy and every call returns the same text.
        do_sample=True,
        temperature=temperature,
        num_return_sequences=1,
        # Take the padding id from the same tokenizer that encodes/decodes the
        # text (`tok`), not from the BERT `tokenizer` used by the classifier.
        pad_token_id=tok.eos_token_id,
    )
    return tok.decode(output[0], skip_special_tokens=True)
|
67 |
|
68 |
|
69 |
def page_reviews_classification():
|
|
|
125 |
st.write(f"Вероятность того что комментарий {label}: {prob:.4f}")
|
126 |
|
127 |
|
128 |
+
def page_gpt_generation():
    """Render the Streamlit page for GPT text generation.

    Shows a prompt text area plus sliders for sequence length, number of
    generations and temperature. When the button is pressed, calls
    ``generate_text`` with ``model_finetuned`` once per requested generation
    and prints each result with ``st.text``.
    """
    st.title("Генерация текста с помощью GPT-модели")

    user_prompt = st.text_area("Введите ваш текст:")
    sequence_length = st.slider(
        "Длина последовательности:", min_value=10, max_value=1000, value=150, step=10
    )
    num_generations = st.slider("Число генераций:", min_value=1, max_value=10, value=1)
    temperature = st.slider("Температура:", min_value=0.1, max_value=3.0, value=1.0, step=0.1)

    if st.button("Генерировать"):
        # An empty prompt would be encoded to an empty tensor and handed to
        # `generate`; ask the user for input instead of attempting that.
        if not user_prompt.strip():
            st.warning("Введите текст для генерации.")
            return
        for _ in range(num_generations):
            generated_text = generate_text(
                model_finetuned, user_prompt, sequence_length, temperature
            )
            st.text(generated_text)
|
140 |
+
|
141 |
def main():
|
142 |
page_selection = st.sidebar.selectbox("Выберите страницу:", ["Классификация отзывов", "Анализ токсичности"])
|
143 |
|