Spaces:

Yerzhxn
/

class_space

Sleeping

App Files Files Community

class_space / app.py

Yerzhxn

Update app.py

fbf6406 verified 5 months ago

raw

history blame contribute delete

3.34 kB

	import streamlit as st
	import torch.nn.functional as F
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import re
	from nltk.tokenize import RegexpTokenizer
	from bs4 import BeautifulSoup as bs
	from nltk.corpus import stopwords
	import nltk
	nltk.download('stopwords')
	nltk.download('punkt')
	import pandas as pd

	# Hugging Face Hub id of the fine-tuned classifier; replace it with the path
	# to your own model if you reuse this app.
	model_name = 'Yerzhxn/class_vac'

	# Run on the GPU when one is available, otherwise on the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


	@st.cache_resource
	def _load_tokenizer_and_model(name):
	    """Load the tokenizer and classification model for *name* once per process.

	    Streamlit re-executes the whole script on every widget interaction;
	    caching the loaded objects avoids re-instantiating the model on each
	    button click.

	    Returns:
	        A ``(tokenizer, model)`` tuple with the model already moved to
	        the module-level ``device``.
	    """
	    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
	    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
	    loaded_model.to(device)
	    return loaded_tokenizer, loaded_model


	# Same module-level names as before, so the rest of the script is unaffected.
	tokenizer, model = _load_tokenizer_and_model(model_name)

	def preprocess(sentence):
	    """Clean raw (possibly HTML) text and return it as space-joined tokens.

	    Steps, in order: strip HTML markup, lowercase, drop the literal
	    ``{html}`` marker, remove leftover tags, URLs and digit runs, split
	    into ``\\w+`` tokens and discard Russian stop words.

	    Args:
	        sentence: The raw input text.

	    Returns:
	        The remaining tokens joined by single spaces (may be empty).
	    """
	    # Two parsing passes are kept from the original implementation;
	    # presumably the second one handles markup that was entity-escaped
	    # in the input and only becomes real tags after the first pass.
	    for _ in range(2):
	        sentence = bs(sentence, features="html.parser").get_text()

	    text = str(sentence).lower().replace('{html}', "")
	    text = re.sub(r'<.*?>', '', text)      # any remaining tag-like spans
	    text = re.sub(r'http\S+', '', text)    # URLs
	    text = re.sub(r'[0-9]+', '', text)     # digit runs

	    # Named differently from the module-level Hugging Face ``tokenizer``
	    # to avoid shadowing it inside this function.
	    word_tokenizer = RegexpTokenizer(r'\w+')
	    tokens = word_tokenizer.tokenize(text)

	    # Build the stop-word set once per call: the original evaluated
	    # stopwords.words('russian') for every token and scanned the
	    # resulting list linearly.
	    russian_stopwords = set(stopwords.words('russian'))
	    filtered_words = [w for w in tokens if w not in russian_stopwords]

	    return " ".join(filtered_words)

	# Streamlit interface: page title and a short instruction for the user.
	st.title("Тестирование классификации текста")
	st.write("Введите текст, чтобы узнать предсказанный класс.")

	# Text input field.
	input_text = st.text_area("Введите текст здесь", "")
	# Lookup table; its 'label' and 'PROF_NAME' columns are read below to turn
	# the predicted class id into a displayable name.
	df = pd.read_excel('me.xlsx')
	if st.button("Предсказать"):
	    # Treat whitespace-only input the same as empty input.
	    if input_text.strip():
	        input_text = preprocess(input_text)
	        # Convert the cleaned text into model-ready tensors on the target device.
	        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
	        inputs = {key: value.to(device) for key, value in inputs.items()}

	        # Forward pass only; no gradients are needed for inference.
	        with torch.no_grad():
	            outputs = model(**inputs)

	        # Turn the logits into class probabilities.
	        logits = outputs.logits
	        probabilities = F.softmax(logits, dim=1)

	        # Most likely class and its probability, extracted once as plain
	        # Python numbers.
	        max_prob, predicted_class = torch.max(probabilities, dim=1)
	        confidence = max_prob.item()
	        label = predicted_class.item()

	        # Only show a prediction when the model is confident enough.
	        if confidence > 0.35:
	            st.write(f"Предсказанный класс: {label}, вероятность: {confidence:.2f}")
	            prof_names = df.loc[df['label'] == label, 'PROF_NAME']
	            # Guard against a label that has no row in the lookup table;
	            # indexing an empty selection with .iloc[0] raises IndexError.
	            if prof_names.empty:
	                st.write("Для предсказанного класса не найдено название профессии.")
	            else:
	                st.write(prof_names.iloc[0])
	        else:
	            st.write("Модель не уверена в предсказании (вероятность меньше 35%).")
	    else:
	        st.write("Пожалуйста, введите текст для классификации.")