Spaces:

Kurkur99
/

Sentimentanalysi3

Sleeping

Sentimentanalysi3 / prediction.py

Upload 9 files

9dfabbb over 1 year ago

1.4 kB

	import joblib
	import re
	from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
	import emoji

	# Load the model and vectorizer once, at import time.
	# NOTE(review): joblib.load unpickles arbitrary Python objects; these
	# artifact files must come from a trusted source.
	model = joblib.load("hard_voting_classifier.pkl")
	vectorizer = joblib.load("vectorizer.pkl")

	# Load custom stopwords: each stripped line of the file becomes one entry.
	# The encoding is pinned so the result does not depend on the platform's
	# default locale encoding.
	with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
	    custom_stopwords = [word.strip() for word in f]

	_STEMMER = None  # created lazily on first use, then reused across calls


	def _get_stemmer():
	    """Return the shared stemmer, building it on the first call."""
	    global _STEMMER
	    if _STEMMER is None:
	        _STEMMER = StemmerFactory().create_stemmer()
	    return _STEMMER


	def preprocess_data(text):
	    """Normalize raw text into a space-joined string of stemmed tokens.

	    Steps, in order: lowercase the text; replace emojis with their textual
	    names; remove URLs, digits, and every remaining character that is not
	    an ASCII letter, a digit, or whitespace; split on whitespace; drop
	    tokens found in ``custom_stopwords``; stem each remaining token.

	    Args:
	        text: The input string to clean.

	    Returns:
	        The surviving stemmed tokens joined by single spaces (an empty
	        string when no token survives).
	    """
	    # Case Folding
	    text = text.lower()

	    # Sentence Normalization
	    # NOTE(review): the delimiters and underscores that demojize emits look
	    # like they are stripped by the non-alphanumeric filter below, fusing
	    # multi-word emoji names into one token -- confirm this matches how the
	    # vectorizer's training data was prepared before changing any step.
	    text = emoji.demojize(text)  # Translate emojis to their word representation
	    text = re.sub(r'http[s]?://\S+', '', text)  # Remove URLs
	    text = re.sub(r'\d+', '', text)  # Remove numbers
	    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove non-alphanumeric characters except for spaces

	    # Tokenization & Stopword Removal
	    # A set gives constant-time membership tests instead of scanning the
	    # stopword list once per token.
	    stopwords = set(custom_stopwords)
	    tokens = [word for word in text.split() if word not in stopwords]

	    # Stemming: reuse one stemmer rather than constructing a new one on
	    # every call.
	    stemmer = _get_stemmer()
	    tokens = [stemmer.stem(word) for word in tokens]

	    return ' '.join(tokens)

	def predict_sentiment(text):
	    """Classify the input text as "Positive" or "Negative".

	    The text is cleaned with ``preprocess_data``, transformed by the
	    module-level ``vectorizer``, and passed to the module-level ``model``.
	    A first predicted label equal to 1 maps to "Positive"; any other value
	    maps to "Negative".
	    """
	    features = vectorizer.transform([preprocess_data(text)])
	    label = model.predict(features)[0]
	    if label == 1:
	        return "Positive"
	    return "Negative"