Spaces:
Sleeping
Sleeping
File size: 1,395 Bytes
9dfabbb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import joblib
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
import emoji
# Load the model and vectorizer.
# NOTE(review): joblib.load unpickles these files, and unpickling can run
# arbitrary code -- only deploy .pkl artifacts that come from a trusted source.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Load custom stopwords; each line of the file becomes one stripped entry.
# The encoding is pinned so the file is decoded the same way regardless of
# the platform's default locale.
with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
    custom_stopwords = [word.strip() for word in f]
def _get_stemmer():
    """Return a shared Sastrawi stemmer, creating it on first use."""
    stemmer = getattr(_get_stemmer, "_instance", None)
    if stemmer is None:
        # Build the stemmer once and reuse it, instead of constructing a new
        # factory and stemmer for every piece of text that is preprocessed.
        stemmer = StemmerFactory().create_stemmer()
        _get_stemmer._instance = stemmer
    return stemmer


def preprocess_data(text):
    """Normalize, filter and stem *text* for the sentiment model.

    Steps, in order:
      1. Lowercase the text.
      2. Replace emojis with their textual names.
      3. Remove URLs, then digits, then every character that is not an
         ASCII letter, digit or whitespace.
      4. Split on whitespace, drop tokens found in ``custom_stopwords``,
         and stem the remaining tokens.

    NOTE(review): these steps presumably mirror the preprocessing used when
    the vectorizer was fitted -- confirm before changing any of them.

    Args:
        text: The raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces.
    """
    # Case folding
    text = text.lower()

    # Sentence normalization
    text = emoji.demojize(text)  # Translate emojis to their word representation
    text = re.sub(r'http[s]?://\S+', '', text)  # Remove URLs
    text = re.sub(r'\d+', '', text)  # Remove numbers
    # Remove non-alphanumeric characters except whitespace. This also strips
    # the punctuation that demojize puts around and inside emoji names.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Tokenization, stopword removal and stemming.
    # A set gives constant-time membership tests instead of scanning the
    # stopword list once per token.
    stopwords = set(custom_stopwords)
    stemmer = _get_stemmer()
    tokens = [stemmer.stem(word) for word in text.split() if word not in stopwords]
    return ' '.join(tokens)
def predict_sentiment(text):
    """Classify *text* and return its sentiment label.

    The text is cleaned with ``preprocess_data``, converted to features by
    the module-level ``vectorizer`` and passed to the module-level ``model``.

    Args:
        text: The raw input string.

    Returns:
        ``"Positive"`` when the first predicted label equals 1, otherwise
        ``"Negative"``.
    """
    cleaned = preprocess_data(text)
    # The vectorizer is given a one-element list, so only the first
    # prediction is read.
    features = vectorizer.transform([cleaned])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"
|