# Spaces: Sleeping
import joblib | |
import re | |
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory | |
import emoji | |
# Load the model and vectorizer.
# NOTE: joblib.load unpickles its input, which can execute arbitrary code;
# only load artifact files that come from a trusted source.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Load custom stopwords, one per line, with surrounding whitespace stripped.
# The encoding is explicit so the file is decoded the same way regardless of
# the platform's default locale.
with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
    custom_stopwords = [word.strip() for word in f]
_STEMMER = None  # Shared stemmer instance, created lazily by _get_stemmer().


def _get_stemmer():
    """Return the shared stemmer, creating it on first use."""
    global _STEMMER
    if _STEMMER is None:
        _STEMMER = StemmerFactory().create_stemmer()
    return _STEMMER


def preprocess_data(text):
    """Normalize raw text into a space-joined string of stemmed tokens.

    Steps, in order: lowercase the text, replace emojis with their textual
    names, strip URLs, strip digits, strip every character that is not an
    ASCII letter, digit, or whitespace, split on whitespace, drop tokens
    found in ``custom_stopwords``, and stem what remains.

    Args:
        text: The raw input string.

    Returns:
        The stemmed tokens joined by single spaces; an empty string when no
        tokens survive.
    """
    # Case folding
    text = text.lower()

    # Sentence normalization
    text = emoji.demojize(text)  # Translate emojis to their word representation
    text = re.sub(r'http[s]?://\S+', '', text)  # Remove URLs
    text = re.sub(r'\d+', '', text)  # Remove numbers
    # Remove everything except ASCII letters, digits and whitespace.
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Stopword removal: build a set once per call so each token lookup is
    # O(1) instead of a linear scan of the stopword list.
    stopwords = set(custom_stopwords)
    tokens = [word for word in text.split() if word not in stopwords]

    # Stemming: reuse one stemmer rather than rebuilding it on every call.
    stemmer = _get_stemmer()
    return ' '.join(stemmer.stem(word) for word in tokens)
def predict_sentiment(text):
    """Classify the input text as "Positive" or "Negative".

    The text is cleaned with ``preprocess_data``, vectorized with the
    module-level ``vectorizer``, and passed to the module-level ``model``.

    Args:
        text: The raw input string.

    Returns:
        "Positive" when the first predicted label equals 1, otherwise
        "Negative".
    """
    cleaned = preprocess_data(text)
    # The vectorizer is given a one-element list, so exactly one prediction
    # comes back.
    features = vectorizer.transform([cleaned])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"