"""Sentiment inference helpers: text preprocessing plus model prediction.

Importing this module loads the persisted classifier, the vectorizer and the
stopword list from the current working directory.
"""

import re
from functools import lru_cache

import emoji
import joblib
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory

# Load the model and vectorizer.
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code. Only load artifacts that come from a trusted source.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Load custom stopwords, one per line. The encoding is explicit so the result
# does not depend on the platform's default locale.
with open("Indonesia_stopwords.txt", "r", encoding="utf-8") as f:
    custom_stopwords = [word.strip() for word in f]

# The public name above stays a list for backward compatibility; this private
# set gives constant-time membership tests during stopword removal.
_STOPWORD_SET = frozenset(custom_stopwords)

# Patterns are compiled once instead of being looked up on every call.
_URL_RE = re.compile(r'http[s]?://\S+')
_DIGITS_RE = re.compile(r'\d+')
_NON_ALNUM_RE = re.compile(r'[^a-zA-Z0-9\s]')


@lru_cache(maxsize=1)
def _get_stemmer():
    """Return the shared stemmer, creating it on first use."""
    return StemmerFactory().create_stemmer()


def preprocess_data(text: str) -> str:
    """Normalize raw text into a space-separated string of stemmed tokens.

    Steps, in order: lowercase, replace emojis with their textual names,
    remove URLs, remove digits, remove every character that is not an ASCII
    letter, digit or whitespace, split on whitespace, drop stopwords, and
    stem each remaining token.

    Args:
        text: The raw input text.

    Returns:
        The processed tokens joined by single spaces. The result is an empty
        string when no token survives the filtering.
    """
    # Case folding
    text = text.lower()

    # Sentence normalization
    text = emoji.demojize(text)  # Translate emojis to their word representation
    text = _URL_RE.sub('', text)  # Remove URLs
    text = _DIGITS_RE.sub('', text)  # Remove numbers
    # Remove everything except ASCII letters, digits and whitespace. This also
    # strips any punctuation contained in the emoji names inserted above.
    text = _NON_ALNUM_RE.sub('', text)

    # Tokenization, stopword removal and stemming
    stemmer = _get_stemmer()
    tokens = [word for word in text.split() if word not in _STOPWORD_SET]
    return ' '.join(stemmer.stem(word) for word in tokens)


def predict_sentiment(text: str) -> str:
    """Predict the sentiment of the input text.

    Args:
        text: The raw input text; it is preprocessed before vectorization.

    Returns:
        "Positive" when the model's predicted label equals 1, otherwise
        "Negative".
    """
    preprocessed_text = preprocess_data(text)
    vectorized_text = vectorizer.transform([preprocessed_text])
    prediction = model.predict(vectorized_text)
    return "Positive" if prediction[0] == 1 else "Negative"