"""Streamlit app that labels a piece of English text as positive or negative.

The script loads two pre-trained scikit-learn pipelines from disk (Naive
Bayes and Logistic Regression), cleans the text typed by the user and shows
the Logistic Regression prediction.
"""
import re
import string
from functools import lru_cache

import joblib
import nltk
import streamlit as st
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
# The sklearn names below are not referenced directly in this file; they are
# kept so that the original set of imports is preserved.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.pipeline import make_pipeline

# NLTK data used by preprocess_text: the stop-word list and the tokenizer
# models behind word_tokenize.
nltk.download('stopwords')
nltk.download('punkt')

# --- Preprocessing -----------------------------------------------------------

# Patterns are compiled once here instead of on every preprocess_text call.
_URL_PATTERN = re.compile(r'https?://\S+')
_HTML_TAG_PATTERN = re.compile(r'<[^<>]+>')
# Anything that is neither a word character nor whitespace. Digits and
# underscores are word characters, so they are kept.
_NON_WORD_PATTERN = re.compile(r'[^\w\s]')
# NOTE(review): several of these ranges overlap, and \U000024C2-\U0001F251 is
# wide enough to include many non-emoji code points (e.g. CJK ideographs).
# The class is left exactly as it was so the cleaned text does not change;
# presumably the saved models were fitted on text cleaned this way - confirm
# before narrowing it.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"
    "\U0001F300-\U0001F5FF"
    "\U0001F680-\U0001F6FF"
    "\U0001F1E0-\U0001F1FF"
    "\U0001F1F2-\U0001F1F4"
    "\U0001F1E6-\U0001F1FF"
    "\U0001F600-\U0001F64F"
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "\U0001f926-\U0001f937"
    "\U0001F1F2"
    "\U0001F1F4"
    "\U0001F620"
    "\u200d"
    "\u2640-\u2642"
    "]+",
    flags=re.UNICODE,
)


@lru_cache(maxsize=1)
def _english_stop_words() -> frozenset:
    """Return the NLTK English stop words, read from the corpus only once."""
    return frozenset(stopwords.words('english'))


def preprocess_text(text: str) -> str:
    """Clean raw text before it is handed to a vectorizer.

    Steps, in order: replace URLs, HTML tags, non-word characters and the
    emoji character class with a space; lowercase; tokenize with NLTK and
    drop English stop words.

    Args:
        text: The raw input string.

    Returns:
        The remaining tokens joined by single spaces. The result is an empty
        string when no token survives.
    """
    for pattern in (
        _URL_PATTERN,
        _HTML_TAG_PATTERN,
        _NON_WORD_PATTERN,
        _EMOJI_PATTERN,
    ):
        text = pattern.sub(' ', text)

    # Lowercase before filtering: the stop-word check is a case-sensitive
    # membership test against the NLTK list.
    text = text.lower()

    stop_words = _english_stop_words()
    kept_tokens = [
        token for token in word_tokenize(text) if token not in stop_words
    ]
    return ' '.join(kept_tokens)


# --- Models ------------------------------------------------------------------

# SECURITY: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that come from a trusted source.
model_NB_path = './model_NB.sav'
model_NB = joblib.load(model_NB_path)
model_LR_path = './model_LR.sav'
model_LR = joblib.load(model_LR_path)


def _predict_label(pipeline, vectorizer_step: str, classifier_step: str,
                   raw_text: str) -> int:
    """Clean *raw_text* and classify it with two named steps of *pipeline*.

    Args:
        pipeline: A fitted pipeline object exposing ``named_steps``.
        vectorizer_step: Key of the vectorizer inside ``named_steps``.
        classifier_step: Key of the classifier inside ``named_steps``.
        raw_text: The unprocessed text to classify.

    Returns:
        0 when the classifier predicts label 0, otherwise 1.
    """
    cleaned = preprocess_text(raw_text)
    vectorizer = pipeline.named_steps[vectorizer_step]
    classifier = pipeline.named_steps[classifier_step]

    # The vectorizer expects an iterable of documents, hence the list.
    features = vectorizer.transform([cleaned])
    prediction = classifier.predict(features)
    return 0 if prediction[0] == 0 else 1


def sentiment_analysis_LR(input: str) -> int:  # noqa: A002 - name kept for callers
    """Classify *input* with the Logistic Regression pipeline.

    The pipeline steps are looked up as 'tfidfvectorizer' and
    'logisticregression'.

    Returns:
        0 when the model predicts label 0, otherwise 1. The UI code in this
        file displays 0 as negative and 1 as positive.
    """
    return _predict_label(
        model_LR, 'tfidfvectorizer', 'logisticregression', input
    )


def sentiment_analysis_NB(input: str) -> int:  # noqa: A002 - name kept for callers
    """Classify *input* with the Naive Bayes pipeline.

    The pipeline steps are looked up as 'tfidf' and 'nb'.

    Returns:
        0 when the model predicts label 0, otherwise 1.
    """
    return _predict_label(model_NB, 'tfidf', 'nb', input)


# --- Streamlit UI ------------------------------------------------------------

text = st.text_area('Enter some text !!! (English text : D )')
# Skip blank or whitespace-only input: there is nothing to classify, and the
# model would otherwise be asked to label an empty document.
if text and text.strip():
    out = sentiment_analysis_LR(text)
    if out == 0:
        st.write('The sentence is negative')
    else:
        st.write('The sentence is positive')