# Spaces: Runtime error  (status banner captured with the page; not part of the script)
"""# MODEL BUILDING"""
# Commented out IPython magic to ensure Python compatibility.
import numpy as np # For linear algebra | |
import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv) | |
# import matplotlib.pyplot as plt # For Visualisation | |
# %matplotlib inline | |
# import seaborn as sns # For Visualisation | |
# from bs4 import BeautifulSoup # For Text Parsing | |
# from ydata_profiling import ProfileReport # For generating data report | |
import nltk
from nltk.corpus import stopwords

# Fetch the NLTK resources this script relies on:
#   - 'stopwords': the word list read by remove_stopword().
#   - 'punkt' / 'punkt_tab': tokenizer data behind nltk.word_tokenize().
#     Newer NLTK releases load 'punkt_tab' instead of the pickled 'punkt'
#     models, so both are requested to keep tokenization working across
#     NLTK versions.
for _resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(_resource)
_STOPWORDS_KEEPING_NOT = None  # built lazily on the first remove_stopword() call


def remove_stopword(text):
    """Return *text* with English stopwords removed, keeping the word 'not'.

    The text is split with ``nltk.word_tokenize`` and the surviving tokens
    are re-joined with single spaces. Tokens are compared as-is (no
    lowercasing), so only exact matches against the stopword list are dropped.

    Args:
        text: The string to filter.

    Returns:
        The filtered tokens joined by a single space.
    """
    global _STOPWORDS_KEEPING_NOT
    if _STOPWORDS_KEEPING_NOT is None:
        # Load the corpus once and keep it as a frozenset: this function is
        # meant to be applied row by row, and re-reading the stopword list
        # plus scanning a list for every token is wasted work.
        words = set(nltk.corpus.stopwords.words('english'))
        # 'not' is deliberately retained — presumably because negation is
        # meaningful for the downstream classifier.
        words.discard('not')
        _STOPWORDS_KEEPING_NOT = frozenset(words)

    kept = [
        token
        for token in nltk.word_tokenize(text)
        if token not in _STOPWORDS_KEEPING_NOT
    ]
    return ' '.join(kept)
# Load the training CSV; later steps read its 'Extracted text' and
# 'saliency' columns.
data = pd.read_csv('train-cleaned.csv')

# Optional stopword-removal step (disabled). It can only run once `data`
# has been loaded, hence its position after the read:
# data['Extracted text'] = data['Extracted text'].apply(remove_stopword)

import nltk  # Natural Language Processing Toolkit (redundant re-import, kept as in the original)
def punc_clean(text):
    """Return *text* with every ASCII punctuation character removed.

    Only the characters in ``string.punctuation`` are stripped; letters,
    digits, whitespace and non-ASCII punctuation are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without ASCII punctuation.
    """
    import string as st

    # A deletion table lets str.translate strip all punctuation in a single
    # C-level pass instead of filtering character by character in Python.
    return text.translate(str.maketrans('', '', st.punctuation))
# Strip punctuation from the training text in place so that the TF-IDF step,
# which reads data['Extracted text'], actually sees the cleaned text.
# (The result used to be stored in a column named '' that nothing read, so
# the cleaning had no effect on the model.)
data['Extracted text'] = data['Extracted text'].apply(punc_clean)
# data.head(2)
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features over unigrams and bigrams; min_df=1 keeps every term that
# appears in at least one document.
vectr = TfidfVectorizer(min_df=1, ngram_range=(1, 2))

# fit() returns the vectorizer itself, so learning the vocabulary and
# vectorising the same training text can be chained.
vect_X = (
    vectr
    .fit(data['Extracted text'])
    .transform(data['Extracted text'])
)
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base learners: a logistic regression with default settings and a
# linear-kernel SVM with probability estimates enabled.
logistic_classifier = LogisticRegression()
svm_classifier = SVC(probability=True, kernel='linear')

# Majority-vote ("hard") ensemble over the two base learners.
# NOTE(review): hard voting only uses each estimator's predicted label, so
# probability=True on the SVC looks unnecessary here — confirm nothing else
# calls predict_proba on it before removing.
# NOTE(review): with exactly two voters a disagreement is a tie; check how
# VotingClassifier breaks ties and whether that is the intended behaviour.
model = VotingClassifier(
    voting='hard',
    estimators=[
        ('svm', svm_classifier),
        ('logistic', logistic_classifier),
    ],
)

# Train on the TF-IDF matrix against the 'saliency' labels. fit() returns
# the estimator itself, so `clf` is the same fitted object as `model`.
model.fit(vect_X, data['saliency'])
clf = model
# clf.score(vect_X, data['saliency'])*100
# PREDICTION (example calls, kept commented out)
# clf.predict(vectr.transform(['''thank you ''']))
# clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))
# clf.predict(vectr.transform([''' I dont think that should really matter Um''']))