# Page header captured along with this file (not part of the program):
# Spaces status: Runtime error | File size: 2,080 Bytes | de1732e
"""# MODEL BUILDING"""
# Commented out IPython magic to ensure Python compatibility.
import numpy as np # For linear algebra
import pandas as pd # Data processing, CSV file I/O (e.g. pd.read_csv)
# import matplotlib.pyplot as plt # For Visualisation
# %matplotlib inline
# import seaborn as sns # For Visualisation
# from bs4 import BeautifulSoup # For Text Parsing
# from ydata_profiling import ProfileReport # For generating data report
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
from functools import lru_cache


@lru_cache(maxsize=1)
def _stopwords_keeping_not():
    """Return NLTK's English stop words, minus 'not', as a frozenset.

    The corpus is read only once; later calls reuse the cached set, and set
    membership is O(1) per token instead of a scan over a list.
    """
    words = set(nltk.corpus.stopwords.words('english'))
    # 'not' is kept in the text on purpose (it is removed from the stop list).
    # discard() is used so a corpus without 'not' does not raise.
    words.discard('not')
    return frozenset(words)


def remove_stopword(text):
    """Return *text* with English stop words removed.

    The text is tokenized with ``nltk.word_tokenize`` and the surviving
    tokens are re-joined with single spaces, so the original spacing is not
    preserved.

    The comparison is case-sensitive: a token is dropped only if it matches
    a stop-list entry exactly.
    NOTE(review): the NLTK list is presumably all lowercase, so capitalised
    stop words such as "The" would be kept -- confirm this is intended.

    Args:
        text: The string to filter.

    Returns:
        The filtered tokens joined by ' '.
    """
    stop_words = _stopwords_keeping_not()
    kept = (tok for tok in nltk.word_tokenize(text) if tok not in stop_words)
    return ' '.join(kept)
#data['Extracted text'] = data['Extracted text'].apply(remove_stopword)
# Load the training set from the working directory. The rest of the script
# reads two columns from this frame: 'Extracted text' (the input text) and
# 'saliency' (the target passed to the classifier).
data = pd.read_csv('train-cleaned.csv')
import nltk #Natural Language Processing Toolkit
import string

# Translation table that deletes every character in string.punctuation.
# Built once at import time instead of on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def punc_clean(text):
    """Return *text* with all ASCII punctuation characters removed.

    Only the characters in ``string.punctuation`` are stripped; whitespace,
    letters, digits and non-ASCII symbols (e.g. an em dash or curly quotes)
    are left untouched.

    Args:
        text: The string to clean.

    Returns:
        A new string without ASCII punctuation.
    """
    return text.translate(_PUNCTUATION_TABLE)
# Strip punctuation from the training text. The result is written back to
# 'Extracted text' -- the column the TF-IDF vectorizer reads -- rather than to
# a separate, unnamed column that nothing else uses; otherwise the cleaning
# never reaches the model.
data['Extracted text'] = data['Extracted text'].apply(punc_clean)
#data.head(2)
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF features over unigrams and bigrams of the training text.
# min_df=1 keeps every term that appears in at least one document.
vectr = TfidfVectorizer(ngram_range=(1, 2), min_df=1)
# fit_transform learns the vocabulary and IDF weights and returns the
# document-term matrix in one pass, instead of tokenizing the corpus twice
# with a separate fit() and transform(). `vectr` stays fitted for later
# transform() calls on new text.
vect_X = vectr.fit_transform(data['Extracted text'])
#from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
# Base learners for the ensemble: a logistic regression with default
# settings and a linear-kernel SVM.
# NOTE(review): hard voting only needs each learner's predict(), so
# probability=True looks unnecessary here and slows down fitting; it is kept
# so the fitted SVC still offers predict_proba -- confirm whether it is needed.
logistic_classifier = LogisticRegression()
svm_classifier = SVC(probability=True, kernel='linear')

# Majority-vote ensemble over the two learners.
# NOTE(review): with only two voters every disagreement is a tie, which
# scikit-learn resolves by taking the class label that sorts first -- confirm
# that this is the intended behaviour.
model = VotingClassifier(
    voting='hard',
    estimators=[('svm', svm_classifier), ('logistic', logistic_classifier)],
)

# Train on the TF-IDF matrix against the 'saliency' column. fit() returns the
# fitted estimator, so `clf` refers to the same object as `model`.
clf = model.fit(vect_X, data['saliency'])
# clf.score(vect_X, data['saliency'])*100
# """# PREDICTION"""
# clf.predict(vectr.transform(['''thank you ''']))
# clf.predict(vectr.transform(['''Theres no trailers or nothing on the other side of me and its been facing away from my trailer straight''']))
# clf.predict(vectr.transform([''' I dont think that should really matter Um''']))
|