curtpond committed on
Commit
438fdb3
1 Parent(s): c5a84a6

Updated stop word removal in app.py.

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -8,13 +8,18 @@ import nltk
8
  from nltk.corpus import stopwords
9
  nltk.download('stopwords')
10
  from sklearn.feature_extraction.text import CountVectorizer
 
11
 
12
  # file name
13
  lr_filename = 'logistic_regression.pkl'
14
 
 
 
 
15
  # Process input text, including removing stopwords, converting to lowercase, and removing punctuation
 
16
  def process_text(text):
17
- text = stopwords.words('english')
18
  text = str(text).lower()
19
  text = re.sub(
20
  f"[{re.escape(string.punctuation)}]", " ", text
@@ -29,10 +34,6 @@ def vectorize_text(text):
29
  text = vectorizer.fit_transform([text])
30
  return text
31
 
32
- # Load model from pickle file
33
- model = pickle.load(open(lr_filename, 'rb'))
34
-
35
-
36
  def predict(text):
37
  text = vectorize_text(text)
38
  prediction = model.predict(text)
 
8
  from nltk.corpus import stopwords
9
  nltk.download('stopwords')
10
  from sklearn.feature_extraction.text import CountVectorizer
11
+ from sklearn.feature_extraction.text import TfidfVectorizer
12
 
13
  # file name
14
  lr_filename = 'logistic_regression.pkl'
15
 
16
+ # Load model from pickle file
17
+ model = pickle.load(open(lr_filename, 'rb'))
18
+
19
  # Process input text, including removing stopwords, converting to lowercase, and removing punctuation
20
+ stop = stopwords.words('english')
21
  def process_text(text):
22
+ text = [word for word in text.split() if word not in stop]
23
  text = str(text).lower()
24
  text = re.sub(
25
  f"[{re.escape(string.punctuation)}]", " ", text
 
34
  text = vectorizer.fit_transform([text])
35
  return text
36
 
 
 
 
 
37
  def predict(text):
38
  text = vectorize_text(text)
39
  prediction = model.predict(text)