Spaces:
Running
Updated stop word removal in app.py.
Browse files
app.py
CHANGED
@@ -8,13 +8,18 @@ import nltk
|
|
8 |
from nltk.corpus import stopwords
|
9 |
nltk.download('stopwords')
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
11 |
|
12 |
# file name
|
13 |
lr_filename = 'logistic_regression.pkl'
|
14 |
|
|
|
|
|
|
|
15 |
# Process input text, including removing stopwords, converting to lowercase, and removing punctuation
|
|
|
16 |
def process_text(text):
|
17 |
-
text =
|
18 |
text = str(text).lower()
|
19 |
text = re.sub(
|
20 |
f"[{re.escape(string.punctuation)}]", " ", text
|
@@ -29,10 +34,6 @@ def vectorize_text(text):
|
|
29 |
text = vectorizer.fit_transform([text])
|
30 |
return text
|
31 |
|
32 |
-
# Load model from pickle file
|
33 |
-
model = pickle.load(open(lr_filename, 'rb'))
|
34 |
-
|
35 |
-
|
36 |
def predict(text):
|
37 |
text = vectorize_text(text)
|
38 |
prediction = model.predict(text)
|
|
|
8 |
from nltk.corpus import stopwords
|
9 |
nltk.download('stopwords')
|
10 |
from sklearn.feature_extraction.text import CountVectorizer
|
11 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
12 |
|
13 |
# file name
|
14 |
lr_filename = 'logistic_regression.pkl'
|
15 |
|
16 |
+
# Load model from pickle file
|
17 |
+
model = pickle.load(open(lr_filename, 'rb'))
|
18 |
+
|
19 |
# Process input text, including removing stopwords, converting to lowercase, and removing punctuation
|
20 |
+
stop = stopwords.words('english')
|
21 |
def process_text(text):
|
22 |
+
text = [word for word in text.split() if word not in stop]
|
23 |
text = str(text).lower()
|
24 |
text = re.sub(
|
25 |
f"[{re.escape(string.punctuation)}]", " ", text
|
|
|
34 |
text = vectorizer.fit_transform([text])
|
35 |
return text
|
36 |
|
|
|
|
|
|
|
|
|
37 |
def predict(text):
|
38 |
text = vectorize_text(text)
|
39 |
prediction = model.predict(text)
|