nickmuchi committed on
Commit
173ed2d
·
1 Parent(s): 9c8ceb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -16,11 +16,10 @@ import docx2txt
16
  from io import StringIO
17
  from PyPDF2 import PdfFileReader
18
  import warnings
19
- import nltk
20
 
21
- nltk.download()
22
 
23
- from nltk import sent_tokenize
24
  warnings.filterwarnings("ignore")
25
 
26
 
@@ -71,7 +70,7 @@ def article_text_extractor(url: str):
71
 
72
  def chunk_clean_text(text):
73
 
74
- sentences = sent_tokenize(text)
75
  current_chunk = 0
76
  chunks = []
77
 
 
16
  from io import StringIO
17
  from PyPDF2 import PdfFileReader
18
  import warnings
19
+ import nltk.data
20
 
21
+ tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
22
 
 
23
  warnings.filterwarnings("ignore")
24
 
25
 
 
70
 
71
  def chunk_clean_text(text):
72
 
73
+ sentences = tokenizer(text)
74
  current_chunk = 0
75
  chunks = []
76