import re

import nltk
from nltk.tokenize import word_tokenize, sent_tokenize  # only needed if tokenizing below

nltk.download('punkt')  # For tokenization
nltk.download('averaged_perceptron_tagger')  # For POS tagging (optional)


def sub_explitives(textfile, selection):
    """Replace the selected expletive(s) in textfile with a neutral word."""
    replacetext = "person"

    # Tokenizing is not required here, since re.sub works on the raw string:
    # text = word_tokenize(textfile)
    # sentences = sent_tokenize(textfile)

    if selection == "B":
        target_word = r"\bbitch\b"
    elif selection == "N":
        # \w* allows suffixed variants; the original r"\bnigg\b" only matched
        # the literal token "nigg" because of the trailing word boundary.
        target_word = r"\bnigg\w*\b"
    elif selection == "all":
        # Assumed intent: "all" covers every listed word, not just one.
        target_word = r"\b(?:bitch|nigg\w*|shit)\b"
    else:
        target_word = None

    print("selection:", selection, "target_word:", target_word)

    if target_word:
        # re.sub returns a new string, so reassign it to keep the result.
        # (The original also printed `text`, which raised a NameError because
        # the word_tokenize line above is commented out.)
        textfile = re.sub(target_word, replacetext, textfile, flags=re.IGNORECASE)

    return textfile
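
# A minimal smoke test for the function above. The sample sentence and the
# "all" selection are illustrative assumptions, not part of the original code:
if __name__ == "__main__":
    sample = "Don't be a bitch about it, this shit happens."
    print(sub_explitives(sample, "all"))
    # Expected output: "Don't be a person about it, this person happens."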