import logging
from typing import List

try:
    import regex as re
except ImportError:
    # The standard-library module provides the same escape/compile/sub/
    # IGNORECASE API that this file relies on.
    import re

try:
    import nltk  # noqa: F401 -- only referenced by currently disabled tokenizer code
except ImportError:
    nltk = None

logger = logging.getLogger(__name__)

# Text substituted for every matched word.
_REPLACEMENT = "person"

# Maps each supported selection label to the word-list file backing it.
# The labels (including their spelling) are part of the caller-facing contract.
_WORD_FILES = {
    "B-Word": "b_word.txt",
    "N-Word": "n_word.txt",
    "All Explitives": "expletives.txt",
}


def load_words_from_file(file_path: str) -> List[str]:
    """Return the lines of *file_path* with surrounding whitespace stripped.

    The file is read as UTF-8. Blank lines are kept as empty strings, so the
    result has exactly one entry per line of the file.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return [line.strip() for line in f]


def sub_explitives(textfile: str, selection: str) -> str:
    """Replace every listed word in *textfile* with ``"person"``.

    Args:
        textfile: The text to filter (despite the name, the text itself,
            not a path).
        selection: Which word list to apply: ``"B-Word"``, ``"N-Word"`` or
            ``"All Explitives"``. Any other value leaves the text unchanged.

    Returns:
        The text with each whole-word, case-insensitive occurrence of a
        listed word replaced.

    Raises:
        OSError: if the word-list file for a recognised *selection* cannot
            be read from the current working directory.
    """
    word_file = _WORD_FILES.get(selection)
    if word_file is None:
        logger.debug("selection %r has no word list; text left unchanged", selection)
        return textfile

    # Only the selected list is read, so a missing unrelated file cannot
    # break the call. Blank entries are dropped: an empty pattern would match
    # at every word boundary and insert the replacement all over the text.
    words = [word for word in load_words_from_file(word_file) if word]
    logger.debug("selection %r: applying %d target words", selection, len(words))

    for word in words:
        # Look-arounds behave like \b for ordinary words, but still work for
        # entries that begin or end with a non-word character (e.g. "f**"),
        # which \b-anchored patterns can never match as whole words.
        pattern = re.compile(
            r"(?<!\w)" + re.escape(word) + r"(?!\w)",
            flags=re.IGNORECASE,
        )
        textfile = pattern.sub(_REPLACEMENT, textfile)

    return textfile