Speech-Trigger-Detection / replace_explitives.py
mskov's picture
Update replace_explitives.py
18a25c3
raw
history blame
No virus
857 Bytes
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
import re
# Fetch the NLTK data packages when this module is imported:
# 'punkt' is for tokenization, 'averaged_perceptron_tagger' is for
# POS tagging (optional).
for _resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_resource)
def sub_explitives(textfile: str, selection: str) -> str:
    """Replace the expletive chosen by *selection* with the word "person".

    Matching is case-insensitive and anchored on word boundaries, so only
    whole-word occurrences are replaced.

    Args:
        textfile: The text to clean. Despite the name, this is the text
            itself (it is passed straight to ``re.sub``), not a file path.
        selection: Which word to replace: ``"B"``, ``"N"`` or ``"all"``.
            Any other value leaves the text untouched.

    Returns:
        The text with every match replaced, or the original text when
        *selection* is not recognised.
    """
    replacetext = "person"

    # Selection code -> whole-word regex for the term to replace.
    # NOTE(review): the "N" pattern is a stem wrapped in \b...\b, so it only
    # matches that exact token and not longer words starting with it --
    # confirm whether a suffix should be allowed.
    # NOTE(review): "all" currently targets a single word only -- confirm
    # whether it was meant to cover every pattern in this table.
    patterns = {
        "B": r"\bbitch\b",
        "N": r"\bnigg\b",
        "all": r"\bshit\b",
    }
    target_word = patterns.get(selection)
    print("selection:", selection, "target_word:", target_word)

    if target_word is None:
        return textfile

    print("target word was found, ", target_word)
    # The original printed an undefined name `text` here, which raised
    # NameError for every recognised selection; print the input instead.
    print(textfile)
    return re.sub(target_word, replacetext, textfile, flags=re.IGNORECASE)