mskov committed on
Commit
5b1bf51
1 Parent(s): 0c5e4a4

Create replace_explitives.py

Browse files
Files changed (1) hide show
  1. replace_explitives.py +33 -0
replace_explitives.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize, sent_tokenize
3
+ import re
4
+
5
+
6
+ nltk.download('punkt') # For tokenization
7
+ nltk.download('averaged_perceptron_tagger') # For POS tagging (optional)
8
+
9
+
10
def replace_explitives(textfile: str, selection: str) -> str:
    """Replace the selected expletive in *textfile* with the word "person".

    Args:
        textfile: The text to filter. Despite its name it is handled as a
            plain string (it is passed straight to ``re.sub``).
        selection: Key choosing the pattern to replace: ``"B"``, ``"N"`` or
            ``"all"``. Any other value leaves the text untouched.

    Returns:
        The text with every whole-word, case-insensitive match of the
        selected pattern replaced by ``"person"``, or the input unchanged
        when *selection* is not recognised.
    """
    replacetext = "person"

    # Selection key -> whole-word regex for the term to replace.
    # NOTE(review): "all" currently maps to a single word only, and the "N"
    # pattern matches just that exact token - confirm both are intended.
    patterns = {
        "B": r"\bbitch\b",
        "N": r"\bnigg\b",
        "all": r"\bshit\b",
    }
    target_word = patterns.get(selection)

    print("selection:", selection, "target_word:", target_word)

    if target_word:
        # This message reports that a pattern was selected; it does not mean
        # the word actually occurs in the text.
        print("target word was found, ", target_word)
        # The original printed an undefined name (`text`), which raised
        # NameError for every recognised selection; print the input instead.
        print(textfile)
        textfile = re.sub(target_word, replacetext, textfile, flags=re.IGNORECASE)

    # Always hand the text back, including when no pattern was selected.
    return textfile