mclear1 committed on
Commit
b316eef
·
verified ·
1 Parent(s): 9ac51b1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +24 -3
README.md CHANGED
@@ -1,3 +1,24 @@
1
- ---
2
- license: llama2
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nltk
2
+ from nltk.tokenize import word_tokenize
3
+ import re
4
+
5
+ # Load the document
6
+ with open('document.txt', 'r') as f:
7
+ text = f.read()
8
+
9
+ # Preprocess the text
10
+ tokens = word_tokenize(text.lower())
11
+ tokens = [t for t in tokens if t.isalpha()] # drop tokens containing non-alphabetic characters
12
+
13
+ # Define key words
14
+ key_words = ['chronic kidney disease', 'heart failure', 'cirrhosis', 'ascites', 'ESRD', 'liver disease']
15
+
16
+ # Use regex to find key words
17
+ found_key_words = []
18
+ for key_word in key_words:
19
+ pattern = re.compile(r'\b' + key_word + r'\b')
20
+ if pattern.search(text):
21
+ found_key_words.append(key_word)
22
+
23
+ # Print the list of key words found
24
+ print(found_key_words)