Spaces:
Sleeping
Sleeping
File size: 1,709 Bytes
d916065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# Natural Language Toolkit (NLTK) Help
#
# Copyright (C) 2001-2023 NLTK Project
# Authors: Steven Bird <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
"""
Provide structured access to documentation.
"""
import re
from textwrap import wrap
from nltk.data import load
def brown_tagset(tagpattern=None):
    """
    Print help entries from the ``brown_tagset`` tag documentation.

    :param tagpattern: Selects which tags to show.  A false value (the
        default ``None``, or an empty string) prints every tag; a string
        equal to a tag name prints just that tag; any other string is
        compiled as a regular expression and matched against the start
        of each tag name.
    """
    _format_tagset("brown_tagset", tagpattern=tagpattern)
def claws5_tagset(tagpattern=None):
    """
    Print help entries from the ``claws5_tagset`` tag documentation.

    :param tagpattern: Selects which tags to show.  A false value (the
        default ``None``, or an empty string) prints every tag; a string
        equal to a tag name prints just that tag; any other string is
        compiled as a regular expression and matched against the start
        of each tag name.
    """
    _format_tagset("claws5_tagset", tagpattern=tagpattern)
def upenn_tagset(tagpattern=None):
    """
    Print help entries from the ``upenn_tagset`` tag documentation.

    :param tagpattern: Selects which tags to show.  A false value (the
        default ``None``, or an empty string) prints every tag; a string
        equal to a tag name prints just that tag; any other string is
        compiled as a regular expression and matched against the start
        of each tag name.
    """
    _format_tagset("upenn_tagset", tagpattern=tagpattern)
#####################################################################
# UTILITIES
#####################################################################
def _print_entries(tags, tagdict):
    """
    Print one help entry per tag, in the order given by ``tags``.

    Each entry is a header line ``"<tag>: <definition>"`` followed by the
    example text wrapped to at most 75 columns, with every wrapped line
    carrying the same indent prefix.

    :param tags: Iterable of tag names; each must be a key of ``tagdict``
        (a missing key raises ``KeyError``).
    :param tagdict: Mapping from tag name to an indexable record whose
        item 0 is the definition string and item 1 is the example text.
    """
    for name in tags:
        record = tagdict[name]
        # Header first, then the wrapped example lines (possibly none,
        # since wrapping an empty string yields an empty list).
        lines = [name + ": " + record[0]]
        lines.extend(
            wrap(
                record[1],
                width=75,
                initial_indent=" ",
                subsequent_indent=" ",
            )
        )
        print("\n".join(lines))
def _format_tagset(tagset, tagpattern=None):
    """
    Load a tagset help table and print the entries chosen by ``tagpattern``.

    The table is loaded from ``"help/tagsets/<tagset>.pickle"`` through
    ``nltk.data.load``.

    :param tagset: Base name of the tagset resource (without the
        ``.pickle`` suffix).
    :param tagpattern: Selection rule, tried in this order:

        1. a false value (``None`` or ``""``) prints every tag, sorted;
        2. a string that is itself a key of the table prints only that
           tag -- this check comes before any regex handling, so a tag
           name containing regex metacharacters is looked up literally;
        3. anything else is compiled with ``re.compile`` and every tag
           whose name matches from its start (``Pattern.match``) is
           printed in sorted order.

        If the regular expression selects nothing, the message
        ``"No matching tags found."`` is printed instead.
    """
    tagdict = load("help/tagsets/" + tagset + ".pickle")

    # Case 1: no pattern at all -> dump the entire table.
    if not tagpattern:
        _print_entries(sorted(tagdict), tagdict)
        return

    # Case 2: the pattern is literally a tag name.
    if tagpattern in tagdict:
        _print_entries([tagpattern], tagdict)
        return

    # Case 3: treat the pattern as a regular expression.
    matcher = re.compile(tagpattern)
    selected = [name for name in sorted(tagdict) if matcher.match(name)]
    if not selected:
        print("No matching tags found.")
        return
    _print_entries(selected, tagdict)
if __name__ == "__main__":
    # Demo run: each pair is (printer function, pattern), executed in order.
    # The patterns cover the regex path, and presumably the no-match and
    # exact-tag paths too -- that depends on the contents of the tagset
    # tables, which are not visible from this file.
    demo_calls = (
        (brown_tagset, r"NN.*"),
        (upenn_tagset, r".*\$"),
        (claws5_tagset, "UNDEFINED"),
        (brown_tagset, r"NN"),
    )
    for show_tagset, pattern in demo_calls:
        show_tagset(pattern)
|