Spaces:
Sleeping
Sleeping
# Natural Language Toolkit: Chunk parsing API
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Edward Loper <[email protected]>
#         Steven Bird <[email protected]> (minor additions)
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT
##//////////////////////////////////////////////////////
##  Chunk Parser Interface
##//////////////////////////////////////////////////////
from nltk.chunk.util import ChunkScore | |
from nltk.internals import deprecated | |
from nltk.parse import ParserI | |
class ChunkParserI(ParserI):
    """
    A processing interface for identifying non-overlapping groups in
    unrestricted text.  Typically, chunk parsers are used to find base
    syntactic constituents, such as base noun phrases.  Unlike
    ``ParserI``, ``ChunkParserI`` guarantees that the ``parse()`` method
    will always generate a parse.
    """

    def parse(self, tokens):
        """
        Return the best chunk structure for the given tokens, as a tree.

        This base implementation is abstract: it always raises, and
        concrete chunk parsers are expected to override it.

        :param tokens: The list of (word, tag) tokens to be chunked.
        :type tokens: list(tuple)
        :rtype: Tree
        :raise NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError()

    # Legacy entry point kept so existing callers continue to work; it is
    # flagged as deprecated so that they are steered towards ``accuracy()``.
    @deprecated("Use accuracy(gold) instead.")
    def evaluate(self, gold):
        """
        Score the chunker against the gold standard.

        This method only forwards to ``accuracy()`` and returns its result
        unchanged.

        :type gold: list(Tree)
        :param gold: The list of chunked sentences to score the chunker on.
        :rtype: ChunkScore
        """
        return self.accuracy(gold)

    def accuracy(self, gold):
        """
        Score the accuracy of the chunker against the gold standard.
        Remove the chunking from the gold standard text, rechunk it using
        the chunker, and return a ``ChunkScore`` object reflecting the
        performance of this chunk parser.

        An empty ``gold`` list yields a ``ChunkScore`` to which no
        sentences have been added.

        :type gold: list(Tree)
        :param gold: The list of chunked sentences to score the chunker on.
        :rtype: ChunkScore
        """
        chunkscore = ChunkScore()
        for correct in gold:
            # ``leaves()`` supplies the gold sentence's tokens without its
            # chunk structure; this parser's own chunking of those tokens
            # is then scored against the gold tree.
            chunkscore.score(correct, self.parse(correct.leaves()))
        return chunkscore