Spaces:
Sleeping
Sleeping
# Natural Language Toolkit: Semantic Interpretation | |
# | |
# Author: Ewan Klein <[email protected]> | |
# | |
# Copyright (C) 2001-2023 NLTK Project | |
# URL: <https://www.nltk.org/> | |
# For license information, see LICENSE.TXT | |
""" | |
Utility functions for batch-processing sentences: parsing and
extraction of the semantic representation of the root node of the
syntax tree, followed by evaluation of the semantic representation in
a first-order model.
""" | |
import codecs | |
from nltk.sem import evaluate | |
############################################################## | |
## Utility functions for connecting parse output to semantics | |
############################################################## | |
def parse_sents(inputs, grammar, trace=0):
    """
    Convert input sentences into syntactic trees.

    Each sentence is tokenized by splitting on whitespace and then parsed
    with a feature-based chart parser.

    :param inputs: sentences to be parsed
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar or str
    :param trace: tracing level handed to the parser
    :type trace: int
    :rtype: list(list(nltk.tree.Tree))
    :return: one list of ``Tree`` instances per input sentence, in the
        same order as ``inputs``.
    """
    # put imports here to avoid circular dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        # Forward ``trace`` here too, so that tracing behaves the same
        # whether the grammar is passed as an object or by name.
        cp = FeatureChartParser(grammar, trace=trace)
    else:
        cp = load_parser(grammar, trace=trace)

    # Whitespace splitting is the only tokenization applied (use a tokenizer?).
    return [list(cp.parse(sent.split())) for sent in inputs]
def root_semrep(syntree, semkey="SEM"):
    """
    Return the semantic representation stored on the root of a parse tree.

    The root label must be a ``FeatStructNonterminal``; its ``semkey``
    feature is looked up and returned.

    :param syntree: a parse ``Tree``
    :param semkey: the feature label to use for the root semantics in the tree
    :return: the value of the ``semkey`` feature on the root label
    :raise KeyError: if the root label has no ``semkey`` feature (a short
        diagnostic is printed before the error is re-raised)
    """
    from nltk.grammar import FeatStructNonterminal

    root_label = syntree.label()
    assert isinstance(root_label, FeatStructNonterminal)
    try:
        semrep = root_label[semkey]
    except KeyError:
        # Report which node lacks the feature, then let the error propagate.
        print(root_label, "has no specification for the feature %s" % semkey)
        raise
    return semrep
def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
    """
    Pair every parse of every input sentence with its root semantics.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :param semkey: the feature label holding the semantics on the root node
    :param trace: tracing level passed on to ``parse_sents``
    :return: for each sentence (in input order), a list of pairs
        (parse-tree, semantic-representation), one pair per parse
    :rtype: list(list(tuple))
    """
    interpretations = []
    for syntrees in parse_sents(inputs, grammar, trace=trace):
        readings = []
        for syntree in syntrees:
            readings.append((syntree, root_semrep(syntree, semkey)))
        interpretations.append(readings)
    return interpretations
def evaluate_sents(inputs, grammar, model, assignment, trace=0):
    """
    Evaluate the semantics of every parse of every input sentence in a model.

    :param inputs: a list of sentences
    :type inputs: list(str)
    :param grammar: ``FeatureGrammar`` or name of feature-based grammar
    :type grammar: nltk.grammar.FeatureGrammar
    :param model: object whose ``evaluate`` method is called on the string
        form of each semantic representation
    :param assignment: variable assignment handed to ``model.evaluate``
    :param trace: tracing level handed to ``model.evaluate`` only; the
        parsing step is run with its default settings
    :return: for each sentence (in input order), a list of triples
        (parse-tree, semantic-representation, evaluation-in-model)
    :rtype: list(list(tuple))
    """
    results = []
    for interpretations in interpret_sents(inputs, grammar):
        evaluated = []
        for syntree, semrep in interpretations:
            # The model is given the string form of the expression.
            value = model.evaluate("%s" % semrep, assignment, trace=trace)
            evaluated.append((syntree, semrep, value))
        results.append(evaluated)
    return results
def demo_model0():
    """
    Build the small demo model and publish it through module globals.

    After the call, the global ``m0`` holds the model and the global ``g0``
    holds a variable assignment over the same domain.
    """
    global m0, g0

    # Valuation of the non-logical constants: individuals first, then
    # one-place predicates, then two-place relations.
    symbol_values = [
        ("john", "b1"),
        ("mary", "g1"),
        ("suzie", "g2"),
        ("fido", "d1"),
        ("tess", "d2"),
        ("noosa", "n"),
        ("girl", {"g1", "g2"}),
        ("boy", {"b1", "b2"}),
        ("dog", {"d1", "d2"}),
        ("bark", {"d1", "d2"}),
        ("walk", {"b1", "g2", "d1"}),
        ("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
        (
            "see",
            {("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
        ),
        ("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
        ("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
    ]

    valuation = evaluate.Valuation(symbol_values)
    # The domain is taken from the valuation rather than listed by hand.
    domain = valuation.domain

    m0 = evaluate.Model(domain, valuation)
    g0 = evaluate.Assignment(domain)
def read_sents(filename, encoding="utf8"):
    """
    Read sentences from a text file, one per line.

    Trailing whitespace is removed from every line; lines that are empty
    after that, and lines whose first character is ``#``, are dropped.

    :param filename: path of the file to read
    :param encoding: text encoding of the file
    :return: the remaining lines, in file order
    :rtype: list(str)
    """
    with codecs.open(filename, "r", encoding) as stream:
        stripped = (raw.rstrip() for raw in stream)
        # Skip blank lines and comment lines in a single pass.
        return [line for line in stripped if line and not line.startswith("#")]
def demo_legacy_grammar():
    """
    Check that interpret_sents() is compatible with legacy grammars that use
    a lowercase 'sem' feature.

    A one-rule grammar is built inline, printed, and used to interpret the
    sentence "hello"; the semantics of the first reading is printed.
    """
    from nltk.grammar import FeatureGrammar

    legacy_grammar = FeatureGrammar.fromstring(
        """
    % start S
    S[sem=<hello>] -> 'hello'
    """
    )
    print("Reading grammar: %s" % legacy_grammar)
    print("*" * 20)

    for readings in interpret_sents(["hello"], legacy_grammar, semkey="sem"):
        # Only the first reading of each sentence is reported.
        _syntree, semrep = readings[0]
        print()
        print("output: ", semrep)
def _demo_option_parser():
    """Build the command-line option parser used by ``demo()``."""
    from optparse import OptionParser

    description = """
    Parse and evaluate some sentences.
    """

    opts = OptionParser(description=description)

    opts.set_defaults(
        evaluate=True,
        beta=True,
        syntrace=0,
        semtrace=0,
        demo="default",
        grammar="",
        sentences="",
    )

    opts.add_option(
        "-d",
        "--demo",
        dest="demo",
        help="choose demo D; omit this for the default demo, or specify 'chat80'",
        metavar="D",
    )
    opts.add_option(
        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
    )
    opts.add_option(
        "-m",
        "--model",
        dest="model",
        help="import model M (omit '.py' suffix)",
        metavar="M",
    )
    opts.add_option(
        "-s",
        "--sentences",
        dest="sentences",
        help="read in a file of test sentences S",
        metavar="S",
    )
    opts.add_option(
        "-e",
        "--no-eval",
        action="store_false",
        dest="evaluate",
        help="just do a syntactic analysis",
    )
    opts.add_option(
        "-b",
        "--no-beta-reduction",
        action="store_false",
        dest="beta",
        help="don't carry out beta-reduction",
    )
    opts.add_option(
        "-t",
        "--syntrace",
        action="count",
        dest="syntrace",
        help="set syntactic tracing on; requires '-e' option",
    )
    opts.add_option(
        "-T",
        "--semtrace",
        action="count",
        dest="semtrace",
        help="set semantic tracing on",
    )
    return opts


def demo():
    """
    Parse, interpret and (by default) evaluate some sentences from the
    command line.

    Options are read from ``sys.argv`` by the option parser.  The sentences
    come from the file given with ``-s`` if there is one, otherwise from a
    built-in list; the grammar is the one given with ``-g``, otherwise a
    sample grammar.  The model and assignment are always the ones created by
    ``demo_model0()``.

    For each sentence the numbered semantic representations are printed,
    each followed by its value in the model unless ``-e`` was given.

    The ``--demo`` and ``--no-beta-reduction`` options are accepted but are
    not consulted by this function.
    """
    import importlib

    options, _args = _demo_option_parser().parse_args()

    spacer = "-" * 30

    # Populates the module globals ``m0`` and ``g0`` used below.
    demo_model0()

    if options.sentences:
        # Previously the file name was stored but the file was never read,
        # because the built-in sentence list was always in place.
        sents = read_sents(options.sentences)
    else:
        sents = [
            "Fido sees a boy with Mary",
            "John sees Mary",
            "every girl chases a dog",
            "every boy chases a girl",
            "John walks with a girl in Noosa",
            "who walks",
        ]

    gramfile = options.grammar or "grammars/sample_grammars/sem2.fcfg"

    if options.model:
        # Import by name instead of exec()-ing a string built from a
        # command-line argument.
        # NOTE(review): the imported module is not consulted afterwards; the
        # model and assignment below always come from demo_model0().
        importlib.import_module(options.model)

    # Set model and assignment
    model = m0
    g = g0

    if options.evaluate:
        results = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
    else:
        results = interpret_sents(sents, gramfile, trace=options.syntrace)

    for sent, readings in zip(sents, results):
        print("\nSentence: %s" % sent)
        print(spacer)
        for n, reading in enumerate(readings, start=1):
            # Readings are (tree, semrep) pairs, or (tree, semrep, value)
            # triples when evaluation is switched on.
            print("%d: %s" % (n, reading[1]))
            if options.evaluate:
                value = reading[2]
                if isinstance(value, dict):
                    # Show only the keys of a dict-valued result.
                    value = set(value.keys())
                print(value)
# When executed as a script, run the main demo and then the
# legacy-grammar compatibility check.
if __name__ == "__main__":
    demo()
    demo_legacy_grammar()