Spaces:

sunnychenxiwang
/

EasyDetect

Sleeping

File size: 9,062 Bytes

d916065

# Natural Language Toolkit: Semantic Interpretation
#
# Author: Ewan Klein <[email protected]>
#
# Copyright (C) 2001-2023 NLTK Project
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""

Utility functions for batch-processing sentences: parsing and

extraction of the semantic representation of the root node of the the

syntax tree, followed by evaluation of the semantic representation in

a first-order model.

"""

import codecs

from nltk.sem import evaluate

##############################################################
## Utility functions for connecting parse output to semantics
##############################################################


def parse_sents(inputs, grammar, trace=0):
    """

    Convert input sentences into syntactic trees.



    :param inputs: sentences to be parsed

    :type inputs: list(str)

    :param grammar: ``FeatureGrammar`` or name of feature-based grammar

    :type grammar: nltk.grammar.FeatureGrammar

    :rtype: list(nltk.tree.Tree) or dict(list(str)): list(Tree)

    :return: a mapping from input sentences to a list of ``Tree`` instances.

    """
    # put imports here to avoid circult dependencies
    from nltk.grammar import FeatureGrammar
    from nltk.parse import FeatureChartParser, load_parser

    if isinstance(grammar, FeatureGrammar):
        cp = FeatureChartParser(grammar)
    else:
        cp = load_parser(grammar, trace=trace)
    parses = []
    for sent in inputs:
        tokens = sent.split()  # use a tokenizer?
        syntrees = list(cp.parse(tokens))
        parses.append(syntrees)
    return parses


def root_semrep(syntree, semkey="SEM"):
    """

    Find the semantic representation at the root of a tree.



    :param syntree: a parse ``Tree``

    :param semkey: the feature label to use for the root semantics in the tree

    :return: the semantic representation at the root of a ``Tree``

    :rtype: sem.Expression

    """
    from nltk.grammar import FeatStructNonterminal

    node = syntree.label()
    assert isinstance(node, FeatStructNonterminal)
    try:
        return node[semkey]
    except KeyError:
        print(node, end=" ")
        print("has no specification for the feature %s" % semkey)
    raise


def interpret_sents(inputs, grammar, semkey="SEM", trace=0):
    """

    Add the semantic representation to each syntactic parse tree

    of each input sentence.



    :param inputs: a list of sentences

    :type inputs: list(str)

    :param grammar: ``FeatureGrammar`` or name of feature-based grammar

    :type grammar: nltk.grammar.FeatureGrammar

    :return: a mapping from sentences to lists of pairs (parse-tree, semantic-representations)

    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression)))

    """
    return [
        [(syn, root_semrep(syn, semkey)) for syn in syntrees]
        for syntrees in parse_sents(inputs, grammar, trace=trace)
    ]


def evaluate_sents(inputs, grammar, model, assignment, trace=0):
    """

    Add the truth-in-a-model value to each semantic representation

    for each syntactic parse of each input sentences.



    :param inputs: a list of sentences

    :type inputs: list(str)

    :param grammar: ``FeatureGrammar`` or name of feature-based grammar

    :type grammar: nltk.grammar.FeatureGrammar

    :return: a mapping from sentences to lists of triples (parse-tree, semantic-representations, evaluation-in-model)

    :rtype: list(list(tuple(nltk.tree.Tree, nltk.sem.logic.ConstantExpression, bool or dict(str): bool)))

    """
    return [
        [
            (syn, sem, model.evaluate("%s" % sem, assignment, trace=trace))
            for (syn, sem) in interpretations
        ]
        for interpretations in interpret_sents(inputs, grammar)
    ]


def demo_model0():
    global m0, g0
    # Initialize a valuation of non-logical constants."""
    v = [
        ("john", "b1"),
        ("mary", "g1"),
        ("suzie", "g2"),
        ("fido", "d1"),
        ("tess", "d2"),
        ("noosa", "n"),
        ("girl", {"g1", "g2"}),
        ("boy", {"b1", "b2"}),
        ("dog", {"d1", "d2"}),
        ("bark", {"d1", "d2"}),
        ("walk", {"b1", "g2", "d1"}),
        ("chase", {("b1", "g1"), ("b2", "g1"), ("g1", "d1"), ("g2", "d2")}),
        (
            "see",
            {("b1", "g1"), ("b2", "d2"), ("g1", "b1"), ("d2", "b1"), ("g2", "n")},
        ),
        ("in", {("b1", "n"), ("b2", "n"), ("d2", "n")}),
        ("with", {("b1", "g1"), ("g1", "b1"), ("d1", "b1"), ("b1", "d1")}),
    ]
    # Read in the data from ``v``
    val = evaluate.Valuation(v)
    # Bind ``dom`` to the ``domain`` property of ``val``
    dom = val.domain
    # Initialize a model with parameters ``dom`` and ``val``.
    m0 = evaluate.Model(dom, val)
    # Initialize a variable assignment with parameter ``dom``
    g0 = evaluate.Assignment(dom)


def read_sents(filename, encoding="utf8"):
    with codecs.open(filename, "r", encoding) as fp:
        sents = [l.rstrip() for l in fp]

    # get rid of blank lines
    sents = [l for l in sents if len(l) > 0]
    sents = [l for l in sents if not l[0] == "#"]
    return sents


def demo_legacy_grammar():
    """

    Check that interpret_sents() is compatible with legacy grammars that use

    a lowercase 'sem' feature.



    Define 'test.fcfg' to be the following



    """
    from nltk.grammar import FeatureGrammar

    g = FeatureGrammar.fromstring(
        """

    % start S

    S[sem=<hello>] -> 'hello'

    """
    )
    print("Reading grammar: %s" % g)
    print("*" * 20)
    for reading in interpret_sents(["hello"], g, semkey="sem"):
        syn, sem = reading[0]
        print()
        print("output: ", sem)


def demo():
    import sys
    from optparse import OptionParser

    description = """

    Parse and evaluate some sentences.

    """

    opts = OptionParser(description=description)

    opts.set_defaults(
        evaluate=True,
        beta=True,
        syntrace=0,
        semtrace=0,
        demo="default",
        grammar="",
        sentences="",
    )

    opts.add_option(
        "-d",
        "--demo",
        dest="demo",
        help="choose demo D; omit this for the default demo, or specify 'chat80'",
        metavar="D",
    )
    opts.add_option(
        "-g", "--gram", dest="grammar", help="read in grammar G", metavar="G"
    )
    opts.add_option(
        "-m",
        "--model",
        dest="model",
        help="import model M (omit '.py' suffix)",
        metavar="M",
    )
    opts.add_option(
        "-s",
        "--sentences",
        dest="sentences",
        help="read in a file of test sentences S",
        metavar="S",
    )
    opts.add_option(
        "-e",
        "--no-eval",
        action="store_false",
        dest="evaluate",
        help="just do a syntactic analysis",
    )
    opts.add_option(
        "-b",
        "--no-beta-reduction",
        action="store_false",
        dest="beta",
        help="don't carry out beta-reduction",
    )
    opts.add_option(
        "-t",
        "--syntrace",
        action="count",
        dest="syntrace",
        help="set syntactic tracing on; requires '-e' option",
    )
    opts.add_option(
        "-T",
        "--semtrace",
        action="count",
        dest="semtrace",
        help="set semantic tracing on",
    )

    (options, args) = opts.parse_args()

    SPACER = "-" * 30

    demo_model0()

    sents = [
        "Fido sees a boy with Mary",
        "John sees Mary",
        "every girl chases a dog",
        "every boy chases a girl",
        "John walks with a girl in Noosa",
        "who walks",
    ]

    gramfile = "grammars/sample_grammars/sem2.fcfg"

    if options.sentences:
        sentsfile = options.sentences
    if options.grammar:
        gramfile = options.grammar
    if options.model:
        exec("import %s as model" % options.model)

    if sents is None:
        sents = read_sents(sentsfile)

    # Set model and assignment
    model = m0
    g = g0

    if options.evaluate:
        evaluations = evaluate_sents(sents, gramfile, model, g, trace=options.semtrace)
    else:
        semreps = interpret_sents(sents, gramfile, trace=options.syntrace)

    for i, sent in enumerate(sents):
        n = 1
        print("\nSentence: %s" % sent)
        print(SPACER)
        if options.evaluate:

            for (syntree, semrep, value) in evaluations[i]:
                if isinstance(value, dict):
                    value = set(value.keys())
                print("%d:  %s" % (n, semrep))
                print(value)
                n += 1
        else:

            for (syntree, semrep) in semreps[i]:
                print("%d:  %s" % (n, semrep))
                n += 1


if __name__ == "__main__":
    demo()
    demo_legacy_grammar()