Spaces:
Sleeping
Sleeping
# Natural Language Toolkit: Interface to Boxer | |
# <http://svn.ask.it.usyd.edu.au/trac/candc/wiki/boxer> | |
# | |
# Author: Dan Garrette <[email protected]> | |
# | |
# Copyright (C) 2001-2023 NLTK Project | |
# URL: <https://www.nltk.org/> | |
# For license information, see LICENSE.TXT | |
""" | |
An interface to Boxer. | |
This interface relies on the latest version of the development (subversion) version of | |
C&C and Boxer. | |
Usage | |
===== | |
Set the environment variable CANDC to the bin directory of your CandC installation. | |
The models directory should be in the CandC root directory. | |
For example:: | |
/path/to/candc/ | |
bin/ | |
candc | |
boxer | |
models/ | |
boxer/ | |
""" | |
import operator | |
import os | |
import re | |
import subprocess | |
import tempfile | |
from functools import reduce | |
from optparse import OptionParser | |
from nltk.internals import find_binary | |
from nltk.sem.drt import ( | |
DRS, | |
DrtApplicationExpression, | |
DrtEqualityExpression, | |
DrtNegatedExpression, | |
DrtOrExpression, | |
DrtParser, | |
DrtProposition, | |
DrtTokens, | |
DrtVariableExpression, | |
) | |
from nltk.sem.logic import ( | |
ExpectedMoreTokensException, | |
LogicalExpressionException, | |
UnexpectedTokenException, | |
Variable, | |
) | |
class Boxer: | |
""" | |
This class is an interface to Johan Bos's program Boxer, a wide-coverage | |
semantic parser that produces Discourse Representation Structures (DRSs). | |
""" | |
def __init__( | |
self, | |
boxer_drs_interpreter=None, | |
elimeq=False, | |
bin_dir=None, | |
verbose=False, | |
resolve=True, | |
): | |
""" | |
:param boxer_drs_interpreter: A class that converts from the | |
``AbstractBoxerDrs`` object hierarchy to a different object. The | |
default is ``NltkDrtBoxerDrsInterpreter``, which converts to the NLTK | |
DRT hierarchy. | |
:param elimeq: When set to true, Boxer removes all equalities from the | |
DRSs and discourse referents standing in the equality relation are | |
unified, but only if this can be done in a meaning-preserving manner. | |
:param resolve: When set to true, Boxer will resolve all anaphoric DRSs and perform merge-reduction. | |
Resolution follows Van der Sandt's theory of binding and accommodation. | |
""" | |
if boxer_drs_interpreter is None: | |
boxer_drs_interpreter = NltkDrtBoxerDrsInterpreter() | |
self._boxer_drs_interpreter = boxer_drs_interpreter | |
self._resolve = resolve | |
self._elimeq = elimeq | |
self.set_bin_dir(bin_dir, verbose) | |
def set_bin_dir(self, bin_dir, verbose=False): | |
self._candc_bin = self._find_binary("candc", bin_dir, verbose) | |
self._candc_models_path = os.path.normpath( | |
os.path.join(self._candc_bin[:-5], "../models") | |
) | |
self._boxer_bin = self._find_binary("boxer", bin_dir, verbose) | |
def interpret(self, input, discourse_id=None, question=False, verbose=False): | |
""" | |
Use Boxer to give a first order representation. | |
:param input: str Input sentence to parse | |
:param occur_index: bool Should predicates be occurrence indexed? | |
:param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate. | |
:return: ``drt.DrtExpression`` | |
""" | |
discourse_ids = [discourse_id] if discourse_id is not None else None | |
(d,) = self.interpret_multi_sents([[input]], discourse_ids, question, verbose) | |
if not d: | |
raise Exception(f'Unable to interpret: "{input}"') | |
return d | |
def interpret_multi(self, input, discourse_id=None, question=False, verbose=False): | |
""" | |
Use Boxer to give a first order representation. | |
:param input: list of str Input sentences to parse as a single discourse | |
:param occur_index: bool Should predicates be occurrence indexed? | |
:param discourse_id: str An identifier to be inserted to each occurrence-indexed predicate. | |
:return: ``drt.DrtExpression`` | |
""" | |
discourse_ids = [discourse_id] if discourse_id is not None else None | |
(d,) = self.interpret_multi_sents([input], discourse_ids, question, verbose) | |
if not d: | |
raise Exception(f'Unable to interpret: "{input}"') | |
return d | |
def interpret_sents( | |
self, inputs, discourse_ids=None, question=False, verbose=False | |
): | |
""" | |
Use Boxer to give a first order representation. | |
:param inputs: list of str Input sentences to parse as individual discourses | |
:param occur_index: bool Should predicates be occurrence indexed? | |
:param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. | |
:return: list of ``drt.DrtExpression`` | |
""" | |
return self.interpret_multi_sents( | |
[[input] for input in inputs], discourse_ids, question, verbose | |
) | |
def interpret_multi_sents( | |
self, inputs, discourse_ids=None, question=False, verbose=False | |
): | |
""" | |
Use Boxer to give a first order representation. | |
:param inputs: list of list of str Input discourses to parse | |
:param occur_index: bool Should predicates be occurrence indexed? | |
:param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. | |
:return: ``drt.DrtExpression`` | |
""" | |
if discourse_ids is not None: | |
assert len(inputs) == len(discourse_ids) | |
assert reduce(operator.and_, (id is not None for id in discourse_ids)) | |
use_disc_id = True | |
else: | |
discourse_ids = list(map(str, range(len(inputs)))) | |
use_disc_id = False | |
candc_out = self._call_candc(inputs, discourse_ids, question, verbose=verbose) | |
boxer_out = self._call_boxer(candc_out, verbose=verbose) | |
# if 'ERROR: input file contains no ccg/2 terms.' in boxer_out: | |
# raise UnparseableInputException('Could not parse with candc: "%s"' % input_str) | |
drs_dict = self._parse_to_drs_dict(boxer_out, use_disc_id) | |
return [drs_dict.get(id, None) for id in discourse_ids] | |
def _call_candc(self, inputs, discourse_ids, question, verbose=False): | |
""" | |
Call the ``candc`` binary with the given input. | |
:param inputs: list of list of str Input discourses to parse | |
:param discourse_ids: list of str Identifiers to be inserted to each occurrence-indexed predicate. | |
:param filename: str A filename for the output file | |
:return: stdout | |
""" | |
args = [ | |
"--models", | |
os.path.join(self._candc_models_path, ["boxer", "questions"][question]), | |
"--candc-printer", | |
"boxer", | |
] | |
return self._call( | |
"\n".join( | |
sum( | |
([f"<META>'{id}'"] + d for d, id in zip(inputs, discourse_ids)), | |
[], | |
) | |
), | |
self._candc_bin, | |
args, | |
verbose, | |
) | |
def _call_boxer(self, candc_out, verbose=False): | |
""" | |
Call the ``boxer`` binary with the given input. | |
:param candc_out: str output from C&C parser | |
:return: stdout | |
""" | |
f = None | |
try: | |
fd, temp_filename = tempfile.mkstemp( | |
prefix="boxer-", suffix=".in", text=True | |
) | |
f = os.fdopen(fd, "w") | |
f.write(candc_out.decode("utf-8")) | |
finally: | |
if f: | |
f.close() | |
args = [ | |
"--box", | |
"false", | |
"--semantics", | |
"drs", | |
#'--flat', 'false', # removed from boxer | |
"--resolve", | |
["false", "true"][self._resolve], | |
"--elimeq", | |
["false", "true"][self._elimeq], | |
"--format", | |
"prolog", | |
"--instantiate", | |
"true", | |
"--input", | |
temp_filename, | |
] | |
stdout = self._call(None, self._boxer_bin, args, verbose) | |
os.remove(temp_filename) | |
return stdout | |
def _find_binary(self, name, bin_dir, verbose=False): | |
return find_binary( | |
name, | |
path_to_bin=bin_dir, | |
env_vars=["CANDC"], | |
url="http://svn.ask.it.usyd.edu.au/trac/candc/", | |
binary_names=[name, name + ".exe"], | |
verbose=verbose, | |
) | |
def _call(self, input_str, binary, args=[], verbose=False): | |
""" | |
Call the binary with the given input. | |
:param input_str: A string whose contents are used as stdin. | |
:param binary: The location of the binary to call | |
:param args: A list of command-line arguments. | |
:return: stdout | |
""" | |
if verbose: | |
print("Calling:", binary) | |
print("Args:", args) | |
print("Input:", input_str) | |
print("Command:", binary + " " + " ".join(args)) | |
# Call via a subprocess | |
if input_str is None: | |
cmd = [binary] + args | |
p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
else: | |
cmd = 'echo "{}" | {} {}'.format(input_str, binary, " ".join(args)) | |
p = subprocess.Popen( | |
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True | |
) | |
stdout, stderr = p.communicate() | |
if verbose: | |
print("Return code:", p.returncode) | |
if stdout: | |
print("stdout:\n", stdout, "\n") | |
if stderr: | |
print("stderr:\n", stderr, "\n") | |
if p.returncode != 0: | |
raise Exception( | |
"ERROR CALLING: {} {}\nReturncode: {}\n{}".format( | |
binary, " ".join(args), p.returncode, stderr | |
) | |
) | |
return stdout | |
def _parse_to_drs_dict(self, boxer_out, use_disc_id): | |
lines = boxer_out.decode("utf-8").split("\n") | |
drs_dict = {} | |
i = 0 | |
while i < len(lines): | |
line = lines[i] | |
if line.startswith("id("): | |
comma_idx = line.index(",") | |
discourse_id = line[3:comma_idx] | |
if discourse_id[0] == "'" and discourse_id[-1] == "'": | |
discourse_id = discourse_id[1:-1] | |
drs_id = line[comma_idx + 1 : line.index(")")] | |
i += 1 | |
line = lines[i] | |
assert line.startswith(f"sem({drs_id},") | |
if line[-4:] == "').'": | |
line = line[:-4] + ")." | |
assert line.endswith(")."), f"can't parse line: {line}" | |
search_start = len(f"sem({drs_id},[") | |
brace_count = 1 | |
drs_start = -1 | |
for j, c in enumerate(line[search_start:]): | |
if c == "[": | |
brace_count += 1 | |
if c == "]": | |
brace_count -= 1 | |
if brace_count == 0: | |
drs_start = search_start + j + 1 | |
if line[drs_start : drs_start + 3] == "','": | |
drs_start = drs_start + 3 | |
else: | |
drs_start = drs_start + 1 | |
break | |
assert drs_start > -1 | |
drs_input = line[drs_start:-2].strip() | |
parsed = self._parse_drs(drs_input, discourse_id, use_disc_id) | |
drs_dict[discourse_id] = self._boxer_drs_interpreter.interpret(parsed) | |
i += 1 | |
return drs_dict | |
def _parse_drs(self, drs_string, discourse_id, use_disc_id): | |
return BoxerOutputDrsParser([None, discourse_id][use_disc_id]).parse(drs_string) | |
class BoxerOutputDrsParser(DrtParser): | |
def __init__(self, discourse_id=None): | |
""" | |
This class is used to parse the Prolog DRS output from Boxer into a | |
hierarchy of python objects. | |
""" | |
DrtParser.__init__(self) | |
self.discourse_id = discourse_id | |
self.sentence_id_offset = None | |
self.quote_chars = [("'", "'", "\\", False)] | |
def parse(self, data, signature=None): | |
return DrtParser.parse(self, data, signature) | |
def get_all_symbols(self): | |
return ["(", ")", ",", "[", "]", ":"] | |
def handle(self, tok, context): | |
return self.handle_drs(tok) | |
def attempt_adjuncts(self, expression, context): | |
return expression | |
def parse_condition(self, indices): | |
""" | |
Parse a DRS condition | |
:return: list of ``DrtExpression`` | |
""" | |
tok = self.token() | |
accum = self.handle_condition(tok, indices) | |
if accum is None: | |
raise UnexpectedTokenException(tok) | |
return accum | |
def handle_drs(self, tok): | |
if tok == "drs": | |
return self.parse_drs() | |
elif tok in ["merge", "smerge"]: | |
return self._handle_binary_expression(self._make_merge_expression)(None, []) | |
elif tok in ["alfa"]: | |
return self._handle_alfa(self._make_merge_expression)(None, []) | |
def handle_condition(self, tok, indices): | |
""" | |
Handle a DRS condition | |
:param indices: list of int | |
:return: list of ``DrtExpression`` | |
""" | |
if tok == "not": | |
return [self._handle_not()] | |
if tok == "or": | |
conds = [self._handle_binary_expression(self._make_or_expression)] | |
elif tok == "imp": | |
conds = [self._handle_binary_expression(self._make_imp_expression)] | |
elif tok == "eq": | |
conds = [self._handle_eq()] | |
elif tok == "prop": | |
conds = [self._handle_prop()] | |
elif tok == "pred": | |
conds = [self._handle_pred()] | |
elif tok == "named": | |
conds = [self._handle_named()] | |
elif tok == "rel": | |
conds = [self._handle_rel()] | |
elif tok == "timex": | |
conds = self._handle_timex() | |
elif tok == "card": | |
conds = [self._handle_card()] | |
elif tok == "whq": | |
conds = [self._handle_whq()] | |
elif tok == "duplex": | |
conds = [self._handle_duplex()] | |
else: | |
conds = [] | |
return sum( | |
( | |
[cond(sent_index, word_indices) for cond in conds] | |
for sent_index, word_indices in self._sent_and_word_indices(indices) | |
), | |
[], | |
) | |
def _handle_not(self): | |
self.assertToken(self.token(), "(") | |
drs = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return BoxerNot(drs) | |
def _handle_pred(self): | |
# pred(_G3943, dog, n, 0) | |
self.assertToken(self.token(), "(") | |
variable = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
name = self.token() | |
self.assertToken(self.token(), ",") | |
pos = self.token() | |
self.assertToken(self.token(), ",") | |
sense = int(self.token()) | |
self.assertToken(self.token(), ")") | |
def _handle_pred_f(sent_index, word_indices): | |
return BoxerPred( | |
self.discourse_id, sent_index, word_indices, variable, name, pos, sense | |
) | |
return _handle_pred_f | |
def _handle_duplex(self): | |
# duplex(whq, drs(...), var, drs(...)) | |
self.assertToken(self.token(), "(") | |
# self.assertToken(self.token(), '[') | |
ans_types = [] | |
# while self.token(0) != ']': | |
# cat = self.token() | |
# self.assertToken(self.token(), ':') | |
# if cat == 'des': | |
# ans_types.append(self.token()) | |
# elif cat == 'num': | |
# ans_types.append('number') | |
# typ = self.token() | |
# if typ == 'cou': | |
# ans_types.append('count') | |
# else: | |
# ans_types.append(typ) | |
# else: | |
# ans_types.append(self.token()) | |
# self.token() #swallow the ']' | |
self.assertToken(self.token(), "whq") | |
self.assertToken(self.token(), ",") | |
d1 = self.process_next_expression(None) | |
self.assertToken(self.token(), ",") | |
ref = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
d2 = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerWhq( | |
self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 | |
) | |
def _handle_named(self): | |
# named(x0, john, per, 0) | |
self.assertToken(self.token(), "(") | |
variable = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
name = self.token() | |
self.assertToken(self.token(), ",") | |
type = self.token() | |
self.assertToken(self.token(), ",") | |
sense = self.token() # as per boxer rev 2554 | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerNamed( | |
self.discourse_id, sent_index, word_indices, variable, name, type, sense | |
) | |
def _handle_rel(self): | |
# rel(_G3993, _G3943, agent, 0) | |
self.assertToken(self.token(), "(") | |
var1 = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
var2 = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
rel = self.token() | |
self.assertToken(self.token(), ",") | |
sense = int(self.token()) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerRel( | |
self.discourse_id, sent_index, word_indices, var1, var2, rel, sense | |
) | |
def _handle_timex(self): | |
# timex(_G18322, date([]: (+), []:'XXXX', [1004]:'04', []:'XX')) | |
self.assertToken(self.token(), "(") | |
arg = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
new_conds = self._handle_time_expression(arg) | |
self.assertToken(self.token(), ")") | |
return new_conds | |
def _handle_time_expression(self, arg): | |
# date([]: (+), []:'XXXX', [1004]:'04', []:'XX') | |
tok = self.token() | |
self.assertToken(self.token(), "(") | |
if tok == "date": | |
conds = self._handle_date(arg) | |
elif tok == "time": | |
conds = self._handle_time(arg) | |
else: | |
return None | |
self.assertToken(self.token(), ")") | |
return [ | |
lambda sent_index, word_indices: BoxerPred( | |
self.discourse_id, sent_index, word_indices, arg, tok, "n", 0 | |
) | |
] + [lambda sent_index, word_indices: cond for cond in conds] | |
def _handle_date(self, arg): | |
# []: (+), []:'XXXX', [1004]:'04', []:'XX' | |
conds = [] | |
((sent_index, word_indices),) = self._sent_and_word_indices( | |
self._parse_index_list() | |
) | |
self.assertToken(self.token(), "(") | |
pol = self.token() | |
self.assertToken(self.token(), ")") | |
conds.append( | |
BoxerPred( | |
self.discourse_id, | |
sent_index, | |
word_indices, | |
arg, | |
f"date_pol_{pol}", | |
"a", | |
0, | |
) | |
) | |
self.assertToken(self.token(), ",") | |
((sent_index, word_indices),) = self._sent_and_word_indices( | |
self._parse_index_list() | |
) | |
year = self.token() | |
if year != "XXXX": | |
year = year.replace(":", "_") | |
conds.append( | |
BoxerPred( | |
self.discourse_id, | |
sent_index, | |
word_indices, | |
arg, | |
f"date_year_{year}", | |
"a", | |
0, | |
) | |
) | |
self.assertToken(self.token(), ",") | |
((sent_index, word_indices),) = self._sent_and_word_indices( | |
self._parse_index_list() | |
) | |
month = self.token() | |
if month != "XX": | |
conds.append( | |
BoxerPred( | |
self.discourse_id, | |
sent_index, | |
word_indices, | |
arg, | |
f"date_month_{month}", | |
"a", | |
0, | |
) | |
) | |
self.assertToken(self.token(), ",") | |
((sent_index, word_indices),) = self._sent_and_word_indices( | |
self._parse_index_list() | |
) | |
day = self.token() | |
if day != "XX": | |
conds.append( | |
BoxerPred( | |
self.discourse_id, | |
sent_index, | |
word_indices, | |
arg, | |
f"date_day_{day}", | |
"a", | |
0, | |
) | |
) | |
return conds | |
def _handle_time(self, arg): | |
# time([1018]:'18', []:'XX', []:'XX') | |
conds = [] | |
self._parse_index_list() | |
hour = self.token() | |
if hour != "XX": | |
conds.append(self._make_atom("r_hour_2", arg, hour)) | |
self.assertToken(self.token(), ",") | |
self._parse_index_list() | |
min = self.token() | |
if min != "XX": | |
conds.append(self._make_atom("r_min_2", arg, min)) | |
self.assertToken(self.token(), ",") | |
self._parse_index_list() | |
sec = self.token() | |
if sec != "XX": | |
conds.append(self._make_atom("r_sec_2", arg, sec)) | |
return conds | |
def _handle_card(self): | |
# card(_G18535, 28, ge) | |
self.assertToken(self.token(), "(") | |
variable = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
value = self.token() | |
self.assertToken(self.token(), ",") | |
type = self.token() | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerCard( | |
self.discourse_id, sent_index, word_indices, variable, value, type | |
) | |
def _handle_prop(self): | |
# prop(_G15949, drs(...)) | |
self.assertToken(self.token(), "(") | |
variable = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
drs = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerProp( | |
self.discourse_id, sent_index, word_indices, variable, drs | |
) | |
def _parse_index_list(self): | |
# [1001,1002]: | |
indices = [] | |
self.assertToken(self.token(), "[") | |
while self.token(0) != "]": | |
indices.append(self.parse_index()) | |
if self.token(0) == ",": | |
self.token() # swallow ',' | |
self.token() # swallow ']' | |
self.assertToken(self.token(), ":") | |
return indices | |
def parse_drs(self): | |
# drs([[1001]:_G3943], | |
# [[1002]:pred(_G3943, dog, n, 0)] | |
# ) | |
self.assertToken(self.token(), "(") | |
self.assertToken(self.token(), "[") | |
refs = set() | |
while self.token(0) != "]": | |
indices = self._parse_index_list() | |
refs.add(self.parse_variable()) | |
if self.token(0) == ",": | |
self.token() # swallow ',' | |
self.token() # swallow ']' | |
self.assertToken(self.token(), ",") | |
self.assertToken(self.token(), "[") | |
conds = [] | |
while self.token(0) != "]": | |
indices = self._parse_index_list() | |
conds.extend(self.parse_condition(indices)) | |
if self.token(0) == ",": | |
self.token() # swallow ',' | |
self.token() # swallow ']' | |
self.assertToken(self.token(), ")") | |
return BoxerDrs(list(refs), conds) | |
def _handle_binary_expression(self, make_callback): | |
self.assertToken(self.token(), "(") | |
drs1 = self.process_next_expression(None) | |
self.assertToken(self.token(), ",") | |
drs2 = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: make_callback( | |
sent_index, word_indices, drs1, drs2 | |
) | |
def _handle_alfa(self, make_callback): | |
self.assertToken(self.token(), "(") | |
type = self.token() | |
self.assertToken(self.token(), ",") | |
drs1 = self.process_next_expression(None) | |
self.assertToken(self.token(), ",") | |
drs2 = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: make_callback( | |
sent_index, word_indices, drs1, drs2 | |
) | |
def _handle_eq(self): | |
self.assertToken(self.token(), "(") | |
var1 = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
var2 = self.parse_variable() | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerEq( | |
self.discourse_id, sent_index, word_indices, var1, var2 | |
) | |
def _handle_whq(self): | |
self.assertToken(self.token(), "(") | |
self.assertToken(self.token(), "[") | |
ans_types = [] | |
while self.token(0) != "]": | |
cat = self.token() | |
self.assertToken(self.token(), ":") | |
if cat == "des": | |
ans_types.append(self.token()) | |
elif cat == "num": | |
ans_types.append("number") | |
typ = self.token() | |
if typ == "cou": | |
ans_types.append("count") | |
else: | |
ans_types.append(typ) | |
else: | |
ans_types.append(self.token()) | |
self.token() # swallow the ']' | |
self.assertToken(self.token(), ",") | |
d1 = self.process_next_expression(None) | |
self.assertToken(self.token(), ",") | |
ref = self.parse_variable() | |
self.assertToken(self.token(), ",") | |
d2 = self.process_next_expression(None) | |
self.assertToken(self.token(), ")") | |
return lambda sent_index, word_indices: BoxerWhq( | |
self.discourse_id, sent_index, word_indices, ans_types, d1, ref, d2 | |
) | |
def _make_merge_expression(self, sent_index, word_indices, drs1, drs2): | |
return BoxerDrs(drs1.refs + drs2.refs, drs1.conds + drs2.conds) | |
def _make_or_expression(self, sent_index, word_indices, drs1, drs2): | |
return BoxerOr(self.discourse_id, sent_index, word_indices, drs1, drs2) | |
def _make_imp_expression(self, sent_index, word_indices, drs1, drs2): | |
return BoxerDrs(drs1.refs, drs1.conds, drs2) | |
def parse_variable(self): | |
var = self.token() | |
assert re.match(r"^[exps]\d+$", var), var | |
return var | |
def parse_index(self): | |
return int(self.token()) | |
def _sent_and_word_indices(self, indices): | |
""" | |
:return: list of (sent_index, word_indices) tuples | |
""" | |
sent_indices = {(i / 1000) - 1 for i in indices if i >= 0} | |
if sent_indices: | |
pairs = [] | |
for sent_index in sent_indices: | |
word_indices = [ | |
(i % 1000) - 1 for i in indices if sent_index == (i / 1000) - 1 | |
] | |
pairs.append((sent_index, word_indices)) | |
return pairs | |
else: | |
word_indices = [(i % 1000) - 1 for i in indices] | |
return [(None, word_indices)] | |
class BoxerDrsParser(DrtParser): | |
""" | |
Reparse the str form of subclasses of ``AbstractBoxerDrs`` | |
""" | |
def __init__(self, discourse_id=None): | |
DrtParser.__init__(self) | |
self.discourse_id = discourse_id | |
def get_all_symbols(self): | |
return [ | |
DrtTokens.OPEN, | |
DrtTokens.CLOSE, | |
DrtTokens.COMMA, | |
DrtTokens.OPEN_BRACKET, | |
DrtTokens.CLOSE_BRACKET, | |
] | |
def attempt_adjuncts(self, expression, context): | |
return expression | |
def handle(self, tok, context): | |
try: | |
# if tok == 'drs': | |
# self.assertNextToken(DrtTokens.OPEN) | |
# label = int(self.token()) | |
# self.assertNextToken(DrtTokens.COMMA) | |
# refs = list(map(int, self.handle_refs())) | |
# self.assertNextToken(DrtTokens.COMMA) | |
# conds = self.handle_conds(None) | |
# self.assertNextToken(DrtTokens.CLOSE) | |
# return BoxerDrs(label, refs, conds) | |
if tok == "pred": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = list(map(int, self.handle_refs())) | |
self.assertNextToken(DrtTokens.COMMA) | |
variable = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
name = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
pos = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
sense = int(self.token()) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerPred(disc_id, sent_id, word_ids, variable, name, pos, sense) | |
elif tok == "named": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = map(int, self.handle_refs()) | |
self.assertNextToken(DrtTokens.COMMA) | |
variable = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
name = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
type = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
sense = int(self.token()) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerNamed( | |
disc_id, sent_id, word_ids, variable, name, type, sense | |
) | |
elif tok == "rel": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = list(map(int, self.handle_refs())) | |
self.assertNextToken(DrtTokens.COMMA) | |
var1 = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
var2 = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
rel = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
sense = int(self.token()) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerRel(disc_id, sent_id, word_ids, var1, var2, rel, sense) | |
elif tok == "prop": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = list(map(int, self.handle_refs())) | |
self.assertNextToken(DrtTokens.COMMA) | |
variable = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
drs = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerProp(disc_id, sent_id, word_ids, variable, drs) | |
elif tok == "not": | |
self.assertNextToken(DrtTokens.OPEN) | |
drs = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerNot(drs) | |
elif tok == "imp": | |
self.assertNextToken(DrtTokens.OPEN) | |
drs1 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.COMMA) | |
drs2 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerDrs(drs1.refs, drs1.conds, drs2) | |
elif tok == "or": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = map(int, self.handle_refs()) | |
self.assertNextToken(DrtTokens.COMMA) | |
drs1 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.COMMA) | |
drs2 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerOr(disc_id, sent_id, word_ids, drs1, drs2) | |
elif tok == "eq": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = list(map(int, self.handle_refs())) | |
self.assertNextToken(DrtTokens.COMMA) | |
var1 = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
var2 = int(self.token()) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerEq(disc_id, sent_id, word_ids, var1, var2) | |
elif tok == "card": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = map(int, self.handle_refs()) | |
self.assertNextToken(DrtTokens.COMMA) | |
var = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
value = self.token() | |
self.assertNextToken(DrtTokens.COMMA) | |
type = self.token() | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerCard(disc_id, sent_id, word_ids, var, value, type) | |
elif tok == "whq": | |
self.assertNextToken(DrtTokens.OPEN) | |
disc_id = ( | |
self.discourse_id if self.discourse_id is not None else self.token() | |
) | |
self.assertNextToken(DrtTokens.COMMA) | |
sent_id = self.nullableIntToken() | |
self.assertNextToken(DrtTokens.COMMA) | |
word_ids = list(map(int, self.handle_refs())) | |
self.assertNextToken(DrtTokens.COMMA) | |
ans_types = self.handle_refs() | |
self.assertNextToken(DrtTokens.COMMA) | |
drs1 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.COMMA) | |
var = int(self.token()) | |
self.assertNextToken(DrtTokens.COMMA) | |
drs2 = self.process_next_expression(None) | |
self.assertNextToken(DrtTokens.CLOSE) | |
return BoxerWhq(disc_id, sent_id, word_ids, ans_types, drs1, var, drs2) | |
except Exception as e: | |
raise LogicalExpressionException(self._currentIndex, str(e)) from e | |
assert False, repr(tok) | |
def nullableIntToken(self): | |
t = self.token() | |
return int(t) if t != "None" else None | |
def get_next_token_variable(self, description): | |
try: | |
return self.token() | |
except ExpectedMoreTokensException as e: | |
raise ExpectedMoreTokensException(e.index, "Variable expected.") from e | |
class AbstractBoxerDrs: | |
def variables(self): | |
""" | |
:return: (set<variables>, set<events>, set<propositions>) | |
""" | |
variables, events, propositions = self._variables() | |
return (variables - (events | propositions), events, propositions - events) | |
def variable_types(self): | |
vartypes = {} | |
for t, vars in zip(("z", "e", "p"), self.variables()): | |
for v in vars: | |
vartypes[v] = t | |
return vartypes | |
def _variables(self): | |
""" | |
:return: (set<variables>, set<events>, set<propositions>) | |
""" | |
return (set(), set(), set()) | |
def atoms(self): | |
return set() | |
def clean(self): | |
return self | |
def _clean_name(self, name): | |
return name.replace("-", "_").replace("'", "_") | |
def renumber_sentences(self, f): | |
return self | |
def __hash__(self): | |
return hash(f"{self}") | |
class BoxerDrs(AbstractBoxerDrs): | |
def __init__(self, refs, conds, consequent=None): | |
AbstractBoxerDrs.__init__(self) | |
self.refs = refs | |
self.conds = conds | |
self.consequent = consequent | |
def _variables(self): | |
variables = (set(), set(), set()) | |
for cond in self.conds: | |
for s, v in zip(variables, cond._variables()): | |
s.update(v) | |
if self.consequent is not None: | |
for s, v in zip(variables, self.consequent._variables()): | |
s.update(v) | |
return variables | |
def atoms(self): | |
atoms = reduce(operator.or_, (cond.atoms() for cond in self.conds), set()) | |
if self.consequent is not None: | |
atoms.update(self.consequent.atoms()) | |
return atoms | |
def clean(self): | |
consequent = self.consequent.clean() if self.consequent else None | |
return BoxerDrs(self.refs, [c.clean() for c in self.conds], consequent) | |
def renumber_sentences(self, f): | |
consequent = self.consequent.renumber_sentences(f) if self.consequent else None | |
return BoxerDrs( | |
self.refs, [c.renumber_sentences(f) for c in self.conds], consequent | |
) | |
def __repr__(self): | |
s = "drs([{}], [{}])".format( | |
", ".join("%s" % r for r in self.refs), | |
", ".join("%s" % c for c in self.conds), | |
) | |
if self.consequent is not None: | |
s = f"imp({s}, {self.consequent})" | |
return s | |
def __eq__(self, other): | |
return ( | |
self.__class__ == other.__class__ | |
and self.refs == other.refs | |
and len(self.conds) == len(other.conds) | |
and reduce( | |
operator.and_, (c1 == c2 for c1, c2 in zip(self.conds, other.conds)) | |
) | |
and self.consequent == other.consequent | |
) | |
def __ne__(self, other): | |
return not self == other | |
__hash__ = AbstractBoxerDrs.__hash__ | |
class BoxerNot(AbstractBoxerDrs): | |
def __init__(self, drs): | |
AbstractBoxerDrs.__init__(self) | |
self.drs = drs | |
def _variables(self): | |
return self.drs._variables() | |
def atoms(self): | |
return self.drs.atoms() | |
def clean(self): | |
return BoxerNot(self.drs.clean()) | |
def renumber_sentences(self, f): | |
return BoxerNot(self.drs.renumber_sentences(f)) | |
def __repr__(self): | |
return "not(%s)" % (self.drs) | |
def __eq__(self, other): | |
return self.__class__ == other.__class__ and self.drs == other.drs | |
def __ne__(self, other): | |
return not self == other | |
__hash__ = AbstractBoxerDrs.__hash__ | |
class BoxerIndexed(AbstractBoxerDrs): | |
def __init__(self, discourse_id, sent_index, word_indices): | |
AbstractBoxerDrs.__init__(self) | |
self.discourse_id = discourse_id | |
self.sent_index = sent_index | |
self.word_indices = word_indices | |
def atoms(self): | |
return {self} | |
def __eq__(self, other): | |
return ( | |
self.__class__ == other.__class__ | |
and self.discourse_id == other.discourse_id | |
and self.sent_index == other.sent_index | |
and self.word_indices == other.word_indices | |
and reduce(operator.and_, (s == o for s, o in zip(self, other))) | |
) | |
def __ne__(self, other): | |
return not self == other | |
__hash__ = AbstractBoxerDrs.__hash__ | |
def __repr__(self): | |
s = "{}({}, {}, [{}]".format( | |
self._pred(), | |
self.discourse_id, | |
self.sent_index, | |
", ".join("%s" % wi for wi in self.word_indices), | |
) | |
for v in self: | |
s += ", %s" % v | |
return s + ")" | |
class BoxerPred(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var, name, pos, sense): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var = var | |
self.name = name | |
self.pos = pos | |
self.sense = sense | |
def _variables(self): | |
return ({self.var}, set(), set()) | |
def change_var(self, var): | |
return BoxerPred( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
var, | |
self.name, | |
self.pos, | |
self.sense, | |
) | |
def clean(self): | |
return BoxerPred( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.var, | |
self._clean_name(self.name), | |
self.pos, | |
self.sense, | |
) | |
def renumber_sentences(self, f): | |
new_sent_index = f(self.sent_index) | |
return BoxerPred( | |
self.discourse_id, | |
new_sent_index, | |
self.word_indices, | |
self.var, | |
self.name, | |
self.pos, | |
self.sense, | |
) | |
def __iter__(self): | |
return iter((self.var, self.name, self.pos, self.sense)) | |
def _pred(self): | |
return "pred" | |
class BoxerNamed(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var, name, type, sense): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var = var | |
self.name = name | |
self.type = type | |
self.sense = sense | |
def _variables(self): | |
return ({self.var}, set(), set()) | |
def change_var(self, var): | |
return BoxerNamed( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
var, | |
self.name, | |
self.type, | |
self.sense, | |
) | |
def clean(self): | |
return BoxerNamed( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.var, | |
self._clean_name(self.name), | |
self.type, | |
self.sense, | |
) | |
def renumber_sentences(self, f): | |
return BoxerNamed( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.var, | |
self.name, | |
self.type, | |
self.sense, | |
) | |
def __iter__(self): | |
return iter((self.var, self.name, self.type, self.sense)) | |
def _pred(self): | |
return "named" | |
class BoxerRel(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var1, var2, rel, sense): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var1 = var1 | |
self.var2 = var2 | |
self.rel = rel | |
self.sense = sense | |
def _variables(self): | |
return ({self.var1, self.var2}, set(), set()) | |
def clean(self): | |
return BoxerRel( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.var1, | |
self.var2, | |
self._clean_name(self.rel), | |
self.sense, | |
) | |
def renumber_sentences(self, f): | |
return BoxerRel( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.var1, | |
self.var2, | |
self.rel, | |
self.sense, | |
) | |
def __iter__(self): | |
return iter((self.var1, self.var2, self.rel, self.sense)) | |
def _pred(self): | |
return "rel" | |
class BoxerProp(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var, drs): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var = var | |
self.drs = drs | |
def _variables(self): | |
return tuple( | |
map(operator.or_, (set(), set(), {self.var}), self.drs._variables()) | |
) | |
def referenced_labels(self): | |
return {self.drs} | |
def atoms(self): | |
return self.drs.atoms() | |
def clean(self): | |
return BoxerProp( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.var, | |
self.drs.clean(), | |
) | |
def renumber_sentences(self, f): | |
return BoxerProp( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.var, | |
self.drs.renumber_sentences(f), | |
) | |
def __iter__(self): | |
return iter((self.var, self.drs)) | |
def _pred(self): | |
return "prop" | |
class BoxerEq(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var1, var2): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var1 = var1 | |
self.var2 = var2 | |
def _variables(self): | |
return ({self.var1, self.var2}, set(), set()) | |
def atoms(self): | |
return set() | |
def renumber_sentences(self, f): | |
return BoxerEq( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.var1, | |
self.var2, | |
) | |
def __iter__(self): | |
return iter((self.var1, self.var2)) | |
def _pred(self): | |
return "eq" | |
class BoxerCard(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, var, value, type): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.var = var | |
self.value = value | |
self.type = type | |
def _variables(self): | |
return ({self.var}, set(), set()) | |
def renumber_sentences(self, f): | |
return BoxerCard( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.var, | |
self.value, | |
self.type, | |
) | |
def __iter__(self): | |
return iter((self.var, self.value, self.type)) | |
def _pred(self): | |
return "card" | |
class BoxerOr(BoxerIndexed): | |
def __init__(self, discourse_id, sent_index, word_indices, drs1, drs2): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.drs1 = drs1 | |
self.drs2 = drs2 | |
def _variables(self): | |
return tuple(map(operator.or_, self.drs1._variables(), self.drs2._variables())) | |
def atoms(self): | |
return self.drs1.atoms() | self.drs2.atoms() | |
def clean(self): | |
return BoxerOr( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.drs1.clean(), | |
self.drs2.clean(), | |
) | |
def renumber_sentences(self, f): | |
return BoxerOr( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.drs1, | |
self.drs2, | |
) | |
def __iter__(self): | |
return iter((self.drs1, self.drs2)) | |
def _pred(self): | |
return "or" | |
class BoxerWhq(BoxerIndexed): | |
def __init__( | |
self, discourse_id, sent_index, word_indices, ans_types, drs1, variable, drs2 | |
): | |
BoxerIndexed.__init__(self, discourse_id, sent_index, word_indices) | |
self.ans_types = ans_types | |
self.drs1 = drs1 | |
self.variable = variable | |
self.drs2 = drs2 | |
def _variables(self): | |
return tuple( | |
map( | |
operator.or_, | |
({self.variable}, set(), set()), | |
self.drs1._variables(), | |
self.drs2._variables(), | |
) | |
) | |
def atoms(self): | |
return self.drs1.atoms() | self.drs2.atoms() | |
def clean(self): | |
return BoxerWhq( | |
self.discourse_id, | |
self.sent_index, | |
self.word_indices, | |
self.ans_types, | |
self.drs1.clean(), | |
self.variable, | |
self.drs2.clean(), | |
) | |
def renumber_sentences(self, f): | |
return BoxerWhq( | |
self.discourse_id, | |
f(self.sent_index), | |
self.word_indices, | |
self.ans_types, | |
self.drs1, | |
self.variable, | |
self.drs2, | |
) | |
def __iter__(self): | |
return iter( | |
("[" + ",".join(self.ans_types) + "]", self.drs1, self.variable, self.drs2) | |
) | |
def _pred(self): | |
return "whq" | |
class PassthroughBoxerDrsInterpreter: | |
def interpret(self, ex): | |
return ex | |
class NltkDrtBoxerDrsInterpreter: | |
def __init__(self, occur_index=False): | |
self._occur_index = occur_index | |
def interpret(self, ex): | |
""" | |
:param ex: ``AbstractBoxerDrs`` | |
:return: ``DrtExpression`` | |
""" | |
if isinstance(ex, BoxerDrs): | |
drs = DRS( | |
[Variable(r) for r in ex.refs], list(map(self.interpret, ex.conds)) | |
) | |
if ex.consequent is not None: | |
drs.consequent = self.interpret(ex.consequent) | |
return drs | |
elif isinstance(ex, BoxerNot): | |
return DrtNegatedExpression(self.interpret(ex.drs)) | |
elif isinstance(ex, BoxerPred): | |
pred = self._add_occur_indexing(f"{ex.pos}_{ex.name}", ex) | |
return self._make_atom(pred, ex.var) | |
elif isinstance(ex, BoxerNamed): | |
pred = self._add_occur_indexing(f"ne_{ex.type}_{ex.name}", ex) | |
return self._make_atom(pred, ex.var) | |
elif isinstance(ex, BoxerRel): | |
pred = self._add_occur_indexing("%s" % (ex.rel), ex) | |
return self._make_atom(pred, ex.var1, ex.var2) | |
elif isinstance(ex, BoxerProp): | |
return DrtProposition(Variable(ex.var), self.interpret(ex.drs)) | |
elif isinstance(ex, BoxerEq): | |
return DrtEqualityExpression( | |
DrtVariableExpression(Variable(ex.var1)), | |
DrtVariableExpression(Variable(ex.var2)), | |
) | |
elif isinstance(ex, BoxerCard): | |
pred = self._add_occur_indexing(f"card_{ex.type}_{ex.value}", ex) | |
return self._make_atom(pred, ex.var) | |
elif isinstance(ex, BoxerOr): | |
return DrtOrExpression(self.interpret(ex.drs1), self.interpret(ex.drs2)) | |
elif isinstance(ex, BoxerWhq): | |
drs1 = self.interpret(ex.drs1) | |
drs2 = self.interpret(ex.drs2) | |
return DRS(drs1.refs + drs2.refs, drs1.conds + drs2.conds) | |
assert False, f"{ex.__class__.__name__}: {ex}" | |
def _make_atom(self, pred, *args): | |
accum = DrtVariableExpression(Variable(pred)) | |
for arg in args: | |
accum = DrtApplicationExpression( | |
accum, DrtVariableExpression(Variable(arg)) | |
) | |
return accum | |
def _add_occur_indexing(self, base, ex): | |
if self._occur_index and ex.sent_index is not None: | |
if ex.discourse_id: | |
base += "_%s" % ex.discourse_id | |
base += "_s%s" % ex.sent_index | |
base += "_w%s" % sorted(ex.word_indices)[0] | |
return base | |
class UnparseableInputException(Exception): | |
pass | |
if __name__ == "__main__": | |
opts = OptionParser("usage: %prog TEXT [options]") | |
opts.add_option( | |
"--verbose", | |
"-v", | |
help="display verbose logs", | |
action="store_true", | |
default=False, | |
dest="verbose", | |
) | |
opts.add_option( | |
"--fol", "-f", help="output FOL", action="store_true", default=False, dest="fol" | |
) | |
opts.add_option( | |
"--question", | |
"-q", | |
help="input is a question", | |
action="store_true", | |
default=False, | |
dest="question", | |
) | |
opts.add_option( | |
"--occur", | |
"-o", | |
help="occurrence index", | |
action="store_true", | |
default=False, | |
dest="occur_index", | |
) | |
(options, args) = opts.parse_args() | |
if len(args) != 1: | |
opts.error("incorrect number of arguments") | |
interpreter = NltkDrtBoxerDrsInterpreter(occur_index=options.occur_index) | |
drs = Boxer(interpreter).interpret_multi( | |
args[0].split(r"\n"), question=options.question, verbose=options.verbose | |
) | |
if drs is None: | |
print(None) | |
else: | |
drs = drs.simplify().eliminate_equality() | |
if options.fol: | |
print(drs.fol().normalize()) | |
else: | |
drs.pretty_print() | |