# Natural Language Toolkit: CFG visualization
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Edward Loper <[email protected]>
# URL: <https://www.nltk.org/>
# For license information, see LICENSE.TXT

"""
Visualization tools for CFGs.
"""

# Idea for a nice demo:
#   - 3 panes: grammar, treelet, working area
#     - grammar is a list of productions
#     - when you select a production, the treelet that it licenses appears
#       in the treelet area
#     - the working area has the text on the bottom, and S at top.  When
#       you select a production, it shows (ghosted) the locations where
#       that production's treelet could be attached to either the text
#       or the tree rooted at S.
#     - the user can drag the treelet onto one of those (or click on them?)
#     - the user can delete pieces of the tree from the working area
#       (right click?)
#     - connecting top to bottom? drag one NP onto another?
#
# +-------------------------------------------------------------+
# | S -> NP VP   |                 S                            |
# |[NP -> Det N ]|                / \                           |
# |     ...      |              NP  VP                          |
# | N -> 'dog'   |                                              |
# | N -> 'cat'   |                                              |
# |     ...      |                                              |
# +--------------+                                              |
# |      NP      |                      Det     N               |
# |     /  \     |                       |      |               |
# |   Det   N    |  the    cat    saw   the    dog              |
# |              |                                              |
# +--------------+----------------------------------------------+
#
# Operations:
#   - connect a new treelet -- drag or click shadow
#   - delete a treelet -- right click
#     - if only connected to top, delete everything below
#     - if only connected to bottom, delete everything above
#   - connect top & bottom -- drag a leaf to a root or a root to a leaf
#   - disconnect top & bottom -- right click
#     - if connected to top & bottom, then disconnect

import re
from tkinter import (
    Button,
    Canvas,
    Entry,
    Frame,
    IntVar,
    Label,
    Scrollbar,
    TclError,
    Text,
    Tk,
    Toplevel,
)

from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
from nltk.draw.util import (
    CanvasFrame,
    ColorizedList,
    ShowText,
    SymbolWidget,
    TextWidget,
)
from nltk.grammar import CFG, Nonterminal, _read_cfg_production, nonterminals
from nltk.tree import Tree

###################################################################### | |
# Production List | |
###################################################################### | |
class ProductionList(ColorizedList):
    """
    A ``ColorizedList`` whose items are grammar productions.

    Each production is rendered as its left-hand side, an arrow glyph,
    and then its right-hand side symbols, with a separate text tag for
    nonterminals, terminals and the arrow.
    """

    ARROW = SymbolWidget.SYMBOLS["rightarrow"]

    def _init_colortags(self, textwidget, options):
        """
        Configure the ``terminal``, ``arrow`` and ``nonterminal`` tags on
        ``textwidget``.  ``options`` is accepted but not used here.
        """
        bold_font = ("helvetica", -12, "bold")
        # The tags are configured in this fixed order on purpose: Tk ranks
        # tags by creation order, so reordering could change which tag wins.
        textwidget.tag_config("terminal", foreground="#006000")
        textwidget.tag_config("arrow", font="symbol", underline="0")
        textwidget.tag_config("nonterminal", foreground="blue", font=bold_font)

    def _item_repr(self, item):
        """
        Return the display form of the production ``item`` as a list of
        ``(text, tag)`` pairs.
        """
        pieces = [("%s\t" % item.lhs(), "nonterminal"), (self.ARROW, "arrow")]
        for symbol in item.rhs():
            if isinstance(symbol, Nonterminal):
                piece = (" %s" % symbol.symbol(), "nonterminal")
            else:
                piece = (" %r" % symbol, "terminal")
            pieces.append(piece)
        return pieces
###################################################################### | |
# CFG Editor | |
###################################################################### | |
_CFGEditor_HELP = """ | |
The CFG Editor can be used to create or modify context free grammars. | |
A context free grammar consists of a start symbol and a list of | |
productions. The start symbol is specified by the text entry field in | |
the upper right hand corner of the editor; and the list of productions | |
are specified in the main text editing box. | |
Every non-blank line specifies a single production. Each production | |
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS | |
is a list of nonterminals and terminals. | |
Nonterminals must be a single word, such as S or NP or NP_subj. | |
Currently, nonterminals must consists of alphanumeric characters and | |
underscores (_). Nonterminals are colored blue. If you place the | |
mouse over any nonterminal, then all occurrences of that nonterminal | |
will be highlighted. | |
Terminals must be surrounded by single quotes (') or double | |
quotes(\"). For example, "dog" and "New York" are terminals. | |
Currently, the string within the quotes must consist of alphanumeric | |
characters, underscores, and spaces. | |
To enter a new production, go to a blank line, and type a nonterminal, | |
followed by an arrow (->), followed by a sequence of terminals and | |
nonterminals. Note that "->" (dash + greater-than) is automatically | |
converted to an arrow symbol. When you move your cursor to a | |
different line, your production will automatically be colorized. If | |
there are any errors, they will be highlighted in red. | |
Note that the order of the productions is significant for some | |
algorithms. To re-order the productions, use cut and paste to move | |
them. | |
Use the buttons at the bottom of the window when you are done editing | |
the CFG: | |
- Ok: apply the new CFG, and exit the editor. | |
- Apply: apply the new CFG, and do not exit the editor. | |
- Reset: revert to the original CFG, and do not exit the editor. | |
- Cancel: revert to the original CFG, and exit the editor. | |
""" | |
class CFGEditor:
    """
    A dialog window for creating and editing context free grammars.
    ``CFGEditor`` imposes the following restrictions:

    - All nonterminals must be strings consisting of word
      characters.
    - All terminals must be strings consisting of word characters
      and space characters.
    """

    # Regular expressions used by _analyze_line.  Precompile them, so
    # we can process the text faster.
    ARROW = SymbolWidget.SYMBOLS["rightarrow"]
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)"  # LHS
        + "(->|("  # arrow
        + ARROW
        + r"))\s*"
        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$"  # RHS
    )
    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
    _BOLD = ("helvetica", -12, "bold")

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Build the editor as a ``Toplevel`` window owned by ``parent``.

        :param parent: The Tk widget that owns the dialog.
        :param cfg: The grammar to edit.  If ``None``, an empty grammar
            with start symbol ``S`` is used.
        :param set_cfg_callback: Optional callable that is passed a
            ``CFG`` whenever the grammar is applied or reset.
        """
        self._parent = parent
        self._cfg = cfg if cfg is not None else CFG(Nonterminal("S"), [])
        self._set_cfg_callback = set_cfg_callback
        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side="top", fill="x", expand=0)
        self._init_prodframe()
        self._prodframe.pack(side="top", fill="both", expand=1)
        self._init_buttons()
        self._buttonframe.pack(side="bottom", fill="x", expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """Create the top strip holding the start-symbol entry field."""
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side="right")
        Label(frame, text="Start Symbol:").pack(side="right")
        Label(frame, text="Productions:").pack(side="left")
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """Create the Ok / Apply / Reset / Cancel / Help button row."""
        frame = self._buttonframe = Frame(self._top)
        left_buttons = (
            ("Ok", self._ok),
            ("Apply", self._apply),
            ("Reset", self._reset),
            ("Cancel", self._cancel),
        )
        for label, command in left_buttons:
            Button(
                frame, text=label, command=command, underline=0, takefocus=0
            ).pack(side="left")
        Button(
            frame, text="Help", command=self._help, underline=0, takefocus=0
        ).pack(side="right")

    def _init_bindings(self):
        """Set the window title and install the keyboard shortcuts."""
        self._top.title("CFG Editor")
        # <Control-x> and <Control-c> are intentionally left unbound (they
        # were disabled in the original code, presumably to keep cut/copy).
        shortcuts = (
            (
                self._cancel,
                (
                    "<Control-q>",
                    "<Alt-q>",
                    "<Control-d>",
                    "<Alt-x>",
                    "<Escape>",
                    "<Alt-c>",
                ),
            ),
            (self._ok, ("<Control-o>", "<Alt-o>")),
            (self._apply, ("<Control-a>", "<Alt-a>")),
            (self._reset, ("<Control-r>", "<Alt-r>")),
            (self._help, ("<Control-h>", "<Alt-h>", "<F1>")),
        )
        for handler, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, handler)

    @staticmethod
    def _format_rhs(rhs):
        """Return one right-hand side as text, each symbol preceded by a space."""
        return "".join(
            (" %s" % elt) if isinstance(elt, Nonterminal) else (" %r" % elt)
            for elt in rhs
        )

    def _init_prodframe(self):
        """
        Create the production text box with its scrollbar, install the
        colorization tags and bindings, and fill the box from ``self._cfg``.
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(
            self._prodframe, background="#e0e0e0", exportselection=1
        )
        self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient="vertical")
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side="right", fill="y")
        self._textwidget.pack(expand=1, fill="both", side="left")

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config("terminal", foreground="#006000")
        self._textwidget.tag_config("arrow", font="symbol")
        self._textwidget.tag_config("error", background="red")

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind(">", self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind("<<Paste>>", self._analyze)
        self._top.bind("<KeyPress>", self._check_analyze)
        self._top.bind("<ButtonPress>", self._check_analyze)

        # Tab cycles focus.  The handler must return "break": otherwise the
        # Text class binding for <Tab> still runs afterwards, inserting a
        # tab character and pulling focus back to the text widget.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
            return "break"

        self._textwidget.bind("<Tab>", cycle)

        # Merge adjacent productions that share a left-hand side into one
        # "LHS -> a | b" line.  Groups containing an empty right-hand side
        # are left alone.
        prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
        for i in range(len(prod_tuples) - 1, 0, -1):
            if prod_tuples[i][0] == prod_tuples[i - 1][0]:
                if () in prod_tuples[i][1]:
                    continue
                if () in prod_tuples[i - 1][1]:
                    continue
                prod_tuples[i - 1][1].extend(prod_tuples[i][1])
                del prod_tuples[i]

        for lhs, rhss in prod_tuples:
            alternatives = " |".join(self._format_rhs(rhs) for rhs in rhss)
            self._textwidget.insert("end", "%s ->%s\n" % (lhs, alternatives))

        self._analyze()

    def _clear_tags(self, linenum):
        """
        Remove all tags (except ``arrow`` and ``sel``) from the given
        line of the text widget used for editing the productions.
        """
        start = "%d.0" % linenum
        end = "%d.end" % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ("arrow", "sel"):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index("insert").split(".")[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any ``'->'`` text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # Pass 1: swap each textual "->" for a tab followed by the glyph.
        arrow = "1.0"
        while True:
            arrow = self._textwidget.search("->", arrow, "end+1char")
            if arrow == "":
                break
            self._textwidget.delete(arrow, arrow + "+2char")
            self._textwidget.insert(arrow, self.ARROW, "arrow")
            self._textwidget.insert(arrow, "\t")

        # Pass 2: make sure every arrow glyph carries the "arrow" tag.
        arrow = "1.0"
        while True:
            arrow = self._textwidget.search(self.ARROW, arrow + "+1char", "end+1char")
            if arrow == "":
                break
            self._textwidget.tag_add("arrow", arrow, arrow + "+1char")

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        token = match.group()
        # What type of token is it?
        if token[0] in "'\"":
            tag = "terminal"
        elif token in ("->", self.ARROW):
            tag = "arrow"
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = "nonterminal_" + token
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = "%d.%d" % (linenum, match.start())
        end = "%d.%d" % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground="blue"):
        """
        Configure the per-nonterminal tag ``tag``.  When highlighting is
        enabled, also bind mouse enter/leave so that every occurrence of
        the nonterminal lights up while the pointer is over one of them.
        """
        self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background="#80ff80")

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background="")

        self._textwidget.tag_bind(tag, "<Enter>", enter)
        self._textwidget.tag_bind(tag, "<Leave>", leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get("%d.0" % linenum, "%d.end" % linenum)

        if CFGEditor._PRODUCTION_RE.match(line):
            # It's a valid production; colorize each of its tokens.
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != "":
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        whole_line = ("%d.0" % linenum, "%d.end" % linenum)
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start, end = whole_line
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = "%d.0" % linenum
            end = "%d.%d" % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = "%d.%d" % (linenum, arrowmatch.end())
            end = "%d.end" % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, "==", end):
            start, end = whole_line
        self._textwidget.tag_add("error", start, end)

    def _analyze(self, *e):
        """
        Replace ``->`` with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index("end").split(".")[0])
        for linenum in range(1, numlines + 1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.

        Any exception raised by ``_read_cfg_production`` for a line it
        cannot parse propagates to the caller.
        """
        # Get the text and normalize it: arrow glyphs back to "->", tabs
        # to spaces.  Both are literal substitutions, so no regexp needed.
        text = self._textwidget.get("1.0", "end")
        text = text.replace(self.ARROW, "->").replace("\t", " ")

        # Convert each non-blank line to CFG productions.
        productions = []
        for line in text.split("\n"):
            line = line.strip()
            if line == "":
                continue
            productions += _read_cfg_production(line)
        return productions

    def _destroy(self, *e):
        """Close the editor window; does nothing if it is already closed."""
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """Build a ``CFG`` from the editor contents and hand it to the callback."""
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = CFG(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Restore the editor to the grammar it was opened with (productions
        and start symbol), and pass that grammar to the callback.
        """
        self._textwidget.delete("1.0", "end")
        for production in self._cfg.productions():
            self._textwidget.insert("end", "%s\n" % production)
        self._analyze()
        # The start symbol is part of the grammar, so revert it as well.
        self._start.delete(0, "end")
        self._start.insert(0, self._cfg.start().symbol())
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """Revert to the original grammar (best effort) and close the editor."""
        try:
            self._reset()
        except Exception:
            # Deliberately best-effort: a failed revert (e.g. the widgets
            # are already gone) must not prevent the window from closing.
            pass
        self._destroy()

    def _help(self, *e):
        """Show the editor help text in a separate window."""
        title = "Help: CFG Editor"
        text = (_CFGEditor_HELP).strip()
        # The default font's not very legible; try using 'fixed' instead.
        try:
            ShowText(self._parent, title, text, width=75, font="fixed")
        except TclError:
            ShowText(self._parent, title, text, width=75)
###################################################################### | |
# New Demo (built tree based on cfg) | |
###################################################################### | |
class CFGDemo:
    """
    A window with a production list, a treelet preview canvas, and a
    workspace canvas showing the start symbol above the words of a text.
    """

    def __init__(self, grammar, text):
        """
        :param grammar: The grammar whose productions are listed; its
            start symbol becomes the root shown in the workspace.
        :param text: An iterable of words, drawn as workspace leaves.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title("Context Free Grammar Demo")

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side="left", fill="y", expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    # //////////////////////////////////////////////////
    # Initialization
    # //////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q on ``top`` to close the demo."""
        top.bind("<Control-q>", self.destroy)

    def _init_menubar(self, parent):
        """Placeholder: the demo has no menu bar yet."""
        pass

    def _init_buttons(self, parent):
        """Placeholder: the demo has no button row yet."""
        pass

    def _init_grammar(self, parent):
        """Create the production list and hook up its selection callbacks."""
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side="top", fill="both", expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback("select", self._selectprod_cb)
        self._prodlist.add_callback("move", self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the canvas that previews the selected production's treelet."""
        self._treelet_canvas = Canvas(parent, background="white")
        self._treelet_canvas.pack(side="bottom", fill="x")
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas and draw the initial tree in it."""
        self._workspace = CanvasFrame(parent, background="white")
        self._workspace.pack(side="right", fill="both", expand=1)
        self._tree = None
        self.reset_workspace()

    # //////////////////////////////////////////////////
    # Workspace
    # //////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Replace the workspace tree with a fresh one: the grammar's start
        symbol as root, and one leaf per word of the text.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ("helvetica", -(fontsize + 4), "bold")
        leaf_font = ("helvetica", -(fontsize + 2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = [
            TextWidget(c, word, font=leaf_font, draggable=1) for word in self._text
        ]

        # Put it all together into one tree
        self._tree = TreeSegmentWidget(c, rootnode, leaves, color="white")

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves:
            leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder: not implemented."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Report (by printing ``MATCH AT <index>``) every position among the
        children of ``tree`` where the right-hand side of ``prod`` matches
        a run of consecutive children.

        :param prod: The production to look for.
        :param tree: The tree segment to scan; defaults to the workspace tree.
        """
        if tree is None:
            tree = self._tree
        rhs = prod.rhs()
        children = tree.subtrees()

        # The last valid window starts at len(children) - len(rhs), so the
        # range needs the "+ 1".  An empty right-hand side is treated as
        # width 1 so it is still tried once per child, as before.
        width = max(len(rhs), 1)
        for i in range(len(children) - width + 1):
            # NOTE(review): the original evaluated tree["color", i] here and
            # discarded the result; that no-op check has been removed.
            for j, node in enumerate(rhs):
                widget = children[i + j]
                if (
                    isinstance(node, Nonterminal)
                    and isinstance(widget, TreeSegmentWidget)
                    and node.symbol() == widget.label().text()
                ):
                    continue  # matching nonterminal
                if (
                    isinstance(node, str)
                    and isinstance(widget, TextWidget)
                    and node == widget.text()
                ):
                    continue  # matching terminal
                break
            else:
                # Everything matched!
                print("MATCH AT", i)

    # //////////////////////////////////////////////////
    # Grammar
    # //////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight ``production``, draw
        its treelet centered in the treelet canvas, and scan the workspace
        for places where it matches.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None:
            self._treelet.destroy()

        # Convert the production to a tree.  Nonterminals on the right-hand
        # side become childless subtrees so they are drawn as nodes;
        # terminals stay plain leaves.
        children = [
            Tree(elt.symbol(), []) if isinstance(elt, Nonterminal) else elt
            for elt in production.rhs()
        ]
        tree = Tree(production.lhs().symbol(), children)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ("helvetica", -(fontsize + 4), "bold")
        leaf_font = ("helvetica", -(fontsize + 2))
        self._treelet = tree_to_treesegment(
            canvas, tree, node_font=node_font, leaf_font=leaf_font
        )
        self._treelet["draggable"] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas["width"]), int(canvas["height"])
        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Destroy the demo's main window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Enter the Tk event loop of the demo's main window."""
        self._top.mainloop(*args, **kwargs)
def demo2():
    """Open a ``CFGDemo`` window on a small English grammar and sentence."""
    from nltk import CFG, Nonterminal, Production

    nonterminals = "S VP NP PP P N Name V Det"
    (S, VP, NP, PP, P, N, Name, V, Det) = (
        Nonterminal(name) for name in nonterminals.split()
    )

    syntactic = (
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ["up", "over", NP]),
    )
    lexical = (
        Production(NP, ["I"]),
        Production(Det, ["the"]),
        Production(Det, ["a"]),
        Production(N, ["man"]),
        Production(V, ["saw"]),
        Production(P, ["in"]),
        Production(P, ["with"]),
        Production(N, ["park"]),
        Production(N, ["dog"]),
        Production(N, ["statue"]),
        Production(Det, ["my"]),
    )
    # Syntactic productions first, then lexical ones, as a single tuple.
    productions = syntactic + lexical

    grammar = CFG(S, productions)
    words = "I saw a man in the park".split()

    window = CFGDemo(grammar, words)
    window.mainloop()
###################################################################### | |
# Old Demo | |
###################################################################### | |
def demo():
    """
    Open a ``CFGEditor`` on a small English grammar.  Each grammar the
    editor reports through its callback is printed to stdout.
    """
    from nltk import CFG

    grammar = CFG.fromstring(
        """
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """
    )

    def cb(grammar):
        print(grammar)

    top = Tk()
    CFGEditor(top, grammar, cb)
    Label(top, text="\nTesting CFG Editor\n").pack()
    Button(top, text="Quit", command=top.destroy).pack()
    top.mainloop()
def demo3():
    """
    Show a stand-alone ``ProductionList`` with two productions marked.
    Press ``q`` to close the window.
    """
    from nltk import Production

    (S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
        "S, VP, NP, PP, P, N, Name, V, Det"
    )

    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ["up", "over", NP]),
        # Lexical Productions
        Production(NP, ["I"]),
        Production(Det, ["the"]),
        Production(Det, ["a"]),
        Production(N, ["man"]),
        Production(V, ["saw"]),
        Production(P, ["in"]),
        Production(P, ["with"]),
        Production(N, ["park"]),
        Production(N, ["dog"]),
        Production(N, ["statue"]),
        Production(Det, ["my"]),
    )

    t = Tk()

    def destroy(e, t=t):
        t.destroy()

    t.bind("q", destroy)
    p = ProductionList(t, productions)
    p.pack(expand=1, fill="both")
    p.add_callback("select", p.markonly)
    p.add_callback("move", p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

    # Enter the event loop, as the other demos do; without it the window
    # never becomes interactive when this is run as a plain script.
    t.mainloop()
# Running this module as a script launches the CFG editor demo.
if __name__ == "__main__":
    demo()