Spaces:
Sleeping
Sleeping
sunnychenxiwang
commited on
Commit
•
d916065
1
Parent(s):
24c4def
update nltk
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- pipeline/nltk/VERSION +1 -0
- pipeline/nltk/__init__.py +209 -0
- pipeline/nltk/__pycache__/__init__.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/book.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/cli.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/collections.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/collocations.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/compat.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/data.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/decorators.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/downloader.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/featstruct.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/grammar.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/help.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/internals.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/jsontags.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/langnames.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/lazyimport.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/probability.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/text.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/tgrep.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/toolbox.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/treeprettyprinter.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/treetransforms.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/util.cpython-39.pyc +0 -0
- pipeline/nltk/__pycache__/wsd.cpython-39.pyc +0 -0
- pipeline/nltk/app/__init__.py +47 -0
- pipeline/nltk/app/__pycache__/__init__.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/chartparser_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/chunkparser_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/collocations_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/concordance_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/nemo_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/rdparser_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/srparser_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/wordfreq_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/__pycache__/wordnet_app.cpython-39.pyc +0 -0
- pipeline/nltk/app/chartparser_app.py +2569 -0
- pipeline/nltk/app/chunkparser_app.py +1500 -0
- pipeline/nltk/app/collocations_app.py +438 -0
- pipeline/nltk/app/concordance_app.py +709 -0
- pipeline/nltk/app/nemo_app.py +163 -0
- pipeline/nltk/app/rdparser_app.py +1052 -0
- pipeline/nltk/app/srparser_app.py +937 -0
- pipeline/nltk/app/wordfreq_app.py +36 -0
- pipeline/nltk/app/wordnet_app.py +1005 -0
- pipeline/nltk/book.py +213 -0
- pipeline/nltk/ccg/__init__.py +34 -0
- pipeline/nltk/ccg/__pycache__/__init__.cpython-39.pyc +0 -0
- pipeline/nltk/ccg/__pycache__/api.cpython-39.pyc +0 -0
pipeline/nltk/VERSION
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.8.1
|
pipeline/nltk/__init__.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit (NLTK)
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Authors: Steven Bird <[email protected]>
|
5 |
+
# Edward Loper <[email protected]>
|
6 |
+
# URL: <https://www.nltk.org/>
|
7 |
+
# For license information, see LICENSE.TXT
|
8 |
+
|
9 |
+
"""
|
10 |
+
The Natural Language Toolkit (NLTK) is an open source Python library
|
11 |
+
for Natural Language Processing. A free online book is available.
|
12 |
+
(If you use the library for academic research, please cite the book.)
|
13 |
+
|
14 |
+
Steven Bird, Ewan Klein, and Edward Loper (2009).
|
15 |
+
Natural Language Processing with Python. O'Reilly Media Inc.
|
16 |
+
https://www.nltk.org/book/
|
17 |
+
|
18 |
+
isort:skip_file
|
19 |
+
"""
|
20 |
+
|
21 |
+
import os
|
22 |
+
|
23 |
+
# //////////////////////////////////////////////////////
|
24 |
+
# Metadata
|
25 |
+
# //////////////////////////////////////////////////////
|
26 |
+
|
27 |
+
# Version. For each new release, the version number should be updated
|
28 |
+
# in the file VERSION.
|
29 |
+
try:
|
30 |
+
# If a VERSION file exists, use it!
|
31 |
+
version_file = os.path.join(os.path.dirname(__file__), "VERSION")
|
32 |
+
with open(version_file) as infile:
|
33 |
+
__version__ = infile.read().strip()
|
34 |
+
except NameError:
|
35 |
+
__version__ = "unknown (running code interactively?)"
|
36 |
+
except OSError as ex:
|
37 |
+
__version__ = "unknown (%s)" % ex
|
38 |
+
|
39 |
+
if __doc__ is not None: # fix for the ``python -OO``
|
40 |
+
__doc__ += "\n@version: " + __version__
|
41 |
+
|
42 |
+
|
43 |
+
# Copyright notice
|
44 |
+
__copyright__ = """\
|
45 |
+
Copyright (C) 2001-2023 NLTK Project.
|
46 |
+
|
47 |
+
Distributed and Licensed under the Apache License, Version 2.0,
|
48 |
+
which is included by reference.
|
49 |
+
"""
|
50 |
+
|
51 |
+
__license__ = "Apache License, Version 2.0"
|
52 |
+
# Description of the toolkit, keywords, and the project's primary URL.
|
53 |
+
__longdescr__ = """\
|
54 |
+
The Natural Language Toolkit (NLTK) is a Python package for
|
55 |
+
natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11."""
|
56 |
+
__keywords__ = [
|
57 |
+
"NLP",
|
58 |
+
"CL",
|
59 |
+
"natural language processing",
|
60 |
+
"computational linguistics",
|
61 |
+
"parsing",
|
62 |
+
"tagging",
|
63 |
+
"tokenizing",
|
64 |
+
"syntax",
|
65 |
+
"linguistics",
|
66 |
+
"language",
|
67 |
+
"natural language",
|
68 |
+
"text analytics",
|
69 |
+
]
|
70 |
+
__url__ = "https://www.nltk.org/"
|
71 |
+
|
72 |
+
# Maintainer, contributors, etc.
|
73 |
+
__maintainer__ = "NLTK Team"
|
74 |
+
__maintainer_email__ = "[email protected]"
|
75 |
+
__author__ = __maintainer__
|
76 |
+
__author_email__ = __maintainer_email__
|
77 |
+
|
78 |
+
# "Trove" classifiers for Python Package Index.
|
79 |
+
__classifiers__ = [
|
80 |
+
"Development Status :: 5 - Production/Stable",
|
81 |
+
"Intended Audience :: Developers",
|
82 |
+
"Intended Audience :: Education",
|
83 |
+
"Intended Audience :: Information Technology",
|
84 |
+
"Intended Audience :: Science/Research",
|
85 |
+
"License :: OSI Approved :: Apache Software License",
|
86 |
+
"Operating System :: OS Independent",
|
87 |
+
"Programming Language :: Python :: 3.7",
|
88 |
+
"Programming Language :: Python :: 3.8",
|
89 |
+
"Programming Language :: Python :: 3.9",
|
90 |
+
"Programming Language :: Python :: 3.10",
|
91 |
+
"Programming Language :: Python :: 3.11",
|
92 |
+
"Topic :: Scientific/Engineering",
|
93 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
94 |
+
"Topic :: Scientific/Engineering :: Human Machine Interfaces",
|
95 |
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
96 |
+
"Topic :: Text Processing",
|
97 |
+
"Topic :: Text Processing :: Filters",
|
98 |
+
"Topic :: Text Processing :: General",
|
99 |
+
"Topic :: Text Processing :: Indexing",
|
100 |
+
"Topic :: Text Processing :: Linguistic",
|
101 |
+
]
|
102 |
+
|
103 |
+
from nltk.internals import config_java
|
104 |
+
|
105 |
+
# support numpy from pypy
|
106 |
+
try:
|
107 |
+
import numpypy
|
108 |
+
except ImportError:
|
109 |
+
pass
|
110 |
+
|
111 |
+
# Override missing methods on environments where it cannot be used like GAE.
|
112 |
+
import subprocess
|
113 |
+
|
114 |
+
if not hasattr(subprocess, "PIPE"):
|
115 |
+
|
116 |
+
def _fake_PIPE(*args, **kwargs):
|
117 |
+
raise NotImplementedError("subprocess.PIPE is not supported.")
|
118 |
+
|
119 |
+
subprocess.PIPE = _fake_PIPE
|
120 |
+
if not hasattr(subprocess, "Popen"):
|
121 |
+
|
122 |
+
def _fake_Popen(*args, **kwargs):
|
123 |
+
raise NotImplementedError("subprocess.Popen is not supported.")
|
124 |
+
|
125 |
+
subprocess.Popen = _fake_Popen
|
126 |
+
|
127 |
+
###########################################################
|
128 |
+
# TOP-LEVEL MODULES
|
129 |
+
###########################################################
|
130 |
+
|
131 |
+
# Import top-level functionality into top-level namespace
|
132 |
+
|
133 |
+
from nltk.collocations import *
|
134 |
+
from nltk.decorators import decorator, memoize
|
135 |
+
from nltk.featstruct import *
|
136 |
+
from nltk.grammar import *
|
137 |
+
from nltk.probability import *
|
138 |
+
from nltk.text import *
|
139 |
+
from nltk.util import *
|
140 |
+
from nltk.jsontags import *
|
141 |
+
|
142 |
+
###########################################################
|
143 |
+
# PACKAGES
|
144 |
+
###########################################################
|
145 |
+
|
146 |
+
from nltk.chunk import *
|
147 |
+
from nltk.classify import *
|
148 |
+
from nltk.inference import *
|
149 |
+
from nltk.metrics import *
|
150 |
+
from nltk.parse import *
|
151 |
+
from nltk.tag import *
|
152 |
+
from nltk.tokenize import *
|
153 |
+
from nltk.translate import *
|
154 |
+
from nltk.tree import *
|
155 |
+
from nltk.sem import *
|
156 |
+
from nltk.stem import *
|
157 |
+
|
158 |
+
# Packages which can be lazily imported
|
159 |
+
# (a) we don't import *
|
160 |
+
# (b) they're slow to import or have run-time dependencies
|
161 |
+
# that can safely fail at run time
|
162 |
+
|
163 |
+
from nltk import lazyimport
|
164 |
+
|
165 |
+
app = lazyimport.LazyModule("app", locals(), globals())
|
166 |
+
chat = lazyimport.LazyModule("chat", locals(), globals())
|
167 |
+
corpus = lazyimport.LazyModule("corpus", locals(), globals())
|
168 |
+
draw = lazyimport.LazyModule("draw", locals(), globals())
|
169 |
+
toolbox = lazyimport.LazyModule("toolbox", locals(), globals())
|
170 |
+
|
171 |
+
# Optional loading
|
172 |
+
|
173 |
+
try:
|
174 |
+
import numpy
|
175 |
+
except ImportError:
|
176 |
+
pass
|
177 |
+
else:
|
178 |
+
from nltk import cluster
|
179 |
+
|
180 |
+
from nltk.downloader import download, download_shell
|
181 |
+
|
182 |
+
try:
|
183 |
+
import tkinter
|
184 |
+
except ImportError:
|
185 |
+
pass
|
186 |
+
else:
|
187 |
+
try:
|
188 |
+
from nltk.downloader import download_gui
|
189 |
+
except RuntimeError as e:
|
190 |
+
import warnings
|
191 |
+
|
192 |
+
warnings.warn(
|
193 |
+
"Corpus downloader GUI not loaded "
|
194 |
+
"(RuntimeError during import: %s)" % str(e)
|
195 |
+
)
|
196 |
+
|
197 |
+
# explicitly import all top-level modules (ensuring
|
198 |
+
# they override the same names inadvertently imported
|
199 |
+
# from a subpackage)
|
200 |
+
|
201 |
+
from nltk import ccg, chunk, classify, collocations
|
202 |
+
from nltk import data, featstruct, grammar, help, inference, metrics
|
203 |
+
from nltk import misc, parse, probability, sem, stem, wsd
|
204 |
+
from nltk import tag, tbl, text, tokenize, translate, tree, util
|
205 |
+
|
206 |
+
|
207 |
+
# FIXME: override any accidentally imported demo, see https://github.com/nltk/nltk/issues/2116
|
208 |
+
def demo():
|
209 |
+
print("To run the demo code for a module, type nltk.module.demo()")
|
pipeline/nltk/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (4.84 kB). View file
|
|
pipeline/nltk/__pycache__/book.cpython-39.pyc
ADDED
Binary file (2.99 kB). View file
|
|
pipeline/nltk/__pycache__/cli.cpython-39.pyc
ADDED
Binary file (1.63 kB). View file
|
|
pipeline/nltk/__pycache__/collections.cpython-39.pyc
ADDED
Binary file (23.4 kB). View file
|
|
pipeline/nltk/__pycache__/collocations.cpython-39.pyc
ADDED
Binary file (14.9 kB). View file
|
|
pipeline/nltk/__pycache__/compat.cpython-39.pyc
ADDED
Binary file (1.13 kB). View file
|
|
pipeline/nltk/__pycache__/data.cpython-39.pyc
ADDED
Binary file (38.6 kB). View file
|
|
pipeline/nltk/__pycache__/decorators.cpython-39.pyc
ADDED
Binary file (6.43 kB). View file
|
|
pipeline/nltk/__pycache__/downloader.cpython-39.pyc
ADDED
Binary file (61.9 kB). View file
|
|
pipeline/nltk/__pycache__/featstruct.cpython-39.pyc
ADDED
Binary file (74.1 kB). View file
|
|
pipeline/nltk/__pycache__/grammar.cpython-39.pyc
ADDED
Binary file (53.7 kB). View file
|
|
pipeline/nltk/__pycache__/help.cpython-39.pyc
ADDED
Binary file (1.63 kB). View file
|
|
pipeline/nltk/__pycache__/internals.cpython-39.pyc
ADDED
Binary file (29 kB). View file
|
|
pipeline/nltk/__pycache__/jsontags.cpython-39.pyc
ADDED
Binary file (2.31 kB). View file
|
|
pipeline/nltk/__pycache__/langnames.cpython-39.pyc
ADDED
Binary file (15.3 kB). View file
|
|
pipeline/nltk/__pycache__/lazyimport.cpython-39.pyc
ADDED
Binary file (3.73 kB). View file
|
|
pipeline/nltk/__pycache__/probability.cpython-39.pyc
ADDED
Binary file (87.2 kB). View file
|
|
pipeline/nltk/__pycache__/text.cpython-39.pyc
ADDED
Binary file (28.4 kB). View file
|
|
pipeline/nltk/__pycache__/tgrep.cpython-39.pyc
ADDED
Binary file (35.3 kB). View file
|
|
pipeline/nltk/__pycache__/toolbox.cpython-39.pyc
ADDED
Binary file (15.8 kB). View file
|
|
pipeline/nltk/__pycache__/treeprettyprinter.cpython-39.pyc
ADDED
Binary file (952 Bytes). View file
|
|
pipeline/nltk/__pycache__/treetransforms.cpython-39.pyc
ADDED
Binary file (4.99 kB). View file
|
|
pipeline/nltk/__pycache__/util.cpython-39.pyc
ADDED
Binary file (32.5 kB). View file
|
|
pipeline/nltk/__pycache__/wsd.cpython-39.pyc
ADDED
Binary file (1.82 kB). View file
|
|
pipeline/nltk/app/__init__.py
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Applications package
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Edward Loper <[email protected]>
|
5 |
+
# Steven Bird <[email protected]>
|
6 |
+
# URL: <https://www.nltk.org/>
|
7 |
+
# For license information, see LICENSE.TXT
|
8 |
+
|
9 |
+
"""
|
10 |
+
Interactive NLTK Applications:
|
11 |
+
|
12 |
+
chartparser: Chart Parser
|
13 |
+
chunkparser: Regular-Expression Chunk Parser
|
14 |
+
collocations: Find collocations in text
|
15 |
+
concordance: Part-of-speech concordancer
|
16 |
+
nemo: Finding (and Replacing) Nemo regular expression tool
|
17 |
+
rdparser: Recursive Descent Parser
|
18 |
+
srparser: Shift-Reduce Parser
|
19 |
+
wordnet: WordNet Browser
|
20 |
+
"""
|
21 |
+
|
22 |
+
|
23 |
+
# Import Tkinter-based modules if Tkinter is installed
|
24 |
+
try:
|
25 |
+
import tkinter
|
26 |
+
except ImportError:
|
27 |
+
import warnings
|
28 |
+
|
29 |
+
warnings.warn("nltk.app package not loaded (please install Tkinter library).")
|
30 |
+
else:
|
31 |
+
from nltk.app.chartparser_app import app as chartparser
|
32 |
+
from nltk.app.chunkparser_app import app as chunkparser
|
33 |
+
from nltk.app.collocations_app import app as collocations
|
34 |
+
from nltk.app.concordance_app import app as concordance
|
35 |
+
from nltk.app.nemo_app import app as nemo
|
36 |
+
from nltk.app.rdparser_app import app as rdparser
|
37 |
+
from nltk.app.srparser_app import app as srparser
|
38 |
+
from nltk.app.wordnet_app import app as wordnet
|
39 |
+
|
40 |
+
try:
|
41 |
+
from matplotlib import pylab
|
42 |
+
except ImportError:
|
43 |
+
import warnings
|
44 |
+
|
45 |
+
warnings.warn("nltk.app.wordfreq not loaded (requires the matplotlib library).")
|
46 |
+
else:
|
47 |
+
from nltk.app.wordfreq_app import app as wordfreq
|
pipeline/nltk/app/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.31 kB). View file
|
|
pipeline/nltk/app/__pycache__/chartparser_app.cpython-39.pyc
ADDED
Binary file (63.2 kB). View file
|
|
pipeline/nltk/app/__pycache__/chunkparser_app.cpython-39.pyc
ADDED
Binary file (33.4 kB). View file
|
|
pipeline/nltk/app/__pycache__/collocations_app.cpython-39.pyc
ADDED
Binary file (14.8 kB). View file
|
|
pipeline/nltk/app/__pycache__/concordance_app.cpython-39.pyc
ADDED
Binary file (22.7 kB). View file
|
|
pipeline/nltk/app/__pycache__/nemo_app.cpython-39.pyc
ADDED
Binary file (12.3 kB). View file
|
|
pipeline/nltk/app/__pycache__/rdparser_app.cpython-39.pyc
ADDED
Binary file (26 kB). View file
|
|
pipeline/nltk/app/__pycache__/srparser_app.cpython-39.pyc
ADDED
Binary file (22 kB). View file
|
|
pipeline/nltk/app/__pycache__/wordfreq_app.cpython-39.pyc
ADDED
Binary file (1.46 kB). View file
|
|
pipeline/nltk/app/__pycache__/wordnet_app.cpython-39.pyc
ADDED
Binary file (31.1 kB). View file
|
|
pipeline/nltk/app/chartparser_app.py
ADDED
@@ -0,0 +1,2569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Chart Parser Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Edward Loper <[email protected]>
|
5 |
+
# Jean Mark Gawron <[email protected]>
|
6 |
+
# Steven Bird <[email protected]>
|
7 |
+
# URL: <https://www.nltk.org/>
|
8 |
+
# For license information, see LICENSE.TXT
|
9 |
+
|
10 |
+
"""
|
11 |
+
A graphical tool for exploring chart parsing.
|
12 |
+
|
13 |
+
Chart parsing is a flexible parsing algorithm that uses a data
|
14 |
+
structure called a "chart" to record hypotheses about syntactic
|
15 |
+
constituents. Each hypothesis is represented by a single "edge" on
|
16 |
+
the chart. A set of "chart rules" determine when new edges can be
|
17 |
+
added to the chart. This set of rules controls the overall behavior
|
18 |
+
of the parser (e.g. whether it parses top-down or bottom-up).
|
19 |
+
|
20 |
+
The chart parsing tool demonstrates the process of parsing a single
|
21 |
+
sentence, with a given grammar and lexicon. Its display is divided
|
22 |
+
into three sections: the bottom section displays the chart; the middle
|
23 |
+
section displays the sentence; and the top section displays the
|
24 |
+
partial syntax tree corresponding to the selected edge. Buttons along
|
25 |
+
the bottom of the window are used to control the execution of the
|
26 |
+
algorithm.
|
27 |
+
|
28 |
+
The chart parsing tool allows for flexible control of the parsing
|
29 |
+
algorithm. At each step of the algorithm, you can select which rule
|
30 |
+
or strategy you wish to apply. This allows you to experiment with
|
31 |
+
mixing different strategies (e.g. top-down and bottom-up). You can
|
32 |
+
exercise fine-grained control over the algorithm by selecting which
|
33 |
+
edge you wish to apply a rule to.
|
34 |
+
"""
|
35 |
+
|
36 |
+
# At some point, we should rewrite this tool to use the new canvas
|
37 |
+
# widget system.
|
38 |
+
|
39 |
+
|
40 |
+
import os.path
|
41 |
+
import pickle
|
42 |
+
from tkinter import (
|
43 |
+
Button,
|
44 |
+
Canvas,
|
45 |
+
Checkbutton,
|
46 |
+
Frame,
|
47 |
+
IntVar,
|
48 |
+
Label,
|
49 |
+
Menu,
|
50 |
+
Scrollbar,
|
51 |
+
Tk,
|
52 |
+
Toplevel,
|
53 |
+
)
|
54 |
+
from tkinter.filedialog import askopenfilename, asksaveasfilename
|
55 |
+
from tkinter.font import Font
|
56 |
+
from tkinter.messagebox import showerror, showinfo
|
57 |
+
|
58 |
+
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
|
59 |
+
from nltk.draw.util import (
|
60 |
+
CanvasFrame,
|
61 |
+
ColorizedList,
|
62 |
+
EntryDialog,
|
63 |
+
MutableOptionMenu,
|
64 |
+
ShowText,
|
65 |
+
SymbolWidget,
|
66 |
+
)
|
67 |
+
from nltk.grammar import CFG, Nonterminal
|
68 |
+
from nltk.parse.chart import (
|
69 |
+
BottomUpPredictCombineRule,
|
70 |
+
BottomUpPredictRule,
|
71 |
+
Chart,
|
72 |
+
LeafEdge,
|
73 |
+
LeafInitRule,
|
74 |
+
SingleEdgeFundamentalRule,
|
75 |
+
SteppingChartParser,
|
76 |
+
TopDownInitRule,
|
77 |
+
TopDownPredictRule,
|
78 |
+
TreeEdge,
|
79 |
+
)
|
80 |
+
from nltk.tree import Tree
|
81 |
+
from nltk.util import in_idle
|
82 |
+
|
83 |
+
# Known bug: ChartView doesn't handle edges generated by epsilon
|
84 |
+
# productions (e.g., [Production: PP -> ]) very well.
|
85 |
+
|
86 |
+
#######################################################################
|
87 |
+
# Edge List
|
88 |
+
#######################################################################
|
89 |
+
|
90 |
+
|
91 |
+
class EdgeList(ColorizedList):
|
92 |
+
ARROW = SymbolWidget.SYMBOLS["rightarrow"]
|
93 |
+
|
94 |
+
def _init_colortags(self, textwidget, options):
|
95 |
+
textwidget.tag_config("terminal", foreground="#006000")
|
96 |
+
textwidget.tag_config("arrow", font="symbol", underline="0")
|
97 |
+
textwidget.tag_config("dot", foreground="#000000")
|
98 |
+
textwidget.tag_config(
|
99 |
+
"nonterminal", foreground="blue", font=("helvetica", -12, "bold")
|
100 |
+
)
|
101 |
+
|
102 |
+
def _item_repr(self, item):
|
103 |
+
contents = []
|
104 |
+
contents.append(("%s\t" % item.lhs(), "nonterminal"))
|
105 |
+
contents.append((self.ARROW, "arrow"))
|
106 |
+
for i, elt in enumerate(item.rhs()):
|
107 |
+
if i == item.dot():
|
108 |
+
contents.append((" *", "dot"))
|
109 |
+
if isinstance(elt, Nonterminal):
|
110 |
+
contents.append((" %s" % elt.symbol(), "nonterminal"))
|
111 |
+
else:
|
112 |
+
contents.append((" %r" % elt, "terminal"))
|
113 |
+
if item.is_complete():
|
114 |
+
contents.append((" *", "dot"))
|
115 |
+
return contents
|
116 |
+
|
117 |
+
|
118 |
+
#######################################################################
|
119 |
+
# Chart Matrix View
|
120 |
+
#######################################################################
|
121 |
+
|
122 |
+
|
123 |
+
class ChartMatrixView:
    """
    A view of a chart that displays the contents of the corresponding matrix.

    The canvas shows one cell for every ``(start, end)`` pair with
    ``start <= end``; each cell is shaded according to the number of chart
    edges with that span.  Clicking a cell selects it and lists the edges
    of that span in an ``EdgeList`` below the matrix.

    Callbacks registered with ``add_callback`` are fired for two events:
    ``"select"`` (an edge was selected in the list; called with the edge)
    and ``"select_cell"`` (a cell was selected; called with ``i, j``).
    """

    def __init__(
        self, parent, chart, toplevel=True, title="Chart Matrix", show_numedges=False
    ):
        """
        :param parent: The Tk widget that owns this view.
        :param chart: The chart to display.
        :param toplevel: If true, the view lives in its own ``Toplevel``
            window (with a Quit button); otherwise in a ``Frame`` that the
            caller places with ``pack``.
        :param title: Window title, used only when ``toplevel`` is true.
        :param show_numedges: If true, show a label with the edge count.
        """
        self._chart = chart
        self._cells = []
        self._marks = []

        self._selected_cell = None

        if toplevel:
            self._root = Toplevel(parent)
            self._root.title(title)
            self._root.bind("<Control-q>", self.destroy)
            self._init_quit(self._root)
        else:
            self._root = Frame(parent)

        self._init_matrix(self._root)
        self._init_list(self._root)
        if show_numedges:
            self._init_numedges(self._root)
        else:
            self._numedges_label = None

        # Maps event name -> {callback: 1}.
        self._callbacks = {}

        # Edge count at the time of the last ``update``; used by
        # ``select_cell`` to detect that the chart has changed.
        self._num_edges = 0

        self.draw()

    def _init_quit(self, root):
        """Add a Quit button at the bottom of ``root``."""
        quit = Button(root, text="Quit", command=self.destroy)
        quit.pack(side="bottom", expand=0, fill="none")

    def _init_matrix(self, root):
        """Create the fixed-size canvas on which the matrix is drawn."""
        cframe = Frame(root, border=2, relief="sunken")
        cframe.pack(expand=0, fill="none", padx=1, pady=3, side="top")
        self._canvas = Canvas(cframe, width=200, height=200, background="white")
        self._canvas.pack(expand=0, fill="none")

    def _init_numedges(self, root):
        """Create the label that reports the number of edges."""
        self._numedges_label = Label(root, text="0 edges")
        self._numedges_label.pack(expand=0, fill="none", side="top")

    def _init_list(self, root):
        """Create the edge list and forward its selections as callbacks."""
        self._list = EdgeList(root, [], width=20, height=5)
        self._list.pack(side="top", expand=1, fill="both", pady=3)

        def cb(edge, self=self):
            self._fire_callbacks("select", edge)

        self._list.add_callback("select", cb)
        self._list.focus()

    def destroy(self, *e):
        """
        Destroy the view's root widget.  Safe to call more than once.

        :param e: Ignored; lets this method be used as a Tk event handler.
        """
        if self._root is None:
            return
        try:
            self._root.destroy()
        except Exception:
            # Best effort: the widget may already have been torn down.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._root = None

    def set_chart(self, chart):
        """Display ``chart`` instead of the current chart, and redraw."""
        if chart is not self._chart:
            self._chart = chart
            self._num_edges = 0
            self.draw()

    @staticmethod
    def _cell_color(num_edges):
        """
        Return the fill color for a cell containing ``num_edges`` edges.

        Empty cells are dark gray.  Otherwise the green channel grows and
        the blue channel shrinks with the edge count, clamped to 0..255.
        Integer arithmetic is required because the ``x`` format code
        rejects floats.
        """
        if num_edges == 0:
            return "gray20"
        shade = 128 * num_edges // 10
        return "#00{:02x}{:02x}".format(min(255, 50 + shade), max(0, 128 - shade))

    def update(self):
        """
        Refresh cell colors, the selection outline, the edge list and the
        edge-count label from the current chart.  Does nothing once the
        view has been destroyed.
        """
        if self._root is None:
            return

        # Count the edges in each cell
        N = len(self._cells)
        cell_edges = [[0 for i in range(N)] for j in range(N)]
        for edge in self._chart:
            cell_edges[edge.start()][edge.end()] += 1

        # Color the cells correspondingly.
        for i in range(N):
            for j in range(i, N):
                cell_tag = self._cells[i][j]
                self._canvas.itemconfig(
                    cell_tag, fill=self._cell_color(cell_edges[i][j])
                )
                if (i, j) == self._selected_cell:
                    self._canvas.itemconfig(cell_tag, outline="#00ffff", width=3)
                    # Raise the cell so its thick outline is not covered
                    # by neighbouring cells.
                    self._canvas.tag_raise(cell_tag)
                else:
                    self._canvas.itemconfig(cell_tag, outline="black", width=1)

        # Update the edge list.
        edges = list(self._chart.select(span=self._selected_cell))
        self._list.set(edges)

        # Update our edge count.
        self._num_edges = self._chart.num_edges()
        if self._numedges_label is not None:
            self._numedges_label["text"] = "%d edges" % self._num_edges

    def activate(self):
        """Hide the gray "inactive" overlay and refresh the view."""
        self._canvas.itemconfig("inactivebox", state="hidden")
        self.update()

    def inactivate(self):
        """Show the gray "inactive" overlay and refresh the view."""
        self._canvas.itemconfig("inactivebox", state="normal")
        self.update()

    def add_callback(self, event, func):
        """Register ``func`` to be called when ``event`` is fired."""
        self._callbacks.setdefault(event, {})[func] = 1

    def remove_callback(self, event, func=None):
        """
        Unregister ``func`` for ``event``, or every callback for ``event``
        if ``func`` is None.

        :raise KeyError: If ``func`` is None and no callback was ever
            registered for ``event``.  Removing a specific ``func`` that
            is not registered is silently ignored.
        """
        if func is None:
            del self._callbacks[event]
        else:
            try:
                del self._callbacks[event][func]
            except KeyError:
                pass

    def _fire_callbacks(self, event, *args):
        """Call every callback registered for ``event`` with ``args``."""
        if event not in self._callbacks:
            return
        # Iterate over a copy so callbacks may (un)register callbacks.
        for cb_func in list(self._callbacks[event].keys()):
            cb_func(*args)

    def select_cell(self, i, j):
        """Select cell ``(i, j)`` and fire the ``"select_cell"`` callbacks."""
        if self._root is None:
            return

        # If the cell is already selected (and the chart contents
        # haven't changed), then do nothing.
        if (i, j) == self._selected_cell and self._chart.num_edges() == self._num_edges:
            return

        self._selected_cell = (i, j)
        self.update()

        # Fire the callback.
        self._fire_callbacks("select_cell", i, j)

    def deselect_cell(self):
        """Clear the cell selection and empty the edge list."""
        if self._root is None:
            return
        self._selected_cell = None
        self._list.set([])
        self.update()

    def _click_cell(self, i, j):
        """Toggle the selection of cell ``(i, j)`` in response to a click."""
        if self._selected_cell == (i, j):
            self.deselect_cell()
        else:
            self.select_cell(i, j)

    def view_edge(self, edge):
        """Select the cell containing ``edge`` and scroll the list to it."""
        # Same guard as mark_edge/markonly_edge: the list widget must not
        # be touched after the window has been destroyed.
        if self._root is None:
            return
        self.select_cell(*edge.span())
        self._list.view(edge)

    def mark_edge(self, edge):
        """Select the cell containing ``edge`` and mark it in the list."""
        if self._root is None:
            return
        self.select_cell(*edge.span())
        self._list.mark(edge)

    def unmark_edge(self, edge=None):
        """Forward ``edge`` (None by default) to the edge list's ``unmark``."""
        if self._root is None:
            return
        self._list.unmark(edge)

    def markonly_edge(self, edge):
        """Select the cell containing ``edge`` and make it the only mark."""
        if self._root is None:
            return
        self.select_cell(*edge.span())
        self._list.markonly(edge)

    def draw(self):
        """
        Redraw the whole matrix from scratch: axis labels, grid lines, one
        clickable rectangle per cell, and the hidden "inactive" overlay.
        Finishes by calling ``update`` to color the cells.
        """
        if self._root is None:
            return
        LEFT_MARGIN = BOT_MARGIN = 15
        TOP_MARGIN = 5
        c = self._canvas
        c.delete("all")
        # One row/column per chart position (number of leaves + 1).
        N = self._chart.num_leaves() + 1
        dx = (int(c["width"]) - LEFT_MARGIN) / N
        dy = (int(c["height"]) - TOP_MARGIN - BOT_MARGIN) / N

        # Labels and dotted lines
        for i in range(N):
            c.create_text(
                LEFT_MARGIN - 2, i * dy + dy / 2 + TOP_MARGIN, text=repr(i), anchor="e"
            )
            c.create_text(
                i * dx + dx / 2 + LEFT_MARGIN,
                N * dy + TOP_MARGIN + 1,
                text=repr(i),
                anchor="n",
            )
            c.create_line(
                LEFT_MARGIN,
                dy * (i + 1) + TOP_MARGIN,
                dx * N + LEFT_MARGIN,
                dy * (i + 1) + TOP_MARGIN,
                dash=".",
            )
            c.create_line(
                dx * i + LEFT_MARGIN,
                TOP_MARGIN,
                dx * i + LEFT_MARGIN,
                dy * N + TOP_MARGIN,
                dash=".",
            )

        # A box around the whole thing
        c.create_rectangle(
            LEFT_MARGIN, TOP_MARGIN, LEFT_MARGIN + dx * N, dy * N + TOP_MARGIN, width=2
        )

        # Cells
        self._cells = [[None for i in range(N)] for j in range(N)]
        for i in range(N):
            for j in range(i, N):
                t = c.create_rectangle(
                    j * dx + LEFT_MARGIN,
                    i * dy + TOP_MARGIN,
                    (j + 1) * dx + LEFT_MARGIN,
                    (i + 1) * dy + TOP_MARGIN,
                    fill="gray20",
                )
                self._cells[i][j] = t

                # Bind i and j as defaults so that each handler keeps the
                # coordinates of its own cell.
                def cb(event, self=self, i=i, j=j):
                    self._click_cell(i, j)

                c.tag_bind(t, "<Button-1>", cb)

        # Inactive box
        xmax, ymax = int(c["width"]), int(c["height"])
        t = c.create_rectangle(
            -100,
            -100,
            xmax + 100,
            ymax + 100,
            fill="gray50",
            state="hidden",
            tag="inactivebox",
        )
        c.tag_lower(t)

        # Update the cells.
        self.update()

    def pack(self, *args, **kwargs):
        """Pack the view's root widget; arguments are passed through."""
        self._root.pack(*args, **kwargs)
|
389 |
+
|
390 |
+
|
391 |
+
#######################################################################
|
392 |
+
# Chart Results View
|
393 |
+
#######################################################################
|
394 |
+
|
395 |
+
|
396 |
+
class ChartResultsView:
    """
    A view that draws every complete parse found in a chart.

    Each parse tree returned by ``chart.parses(grammar.start())`` is drawn
    as a tree widget, stacked vertically in a ``CanvasFrame``.  Clicking a
    tree selects it; the selection can be printed on its own.
    """

    def __init__(self, parent, chart, grammar, toplevel=True):
        """
        :param parent: The Tk widget that owns this view.
        :param chart: The chart whose parses are displayed.
        :param grammar: The grammar; its start symbol picks the parses.
        :param toplevel: If true, the view lives in its own ``Toplevel``
            window with Quit/Print buttons; otherwise in a ``Frame`` that
            the caller places with ``pack``.
        """
        self._chart = chart
        self._grammar = grammar
        self._trees = []
        # Canvas y coordinate at which the next tree will be placed.
        self._y = 10
        self._treewidgets = []
        self._selection = None
        self._selectbox = None

        if toplevel:
            self._root = Toplevel(parent)
            self._root.title("Chart Parser Application: Results")
            self._root.bind("<Control-q>", self.destroy)

            # Buttons
            buttons = Frame(self._root)
            buttons.pack(side="bottom", expand=0, fill="x")
            Button(buttons, text="Quit", command=self.destroy).pack(side="right")
            Button(buttons, text="Print All", command=self.print_all).pack(side="left")
            Button(buttons, text="Print Selection", command=self.print_selection).pack(
                side="left"
            )
        else:
            self._root = Frame(parent)

        # Canvas frame.
        self._cframe = CanvasFrame(self._root, closeenough=20)
        self._cframe.pack(side="top", expand=1, fill="both")

        # Initial update
        self.update()

    def update(self, edge=None):
        """
        Add a tree widget for every parse that is not displayed yet.

        :param edge: The edge that was just added to the chart, if known.
            When given, the parses are only recomputed if the edge is a
            parse edge, i.e. has the grammar's start symbol as its
            left-hand side and spans the whole sentence.
        """
        if self._root is None:
            return
        # If the edge isn't a parse edge, do nothing.
        if edge is not None:
            if edge.lhs() != self._grammar.start():
                return
            if edge.span() != (0, self._chart.num_leaves()):
                return

        for parse in self._chart.parses(self._grammar.start()):
            if parse not in self._trees:
                self._add(parse)

    def _add(self, parse):
        """Draw ``parse`` below the trees that are already displayed."""
        # Add it to self._trees.
        self._trees.append(parse)

        # Create a widget for it.
        c = self._cframe.canvas()
        treewidget = tree_to_treesegment(c, parse)

        # Add it to the canvas frame.
        self._treewidgets.append(treewidget)
        self._cframe.add_widget(treewidget, 10, self._y)

        # Register callbacks.
        treewidget.bind_click(self._click)

        # Update y.
        self._y = treewidget.bbox()[3] + 10

    def _click(self, widget):
        """Select ``widget`` and draw a box around it."""
        c = self._cframe.canvas()
        if self._selection is not None:
            c.delete(self._selectbox)
        self._selection = widget
        (x1, y1, x2, y2) = widget.bbox()
        self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline="#088")

    def _color(self, treewidget, color):
        """Recursively set the color of ``treewidget`` and its subtrees."""
        treewidget.label()["color"] = color
        for child in treewidget.subtrees():
            if isinstance(child, TreeSegmentWidget):
                self._color(child, color)
            else:
                child["color"] = color

    def print_all(self, *e):
        """Print every displayed tree via ``CanvasFrame.print_to_file``."""
        if self._root is None:
            return
        self._cframe.print_to_file()

    def print_selection(self, *e):
        """
        Print only the selected tree.

        The other trees are removed and the selection is moved to the
        top-left corner for printing; afterwards the view is cleared and
        rebuilt from the chart.  Shows an error dialog if no tree is
        selected.
        """
        if self._root is None:
            return
        if self._selection is None:
            showerror("Print Error", "No tree selected")
        else:
            c = self._cframe.canvas()
            for widget in self._treewidgets:
                if widget is not self._selection:
                    self._cframe.destroy_widget(widget)
            c.delete(self._selectbox)
            (x1, y1, x2, y2) = self._selection.bbox()
            self._selection.move(10 - x1, 10 - y1)
            c["scrollregion"] = f"0 0 {x2 - x1 + 20} {y2 - y1 + 20}"
            self._cframe.print_to_file()

            # Restore our state.  Only the selection is still alive, so
            # make it the only widget that clear() will destroy.
            self._treewidgets = [self._selection]
            self.clear()
            self.update()

    def clear(self):
        """Remove all trees and the selection, and reset the layout."""
        if self._root is None:
            return
        for treewidget in self._treewidgets:
            self._cframe.destroy_widget(treewidget)
        self._trees = []
        self._treewidgets = []
        if self._selection is not None:
            self._cframe.canvas().delete(self._selectbox)
            # Drop the id of the deleted canvas item so it cannot be
            # mistaken for a live one.
            self._selectbox = None
        self._selection = None
        self._y = 10

    def set_chart(self, chart):
        """Display the parses of ``chart`` instead of the current chart."""
        self.clear()
        self._chart = chart
        self.update()

    def set_grammar(self, grammar):
        """Use ``grammar``'s start symbol to select the parses to show."""
        self.clear()
        self._grammar = grammar
        self.update()

    def destroy(self, *e):
        """
        Destroy the view's root widget.  Safe to call more than once.

        :param e: Ignored; lets this method be used as a Tk event handler.
        """
        if self._root is None:
            return
        try:
            self._root.destroy()
        except Exception:
            # Best effort: the widget may already have been torn down.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._root = None

    def pack(self, *args, **kwargs):
        """Pack the view's root widget; arguments are passed through."""
        self._root.pack(*args, **kwargs)
|
537 |
+
|
538 |
+
|
539 |
+
#######################################################################
|
540 |
+
# Chart Comparer
|
541 |
+
#######################################################################
|
542 |
+
|
543 |
+
|
544 |
+
class ChartComparer:
    """
    A window that shows two charts side by side, together with the result
    of a set operation (intersection, union or difference) on their edges.

    :ivar _root: The root window

    :ivar _charts: A dictionary mapping names to charts.  When
        charts are loaded, they are added to this dictionary.

    :ivar _left_chart: The left ``Chart``.
    :ivar _left_name: The name of ``_left_chart`` (derived from filename)
    :ivar _left_matrix: The ``ChartMatrixView`` for ``_left_chart``
    :ivar _left_selector: The drop-down ``MutableOptionMenu`` used
          to select ``_left_chart``.

    :ivar _right_chart: The right ``Chart``.
    :ivar _right_name: The name of ``_right_chart`` (derived from filename)
    :ivar _right_matrix: The ``ChartMatrixView`` for ``_right_chart``
    :ivar _right_selector: The drop-down ``MutableOptionMenu`` used
          to select ``_right_chart``.

    :ivar _out_chart: The out ``Chart``.
    :ivar _out_matrix: The ``ChartMatrixView`` for ``_out_chart``
    :ivar _out_label: The label for ``_out_chart``.

    :ivar _op_label: A Label containing the most recent operation.
    :ivar _operator: The most recent operation (``"-"``, ``"and"``,
          ``"or"``), or None if no operation has been applied yet.
    """

    # Maps an operator name to the glyph shown between the two charts.
    _OPSYMBOL = {
        "-": "-",
        "and": SymbolWidget.SYMBOLS["intersection"],
        "or": SymbolWidget.SYMBOLS["union"],
    }

    def __init__(self, *chart_filenames):
        """
        :param chart_filenames: Pickle files containing charts to load
            into the comparer at startup (see ``load_chart``).
        """
        # This chart is displayed when we don't have a value (eg
        # before any chart is loaded).
        faketok = [""] * 8
        self._emptychart = Chart(faketok)

        # The left & right charts start out empty.
        self._left_name = "None"
        self._right_name = "None"
        self._left_chart = self._emptychart
        self._right_chart = self._emptychart

        # The charts that have been loaded.
        self._charts = {"None": self._emptychart}

        # The output chart.
        self._out_chart = self._emptychart

        # The most recent operation
        self._operator = None

        # Set up the root window.
        self._root = Tk()
        self._root.title("Chart Comparison")
        self._root.bind("<Control-q>", self.destroy)
        self._root.bind("<Control-x>", self.destroy)

        # Initialize all widgets, etc.
        self._init_menubar(self._root)
        self._init_chartviews(self._root)
        self._init_divider(self._root)
        self._init_buttons(self._root)
        self._init_bindings(self._root)

        # Load any specified charts.
        for filename in chart_filenames:
            self.load_chart(filename)

    def destroy(self, *e):
        """
        Destroy the root window.  Safe to call more than once.

        :param e: Ignored; lets this method be used as a Tk event handler.
        """
        if self._root is None:
            return
        try:
            self._root.destroy()
        except Exception:
            # Best effort: the window may already have been torn down.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._root = None

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tk main loop of the root window; arguments are passed
        through to it.  Returns immediately if the window has been
        destroyed or if ``in_idle()`` is true.
        """
        if self._root is None or in_idle():
            return
        self._root.mainloop(*args, **kwargs)

    # ////////////////////////////////////////////////////////////
    # Initialization
    # ////////////////////////////////////////////////////////////

    def _init_menubar(self, root):
        """Build the File and Compare menus."""
        menubar = Menu(root)

        # File menu
        filemenu = Menu(menubar, tearoff=0)
        filemenu.add_command(
            label="Load Chart",
            accelerator="Ctrl-o",
            underline=0,
            command=self.load_chart_dialog,
        )
        filemenu.add_command(
            label="Save Output",
            accelerator="Ctrl-s",
            underline=0,
            command=self.save_chart_dialog,
        )
        filemenu.add_separator()
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        # Compare menu
        opmenu = Menu(menubar, tearoff=0)
        opmenu.add_command(
            label="Intersection", command=self._intersection, accelerator="+"
        )
        opmenu.add_command(label="Union", command=self._union, accelerator="*")
        opmenu.add_command(
            label="Difference", command=self._difference, accelerator="-"
        )
        opmenu.add_separator()
        opmenu.add_command(label="Swap Charts", command=self._swapcharts)
        menubar.add_cascade(label="Compare", underline=0, menu=opmenu)

        # Add the menu
        self._root.config(menu=menubar)

    def _init_divider(self, root):
        """Add a thin sunken divider below the chart views."""
        divider = Frame(root, border=2, relief="sunken")
        divider.pack(side="top", fill="x", ipady=2)

    def _init_chartviews(self, root):
        """
        Lay out, left to right: the left chart, the operator symbol, the
        right chart, an equals sign and the output chart.  All three
        matrix views start out inactive.
        """
        opfont = ("symbol", -36)  # Font for operator.
        eqfont = ("helvetica", -36)  # Font for equals sign.

        frame = Frame(root, background="#c0c0c0")
        frame.pack(side="top", expand=1, fill="both")

        # The left matrix.
        cv1_frame = Frame(frame, border=3, relief="groove")
        cv1_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._left_selector = MutableOptionMenu(
            cv1_frame, list(self._charts.keys()), command=self._select_left
        )
        self._left_selector.pack(side="top", pady=5, fill="x")
        self._left_matrix = ChartMatrixView(
            cv1_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._left_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._left_matrix.add_callback("select", self.select_edge)
        self._left_matrix.add_callback("select_cell", self.select_cell)
        self._left_matrix.inactivate()

        # The operator.
        self._op_label = Label(
            frame, text=" ", width=3, background="#c0c0c0", font=opfont
        )
        self._op_label.pack(side="left", padx=5, pady=5)

        # The right matrix.
        cv2_frame = Frame(frame, border=3, relief="groove")
        cv2_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._right_selector = MutableOptionMenu(
            cv2_frame, list(self._charts.keys()), command=self._select_right
        )
        self._right_selector.pack(side="top", pady=5, fill="x")
        self._right_matrix = ChartMatrixView(
            cv2_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._right_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._right_matrix.add_callback("select", self.select_edge)
        self._right_matrix.add_callback("select_cell", self.select_cell)
        self._right_matrix.inactivate()

        # The equals sign
        Label(frame, text="=", width=3, background="#c0c0c0", font=eqfont).pack(
            side="left", padx=5, pady=5
        )

        # The output matrix.
        out_frame = Frame(frame, border=3, relief="groove")
        out_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._out_label = Label(out_frame, text="Output")
        self._out_label.pack(side="top", pady=9)
        self._out_matrix = ChartMatrixView(
            out_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._out_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._out_matrix.add_callback("select", self.select_edge)
        self._out_matrix.add_callback("select_cell", self.select_cell)
        self._out_matrix.inactivate()

    def _init_buttons(self, root):
        """Add the operation buttons along the bottom of the window."""
        buttons = Frame(root)
        buttons.pack(side="bottom", pady=5, fill="x", expand=0)
        Button(buttons, text="Intersection", command=self._intersection).pack(
            side="left"
        )
        Button(buttons, text="Union", command=self._union).pack(side="left")
        Button(buttons, text="Difference", command=self._difference).pack(side="left")
        # Spacer between the set operations and "Swap Charts".
        Frame(buttons, width=20).pack(side="left")
        Button(buttons, text="Swap Charts", command=self._swapcharts).pack(side="left")

        Button(buttons, text="Detach Output", command=self._detach_out).pack(
            side="right"
        )

    def _init_bindings(self, root):
        """Bind the keyboard shortcuts advertised in the File menu."""
        root.bind("<Control-s>", self.save_chart_dialog)
        root.bind("<Control-o>", self.load_chart_dialog)

    # ////////////////////////////////////////////////////////////
    # Input Handling
    # ////////////////////////////////////////////////////////////

    def _select_left(self, name):
        """Show the chart called ``name`` on the left; re-apply the op."""
        self._left_name = name
        self._left_chart = self._charts[name]
        self._left_matrix.set_chart(self._left_chart)
        if name == "None":
            self._left_matrix.inactivate()
        self._apply_op()

    def _select_right(self, name):
        """Show the chart called ``name`` on the right; re-apply the op."""
        self._right_name = name
        self._right_chart = self._charts[name]
        self._right_matrix.set_chart(self._right_chart)
        if name == "None":
            self._right_matrix.inactivate()
        self._apply_op()

    def _apply_op(self):
        """Recompute the output chart using the most recent operator."""
        if self._operator == "-":
            self._difference()
        elif self._operator == "or":
            self._union()
        elif self._operator == "and":
            self._intersection()

    # ////////////////////////////////////////////////////////////
    # File
    # ////////////////////////////////////////////////////////////
    CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")]

    def save_chart_dialog(self, *args):
        """
        Ask for a filename and pickle the output chart to it.  Failures
        are reported in an error dialog.

        :param args: Ignored; lets this method be used as an event handler.
        """
        filename = asksaveasfilename(
            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
        )
        if not filename:
            return
        try:
            with open(filename, "wb") as outfile:
                pickle.dump(self._out_chart, outfile)
        except Exception as e:
            showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")

    def load_chart_dialog(self, *args):
        """
        Ask for a filename and load the chart stored in it.  Failures are
        reported in an error dialog.

        :param args: Ignored; lets this method be used as an event handler.
        """
        filename = askopenfilename(
            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
        )
        if not filename:
            return
        try:
            self.load_chart(filename)
        except Exception as e:
            showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")

    def load_chart(self, filename):
        """
        Load a pickled chart from ``filename`` and offer it in both
        selectors.  The chart is named after the file, with any
        ``.pickle`` and ``.chart`` suffixes removed.  If the left (or
        else the right) view is still empty, the new chart is shown there.

        Any exception raised while opening or unpickling the file
        propagates to the caller.
        """
        # SECURITY: unpickling can execute arbitrary code.  Only load
        # chart files that come from a trusted source.
        with open(filename, "rb") as infile:
            chart = pickle.load(infile)
        name = os.path.basename(filename)
        if name.endswith(".pickle"):
            name = name[:-7]
        if name.endswith(".chart"):
            name = name[:-6]
        self._charts[name] = chart
        self._left_selector.add(name)
        self._right_selector.add(name)

        # If either left_matrix or right_matrix is empty, then
        # display the new chart.
        if self._left_chart is self._emptychart:
            self._left_selector.set(name)
        elif self._right_chart is self._emptychart:
            self._right_selector.set(name)

    def _update_chartviews(self):
        """Refresh all three matrix views."""
        self._left_matrix.update()
        self._right_matrix.update()
        self._out_matrix.update()

    # ////////////////////////////////////////////////////////////
    # Selection
    # ////////////////////////////////////////////////////////////

    def select_edge(self, edge):
        """
        Mark ``edge`` in every view whose chart contains it, and clear
        the marks in every view whose chart does not.
        """
        if edge in self._left_chart:
            self._left_matrix.markonly_edge(edge)
        else:
            self._left_matrix.unmark_edge()
        if edge in self._right_chart:
            self._right_matrix.markonly_edge(edge)
        else:
            self._right_matrix.unmark_edge()
        if edge in self._out_chart:
            self._out_matrix.markonly_edge(edge)
        else:
            self._out_matrix.unmark_edge()

    def select_cell(self, i, j):
        """Select cell ``(i, j)`` in all three matrix views."""
        self._left_matrix.select_cell(i, j)
        self._right_matrix.select_cell(i, j)
        self._out_matrix.select_cell(i, j)

    # ////////////////////////////////////////////////////////////
    # Operations
    # ////////////////////////////////////////////////////////////

    def _difference(self):
        """Output the edges of the left chart that are not in the right."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            if edge not in self._right_chart:
                out_chart.insert(edge, [])

        self._update("-", out_chart)

    def _intersection(self):
        """Output the edges that occur in both the left and right chart."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            if edge in self._right_chart:
                out_chart.insert(edge, [])

        self._update("and", out_chart)

    def _union(self):
        """Output the edges that occur in the left or the right chart."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            out_chart.insert(edge, [])
        for edge in self._right_chart:
            out_chart.insert(edge, [])

        self._update("or", out_chart)

    def _swapcharts(self):
        """Exchange the left and right charts."""
        left, right = self._left_name, self._right_name
        self._left_selector.set(right)
        self._right_selector.set(left)

    def _checkcompat(self):
        """
        Check that the left and right charts can be combined: both must
        be loaded and must agree on tokens and property names.

        :return: True if they are compatible.  Otherwise the output view
            is reset to the empty chart and inactivated, and False is
            returned.
        """
        if (
            self._left_chart.tokens() != self._right_chart.tokens()
            or self._left_chart.property_names() != self._right_chart.property_names()
            or self._left_chart == self._emptychart
            or self._right_chart == self._emptychart
        ):
            # Clear & inactivate the output chart.
            self._out_chart = self._emptychart
            self._out_matrix.set_chart(self._out_chart)
            self._out_matrix.inactivate()
            self._out_label["text"] = "Output"
            # Issue some other warning?
            return False
        else:
            return True

    def _update(self, operator, out_chart):
        """
        Record ``operator`` as the most recent operation and display
        ``out_chart`` as its result.

        :param operator: A key of ``_OPSYMBOL``.
        :param out_chart: The chart produced by the operation.
        """
        self._operator = operator
        self._op_label["text"] = self._OPSYMBOL[operator]
        self._out_chart = out_chart
        self._out_matrix.set_chart(out_chart)
        self._out_label["text"] = "{} {} {}".format(
            self._left_name,
            self._operator,
            self._right_name,
        )

    def _clear_out_chart(self):
        """Reset the output view to the empty chart and inactivate it."""
        self._out_chart = self._emptychart
        self._out_matrix.set_chart(self._out_chart)
        self._op_label["text"] = " "
        self._out_matrix.inactivate()

    def _detach_out(self):
        """Open the output chart in a separate ``ChartMatrixView`` window."""
        ChartMatrixView(self._root, self._out_chart, title=self._out_label["text"])
|
939 |
+
|
940 |
+
|
941 |
+
#######################################################################
|
942 |
+
# Chart View
|
943 |
+
#######################################################################
|
944 |
+
|
945 |
+
|
946 |
+
class ChartView:
|
947 |
+
"""
|
948 |
+
A component for viewing charts. This is used by ``ChartParserApp`` to
|
949 |
+
allow students to interactively experiment with various chart
|
950 |
+
parsing techniques. It is also used by ``Chart.draw()``.
|
951 |
+
|
952 |
+
:ivar _chart: The chart that we are giving a view of. This chart
|
953 |
+
may be modified; after it is modified, you should call
|
954 |
+
``update``.
|
955 |
+
:ivar _sentence: The list of tokens that the chart spans.
|
956 |
+
|
957 |
+
:ivar _root: The root window.
|
958 |
+
:ivar _chart_canvas: The canvas we're using to display the chart
|
959 |
+
itself.
|
960 |
+
:ivar _tree_canvas: The canvas we're using to display the tree
|
961 |
+
that each edge spans. May be None, if we're not displaying
|
962 |
+
trees.
|
963 |
+
:ivar _sentence_canvas: The canvas we're using to display the sentence
|
964 |
+
text. May be None, if we're not displaying the sentence text.
|
965 |
+
:ivar _edgetags: A dictionary mapping from edges to the tags of
|
966 |
+
the canvas elements (lines, etc) used to display that edge.
|
967 |
+
The values of this dictionary have the form
|
968 |
+
``(linetag, rhstag1, dottag, rhstag2, lhstag)``.
|
969 |
+
:ivar _treetags: A list of all the tags that make up the tree;
|
970 |
+
used to erase the tree (without erasing the loclines).
|
971 |
+
:ivar _chart_height: The height of the chart canvas.
|
972 |
+
:ivar _sentence_height: The height of the sentence canvas.
|
973 |
+
:ivar _tree_height: The height of the tree
|
974 |
+
|
975 |
+
:ivar _text_height: The height of a text string (in the normal
|
976 |
+
font).
|
977 |
+
|
978 |
+
:ivar _edgelevels: A list of edges at each level of the chart (the
|
979 |
+
top level is the 0th element). This list is used to remember
|
980 |
+
where edges should be drawn; and to make sure that no edges
|
981 |
+
are overlapping on the chart view.
|
982 |
+
|
983 |
+
:ivar _unitsize: Pixel size of one unit (from the location). This
|
984 |
+
is determined by the span of the chart's location, and the
|
985 |
+
width of the chart display canvas.
|
986 |
+
|
987 |
+
:ivar _fontsize: The current font size
|
988 |
+
|
989 |
+
:ivar _marks: A dictionary from edges to marks. Marks are
|
990 |
+
strings, specifying colors (e.g. 'green').
|
991 |
+
"""
|
992 |
+
|
993 |
+
_LEAF_SPACING = 10
|
994 |
+
_MARGIN = 10
|
995 |
+
_TREE_LEVEL_SIZE = 12
|
996 |
+
_CHART_LEVEL_SIZE = 40
|
997 |
+
|
998 |
+
def __init__(self, chart, root=None, **kw):
    """
    Construct a new ``Chart`` display.

    :param chart: The chart to display.
    :param root: The Tk widget to build the view in.  If None, a new
        ``Tk`` window titled "Chart View" is created, with a "Done"
        button and a "q" key binding that both destroy it.
    :param kw: Optional settings read with ``kw.get``: ``draw_tree``
        (default 0), ``draw_sentence`` (default 1) and ``fontsize``
        (default -12).
    """
    # Process keyword args.
    draw_tree = kw.get("draw_tree", 0)
    draw_sentence = kw.get("draw_sentence", 1)
    self._fontsize = kw.get("fontsize", -12)

    # The chart!
    self._chart = chart

    # Callback functions
    self._callbacks = {}

    # Keep track of drawn edges
    self._edgelevels = []
    self._edgetags = {}

    # Keep track of which edges are marked.
    self._marks = {}

    # These are used to keep track of the set of tree tokens
    # currently displayed in the tree canvas.
    self._treetoks = []
    self._treetoks_edge = None
    self._treetoks_index = 0

    # Keep track of the tags used to draw the tree
    self._tree_tags = []

    # Put multiple edges on each level?
    self._compact = 0

    # If they didn't provide a main window, then set one up.
    if root is None:
        top = Tk()
        top.title("Chart View")

        def destroy1(e, top=top):
            top.destroy()

        def destroy2(top=top):
            top.destroy()

        top.bind("q", destroy1)
        b = Button(top, text="Done", command=destroy2)
        b.pack(side="bottom")
        self._root = top
    else:
        self._root = root

    # Create some fonts.  Pass self._root rather than the ``root``
    # argument: the argument is None when we created the window
    # ourselves, and _init_fonts calls ``option_add`` on what it gets.
    self._init_fonts(self._root)

    # Create the chart canvas.
    (self._chart_sb, self._chart_canvas) = self._sb_canvas(self._root)
    self._chart_canvas["height"] = 300
    self._chart_canvas["closeenough"] = 15

    # Create the sentence canvas.
    if draw_sentence:
        cframe = Frame(self._root, relief="sunk", border=2)
        cframe.pack(fill="both", side="bottom")
        self._sentence_canvas = Canvas(cframe, height=50)
        self._sentence_canvas["background"] = "#e0e0e0"
        self._sentence_canvas.pack(fill="both")
    else:
        self._sentence_canvas = None

    # Create the tree canvas.
    if draw_tree:
        (sb, canvas) = self._sb_canvas(self._root, "n", "x")
        (self._tree_sb, self._tree_canvas) = (sb, canvas)
        self._tree_canvas["height"] = 200
    else:
        self._tree_canvas = None

    # Do some analysis to figure out how big the window should be
    self._analyze()
    self.draw()
    self._resize()
    self._grow()

    # Set up the configure callback, which will be called whenever
    # the window is resized.
    self._chart_canvas.bind("<Configure>", self._configure)
|
1086 |
+
|
1087 |
+
def _init_fonts(self, root):
    """
    Create the bold, normal and system fonts used by the view, and
    register the system font as the ``*Font`` option on ``root``.

    :param root: The widget whose option database is updated.
    """
    size = self._fontsize
    self._boldfont = Font(family="helvetica", weight="bold", size=size)
    self._font = Font(family="helvetica", size=size)
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    sysfont = Font(font=Button()["font"])
    self._sysfont = sysfont
    root.option_add("*Font", sysfont)
|
1093 |
+
|
1094 |
+
def _sb_canvas(self, root, expand="y", fill="both", side="bottom"):
    """
    Helper for __init__: construct a canvas with a scrollbar.

    :param root: The parent widget for the enclosing frame.
    :param expand: ``expand`` option used when packing the frame.
    :param fill: ``fill`` option used for both the frame and the canvas.
    :param side: ``side`` option used when packing the frame.
    :return: A ``(scrollbar, canvas)`` pair.
    """
    holder = Frame(root, relief="sunk", border=2)
    holder.pack(fill=fill, expand=expand, side=side)
    canvas = Canvas(holder, background="#e0e0e0")

    # Give the canvas a vertical scrollbar on its right-hand side.
    scrollbar = Scrollbar(holder, orient="vertical")
    scrollbar.pack(side="right", fill="y")
    canvas.pack(side="left", fill=fill, expand="yes")

    # Wire the scrollbar and the canvas to each other.
    scrollbar["command"] = canvas.yview
    canvas["yscrollcommand"] = scrollbar.set

    return (scrollbar, canvas)
|
1112 |
+
|
1113 |
+
def scroll_up(self, *e):
    """Scroll the chart canvas up by one unit; event arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", -1, "units")
|
1115 |
+
|
1116 |
+
def scroll_down(self, *e):
    """Scroll the chart canvas down by one unit; event arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", 1, "units")
|
1118 |
+
|
1119 |
+
def page_up(self, *e):
    """Scroll the chart canvas up by one page; event arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", -1, "pages")
|
1121 |
+
|
1122 |
+
def page_down(self, *e):
    """Scroll the chart canvas down by one page; event arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", 1, "pages")
|
1124 |
+
|
1125 |
+
def _grow(self):
    """
    Grow the window, if necessary.

    Widens the chart canvas so that every leaf gets at least
    ``_unitsize`` pixels, recomputes ``_unitsize`` from the resulting
    width, and resets the height of the sentence canvas (if any).
    """
    # Grow, if need-be
    N = self._chart.num_leaves()
    width = max(
        int(self._chart_canvas["width"]), N * self._unitsize + ChartView._MARGIN * 2
    )

    # It won't resize without the second (height) line, but I
    # don't understand why not.
    self._chart_canvas.configure(width=width)
    self._chart_canvas.configure(height=self._chart_canvas["height"])

    # Guard against a chart with no leaves, which would otherwise
    # raise ZeroDivisionError here.
    self._unitsize = (width - 2 * ChartView._MARGIN) / max(N, 1)

    # Reset the height for the sentence window.
    if self._sentence_canvas is not None:
        self._sentence_canvas["height"] = self._sentence_height
|
1145 |
+
|
1146 |
+
def set_font_size(self, size):
    """
    Change the size of all fonts used by the view and redraw it.

    :param size: The new font size; its sign is ignored and the fonts
        are configured with ``-abs(size)``.
    """
    new_size = -abs(size)
    # Remember the size so that get_font_size() reports the current
    # value instead of the one given at construction time.
    self._fontsize = new_size
    self._font.configure(size=new_size)
    self._boldfont.configure(size=new_size)
    self._sysfont.configure(size=new_size)
    self._analyze()
    self._grow()
    self.draw()
|
1153 |
+
|
1154 |
+
def get_font_size(self):
    """
    :return: The absolute value of the stored font size.
    """
    size = self._fontsize
    return abs(size)
|
1156 |
+
|
1157 |
+
def _configure(self, e):
    """
    The configure callback.  This is called whenever the window is
    resized.  It is also called when the window is first mapped.
    It figures out the unit size, and redraws the contents of each
    canvas.

    :param e: The Tk event; only its ``width`` is used.
    """
    N = self._chart.num_leaves()
    # Guard against a chart with no leaves, which would otherwise
    # raise ZeroDivisionError here.
    self._unitsize = (e.width - 2 * ChartView._MARGIN) / max(N, 1)
    self.draw()
|
1167 |
+
|
1168 |
+
def update(self, chart=None):
    """
    Draw any edges that have not been drawn.  This is typically
    called after something has modified the chart that this view is
    displaying; ``update`` will cause any edges that have been added
    to the chart to be drawn.

    If update is given a ``chart`` argument, then it will replace
    the current chart with the given chart.

    :param chart: An optional replacement chart.
    """
    if chart is None:
        # Same chart: only add the edges we have not drawn yet.
        for edge in self._chart:
            if edge not in self._edgetags:
                self._add_edge(edge)
        self._resize()
        return

    # New chart: forget all per-chart state and rebuild the display.
    self._chart = chart
    self._edgelevels = []
    self._marks = {}
    self._analyze()
    self._grow()
    self.draw()
    self.erase_tree()
    self._resize()
|
1192 |
+
|
1193 |
+
def _edge_conflict(self, edge, lvl):
|
1194 |
+
"""
|
1195 |
+
Return True if the given edge overlaps with any edge on the given
|
1196 |
+
level. This is used by _add_edge to figure out what level a
|
1197 |
+
new edge should be added to.
|
1198 |
+
"""
|
1199 |
+
(s1, e1) = edge.span()
|
1200 |
+
for otheredge in self._edgelevels[lvl]:
|
1201 |
+
(s2, e2) = otheredge.span()
|
1202 |
+
if (s1 <= s2 < e1) or (s2 <= s1 < e2) or (s1 == s2 == e1 == e2):
|
1203 |
+
return True
|
1204 |
+
return False
|
1205 |
+
|
1206 |
+
def _analyze_edge(self, edge):
    """
    Given a new edge, recalculate:

        - _text_height
        - _unitsize (if the edge text is too big for the current
          _unitsize, then increase _unitsize)

    The text is measured by drawing it on the chart canvas in the
    bold font, reading its bounding box and deleting it again.
    """
    canvas = self._chart_canvas

    lhs = edge.lhs()
    if isinstance(edge, TreeEdge):
        rhs = " ".join(
            str(elt.symbol()) if isinstance(elt, Nonterminal) else repr(elt)
            for elt in edge.rhs()
        )
    else:
        rhs = ""

    for label in (lhs, rhs):
        tag = canvas.create_text(
            0, 0, text=label, font=self._boldfont, anchor="nw", justify="left"
        )
        bbox = canvas.bbox(tag)
        canvas.delete(tag)
        width = bbox[2]
        # Zero-length edges are treated as spanning one unit.
        edgelen = max(edge.length(), 1)
        self._unitsize = max(self._unitsize, width / edgelen)
        self._text_height = max(self._text_height, bbox[3] - bbox[1])
|
1239 |
+
|
1240 |
+
def _add_edge(self, edge, minlvl=0):
    """
    Add a single edge to the ChartView:

        - Call _analyze_edge to recalculate display parameters
        - Find an available level
        - Call _draw_edge

    :param edge: The edge to add.  Leaf edges and edges that already
        have canvas tags are ignored.
    :param minlvl: In compact mode, the lowest level index the edge
        may be placed on.
    """
    # Do NOT show leaf edges in the chart, and never draw an edge twice.
    if isinstance(edge, LeafEdge) or edge in self._edgetags:
        return

    self._analyze_edge(edge)
    self._grow()

    if not self._compact:
        # Non-compact mode: every edge gets a fresh level of its own.
        self._edgelevels.append([edge])
        self._draw_edge(edge, len(self._edgelevels) - 1)
        self._resize()
        return

    # Compact mode: find the first level (>= minlvl) with room for the edge.
    lvl = 0
    placed = False
    while not placed:
        # If this level doesn't exist yet, create it.
        while lvl >= len(self._edgelevels):
            self._edgelevels.append([])
            self._resize()

        if lvl >= minlvl and not self._edge_conflict(edge, lvl):
            self._edgelevels[lvl].append(edge)
            placed = True
        else:
            lvl += 1

    self._draw_edge(edge, lvl)
|
1282 |
+
|
1283 |
+
def view_edge(self, edge):
    """
    Scroll the chart canvas so that the given edge is in view.  Does
    nothing if the edge is not on any level of the chart view.
    """
    for level, edges in enumerate(self._edgelevels):
        if edge in edges:
            break
    else:
        return

    # Try to view the new edge..
    y = (level + 1) * self._chart_level_size
    dy = self._text_height + 10
    canvas = self._chart_canvas
    canvas.yview("moveto", 1.0)
    if self._chart_height != 0:
        canvas.yview("moveto", (y - dy) / self._chart_height)
|
1297 |
+
|
1298 |
+
def _draw_edge(self, edge, lvl):
    """
    Draw a single edge on the ChartView.

    Creates the arrow, the two halves of the right-hand side, the dot
    between them and the left-hand-side label; records their tags in
    ``_edgetags``; and binds a click on any of them to the "select"
    callbacks.

    :param edge: The edge to draw.
    :param lvl: The chart level (row) to draw it on.
    """
    canvas = self._chart_canvas
    margin = ChartView._MARGIN

    # Draw the arrow.
    x1 = edge.start() * self._unitsize + margin
    x2 = edge.end() * self._unitsize + margin
    if x2 == x1:
        # Give zero-width edges a short visible arrow.
        x2 += max(4, self._unitsize / 5)
    y = (lvl + 1) * self._chart_level_size
    linetag = canvas.create_line(x1, y, x2, y, arrow="last", width=3)

    # Work out the label for the edge.
    if isinstance(edge, TreeEdge):
        rhs = [
            str(elt.symbol()) if isinstance(elt, Nonterminal) else repr(elt)
            for elt in edge.rhs()
        ]
        pos = edge.dot()
    else:
        rhs, pos = [], 0

    before_dot = " ".join(rhs[:pos])
    after_dot = " ".join(rhs[pos:])
    rhstag1 = canvas.create_text(
        x1 + 3, y, text=before_dot, font=self._font, anchor="nw"
    )
    bbox = canvas.bbox(rhstag1)
    dotx = bbox[2] + 6
    doty = (bbox[1] + bbox[3]) / 2
    dottag = canvas.create_oval(dotx - 2, doty - 2, dotx + 2, doty + 2)
    rhstag2 = canvas.create_text(
        dotx + 6, y, text=after_dot, font=self._font, anchor="nw"
    )
    lhstag = canvas.create_text(
        (x1 + x2) / 2, y, text=str(edge.lhs()), anchor="s", font=self._boldfont
    )

    # Keep track of the edge's tags.
    self._edgetags[edge] = (linetag, rhstag1, dottag, rhstag2, lhstag)

    # Register a callback for clicking on the edge.
    def cb(event, self=self, edge=edge):
        self._fire_callbacks("select", edge)

    for tag in (rhstag1, rhstag2, linetag, dottag, lhstag):
        canvas.tag_bind(tag, "<Button-1>", cb)

    self._color_edge(edge)
|
1350 |
+
|
1351 |
+
def _color_edge(self, edge, linecolor=None, textcolor=None):
    """
    Color in an edge with the given colors.
    If no colors are specified, use intelligent defaults
    (dependent on selection, etc.)

    :param edge: The edge to color; ignored if it has no canvas tags.
    :param linecolor: Color for the arrow.  If the edge is marked, its
        mark color is used for the arrow instead.
    :param textcolor: Color for the labels and the dot.

    Explicit colors are used only when *both* are given; otherwise
    the defaults are chosen from the kind of edge.
    """
    if edge not in self._edgetags:
        return
    c = self._chart_canvas

    if linecolor is not None and textcolor is not None:
        if edge in self._marks:
            linecolor = self._marks[edge]
        tags = self._edgetags[edge]
        c.itemconfig(tags[0], fill=linecolor)
        c.itemconfig(tags[1], fill=textcolor)
        c.itemconfig(tags[2], fill=textcolor, outline=textcolor)
        c.itemconfig(tags[3], fill=textcolor)
        c.itemconfig(tags[4], fill=textcolor)
        return

    # Pick default colors.  Marks need no handling here: the
    # explicit-color branch above substitutes the mark for the line
    # color.  (A former call passed the mark string as the edge
    # argument, which never matched an entry in _edgetags and so did
    # nothing.)
    N = self._chart.num_leaves()
    if edge.is_complete() and edge.span() == (0, N):
        self._color_edge(edge, "#084", "#042")
    elif isinstance(edge, LeafEdge):
        self._color_edge(edge, "#48c", "#246")
    else:
        self._color_edge(edge, "#00f", "#008")
|
1381 |
+
|
1382 |
+
def mark_edge(self, edge, mark="#0df"):
    """
    Mark an edge.

    :param edge: The edge to mark.
    :param mark: The mark to record for the edge (a color string).
    """
    marks = self._marks
    marks[edge] = mark
    self._color_edge(edge)
|
1388 |
+
|
1389 |
+
def unmark_edge(self, edge=None):
    """
    Unmark an edge (or all edges).

    :param edge: The edge to unmark.  If None, every mark is removed.
        Unmarking an edge that is not marked is a no-op apart from
        recoloring it.
    """
    if edge is None:
        old_marked_edges = list(self._marks)
        self._marks = {}
        for marked in old_marked_edges:
            self._color_edge(marked)
    else:
        # pop() instead of del: do not raise KeyError for an edge
        # that was never marked.
        self._marks.pop(edge, None)
        self._color_edge(edge)
|
1401 |
+
|
1402 |
+
def markonly_edge(self, edge, mark="#0df"):
    """
    Remove every existing mark, then mark only the given edge.

    :param edge: The edge to mark.
    :param mark: The mark to record for the edge.
    """
    # Clear all marks first so this edge ends up as the only one.
    self.unmark_edge()
    self.mark_edge(edge, mark)
|
1405 |
+
|
1406 |
+
def _analyze(self):
    """
    Analyze the sentence string, to figure out how big a unit needs
    to be, how big the tree should be, etc.

    Sets ``_unitsize``, ``_text_height``, ``_sentence_height``,
    ``_chart_level_size`` and ``_tree_height``, then updates the
    scroll regions.
    """
    canvas = self._chart_canvas

    # Figure out the text height and the unit size by measuring
    # every leaf on the chart canvas.
    unitsize = 70  # min unitsize
    text_height = 0
    for leaf in self._chart.leaves():
        tag = canvas.create_text(
            0, 0, text=repr(leaf), font=self._font, anchor="nw", justify="left"
        )
        bbox = canvas.bbox(tag)
        canvas.delete(tag)
        unitsize = max(bbox[2] + ChartView._LEAF_SPACING, unitsize)
        text_height = max(text_height, bbox[3] - bbox[1])

    self._unitsize = unitsize
    self._text_height = text_height
    self._sentence_height = self._text_height + 2 * ChartView._MARGIN

    # Check against edges.
    for edge in self._chart.edges():
        self._analyze_edge(edge)

    # Size of chart levels
    self._chart_level_size = self._text_height * 2

    # Default tree size..
    self._tree_height = 3 * (ChartView._TREE_LEVEL_SIZE + self._text_height)

    # Resize the scrollregions.
    self._resize()
|
1443 |
+
|
1444 |
+
def _resize(self):
    """
    Update the scroll-regions for each canvas.  This ensures that
    everything is within a scroll-region, so the user can use the
    scrollbars to view the entire display.  This does *not*
    resize the window.
    """
    # Reset the chart scroll region: one row per level plus two spare.
    width = self._chart.num_leaves() * self._unitsize + ChartView._MARGIN * 2
    self._chart_height = (len(self._edgelevels) + 2) * self._chart_level_size
    self._chart_canvas["scrollregion"] = (0, 0, width, self._chart_height)

    # Reset the tree scroll region
    tree_canvas = self._tree_canvas
    if tree_canvas:
        tree_canvas["scrollregion"] = (0, 0, width, self._tree_height)
|
1463 |
+
|
1464 |
+
def _draw_loclines(self):
    """
    Draw location lines.  These are vertical gridlines used to
    show where each location unit is.

    A line is drawn on the chart canvas and on the tree and sentence
    canvases when they exist; the chart canvas also gets the numeric
    index of each location.  Lines alternate between two grays.
    """
    BOTTOM = 50000
    tree_c = self._tree_canvas
    sent_c = self._sentence_canvas
    chart_c = self._chart_canvas
    margin = ChartView._MARGIN
    self._loclines = []
    for i in range(self._chart.num_leaves() + 1):
        x = i * self._unitsize + margin

        if tree_c:
            t1 = tree_c.create_line(x, 0, x, BOTTOM)
            tree_c.tag_lower(t1)
        if sent_c:
            t2 = sent_c.create_line(x, 0, x, self._sentence_height)
            sent_c.tag_lower(t2)
        t3 = chart_c.create_line(x, 0, x, BOTTOM)
        chart_c.tag_lower(t3)
        t4 = chart_c.create_text(x + 2, 0, text=repr(i), anchor="nw", font=self._font)
        chart_c.tag_lower(t4)

        # Even locations get the darker gray, odd ones the lighter.
        shade = "gray60" if i % 2 == 0 else "gray80"
        if tree_c:
            tree_c.itemconfig(t1, fill=shade)
        if sent_c:
            sent_c.itemconfig(t2, fill=shade)
        chart_c.itemconfig(t3, fill=shade)
|
1504 |
+
|
1505 |
+
def _draw_sentence(self):
    """
    Draw the sentence string: one label per leaf, centered in its
    unit, with a light rectangle behind it.
    """
    if self._chart.num_leaves() == 0:
        return

    canvas = self._sentence_canvas
    margin = ChartView._MARGIN
    y = ChartView._MARGIN
    pad = ChartView._LEAF_SPACING / 2

    for i, leaf in enumerate(self._chart.leaves()):
        left = i * self._unitsize + margin
        right = left + self._unitsize
        center = (left + right) / 2
        tag = canvas.create_text(
            center, y, text=repr(leaf), font=self._font, anchor="n", justify="left"
        )
        bbox = canvas.bbox(tag)
        backdrop = canvas.create_rectangle(
            left + 2,
            bbox[1] - pad,
            right - 2,
            bbox[3] + pad,
            fill="#f0f0f0",
            outline="#f0f0f0",
        )
        # Keep the rectangle behind the text.
        canvas.tag_lower(backdrop)
|
1530 |
+
|
1531 |
+
def erase_tree(self):
    """
    Remove the currently drawn tree from the tree canvas and forget
    which trees were being displayed.
    """
    for tag in self._tree_tags:
        self._tree_canvas.delete(tag)
    # The deleted items no longer exist; drop their tags so the list
    # does not keep growing across redraws.
    self._tree_tags = []
    self._treetoks = []
    self._treetoks_edge = None
    self._treetoks_index = 0
|
1537 |
+
|
1538 |
+
def draw_tree(self, edge=None):
    """
    Draw a tree for the given edge on the tree canvas.

    :param edge: The edge whose trees should be shown.  If None, the
        edge shown last time is redrawn; if there is none, nothing
        happens.
    """
    if edge is None and self._treetoks_edge is None:
        return
    if edge is None:
        edge = self._treetoks_edge

    # If it's a new edge, then get a new list of treetoks.
    if self._treetoks_edge != edge:
        self._treetoks = [t for t in self._chart.trees(edge) if isinstance(t, Tree)]
        self._treetoks_edge = edge
        self._treetoks_index = 0

    # Make sure there's something to draw.
    if len(self._treetoks) == 0:
        return

    # Erase the old tree, and forget its tags so that the list holds
    # only the items of the tree drawn below.
    for tag in self._tree_tags:
        self._tree_canvas.delete(tag)
    self._tree_tags = []

    # Draw the new tree.
    tree = self._treetoks[self._treetoks_index]
    self._draw_treetok(tree, edge.start())

    # Show how many trees are available for the edge.
    self._draw_treecycle()

    # Update the scroll region.
    w = self._chart.num_leaves() * self._unitsize + 2 * ChartView._MARGIN
    h = tree.height() * (ChartView._TREE_LEVEL_SIZE + self._text_height)
    self._tree_canvas["scrollregion"] = (0, 0, w, h)
|
1569 |
+
|
1570 |
+
def cycle_tree(self):
    """
    Advance to the next tree of the currently displayed edge, wrapping
    around after the last one, and redraw.  Does nothing when no trees
    are being displayed.
    """
    # Without this guard the modulo below raises ZeroDivisionError.
    if not self._treetoks:
        return
    self._treetoks_index = (self._treetoks_index + 1) % len(self._treetoks)
    self.draw_tree(self._treetoks_edge)
|
1573 |
+
|
1574 |
+
def _draw_treecycle(self):
    """
    When the current edge has more than one tree, draw an "N Trees"
    label in the top-right corner of the tree canvas with one
    triangle per tree beneath it.  The triangle of the displayed tree
    is filled; clicking a triangle shows the corresponding tree.
    """
    count = len(self._treetoks)
    if count <= 1:
        return

    # Draw the label.
    canvas = self._tree_canvas
    margin = ChartView._MARGIN
    right = self._chart.num_leaves() * self._unitsize + margin - 2
    tag = canvas.create_text(
        right, 2, anchor="ne", text="%d Trees" % count, font=self._boldfont
    )
    self._tree_tags.append(tag)
    _, _, _, y = canvas.bbox(tag)

    # Draw the triangles.
    for i in range(count):
        x = right - 20 * (count - i - 1)
        fill = "#084" if i == self._treetoks_index else "#fff"
        tag = canvas.create_polygon(
            x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline="black"
        )
        self._tree_tags.append(tag)

        # Set up a callback: show the tree if they click on its
        # triangle.  ``i`` is bound as a default so each callback
        # keeps its own index.
        def cb(event, self=self, i=i):
            self._treetoks_index = i
            self.draw_tree()

        canvas.tag_bind(tag, "<Button-1>", cb)
|
1606 |
+
|
1607 |
+
def _draw_treetok(self, treetok, index, depth=0):
    """
    Recursively draw a tree on the tree canvas.

    :param treetok: The tree to draw.
    :param index: The index of the first leaf in the tree.
    :param depth: The depth of this node; it determines the node's y
        coordinate.
    :return: A pair ``(nodex, index)``: the x coordinate of the node
        that was drawn, and the index of the first leaf after the tree.
    """
    canvas = self._tree_canvas
    margin = ChartView._MARGIN
    level_height = ChartView._TREE_LEVEL_SIZE + self._text_height

    # Draw the children
    child_xs = []
    for child in treetok:
        if isinstance(child, Tree):
            child_x, index = self._draw_treetok(child, index, depth + 1)
        else:
            child_x = (2 * index + 1) * self._unitsize / 2 + margin
            index += 1
        child_xs.append(child_x)

    # If we have children, then get the node's x by averaging their
    # node x's.  Otherwise, make room for ourselves.
    if child_xs:
        nodex = sum(child_xs) / len(child_xs)
    else:
        # [XX] breaks for null productions.
        nodex = (2 * index + 1) * self._unitsize / 2 + margin
        index += 1

    # Draw the node
    nodey = depth * level_height
    tag = canvas.create_text(
        nodex,
        nodey,
        anchor="n",
        justify="center",
        text=str(treetok.label()),
        fill="#042",
        font=self._boldfont,
    )
    self._tree_tags.append(tag)

    # Draw lines to the children.
    line_top = nodey + self._text_height
    childy = nodey + level_height
    for childx, child in zip(child_xs, treetok):
        if isinstance(child, Tree):
            if child:
                # A "real" tree token:
                tag = canvas.create_line(
                    nodex, line_top, childx, childy, width=2, fill="#084"
                )
            else:
                # An unexpanded tree token:
                tag = canvas.create_line(
                    nodex,
                    line_top,
                    childx,
                    childy,
                    width=2,
                    fill="#048",
                    dash="2 3",
                )
        else:
            # A leaf: the line runs far past the bottom of the canvas.
            tag = canvas.create_line(
                nodex, line_top, childx, 10000, width=2, fill="#084"
            )
        self._tree_tags.append(tag)

    return nodex, index
|
1686 |
+
|
1687 |
+
def draw(self):
    """
    Draw everything (from scratch).

    Clears the tree, sentence and chart canvases (those that exist),
    redraws the edges already assigned to levels, adds any chart
    edges not yet drawn, and finally draws the location lines.
    """
    tree_canvas = self._tree_canvas
    if tree_canvas:
        tree_canvas.delete("all")
        self.draw_tree()

    sentence_canvas = self._sentence_canvas
    if sentence_canvas:
        sentence_canvas.delete("all")
        self._draw_sentence()

    self._chart_canvas.delete("all")
    self._edgetags = {}

    # Redraw any edges we erased.
    for lvl, edges in enumerate(self._edgelevels):
        for edge in edges:
            self._draw_edge(edge, lvl)

    for edge in self._chart:
        self._add_edge(edge)

    self._draw_loclines()
|
1711 |
+
|
1712 |
+
def add_callback(self, event, func):
    """
    Register ``func`` to be called when ``event`` is fired.

    :param event: The event name (e.g. "select").
    :param func: The function to call.
    """
    handlers = self._callbacks.setdefault(event, {})
    handlers[func] = 1
|
1714 |
+
|
1715 |
+
def remove_callback(self, event, func=None):
    """
    Unregister callbacks for ``event``.

    :param event: The event name.
    :param func: The function to remove.  If None, every callback
        registered for ``event`` is removed (a KeyError is raised if
        the event has none).  Removing a function that is not
        registered is silently ignored.
    """
    if func is None:
        del self._callbacks[event]
    else:
        try:
            del self._callbacks[event][func]
        except (KeyError, TypeError):
            # Unknown event or function (or an unhashable one):
            # nothing to remove.  Other exceptions, including
            # KeyboardInterrupt, are no longer swallowed.
            pass
|
1723 |
+
|
1724 |
+
def _fire_callbacks(self, event, *args):
|
1725 |
+
if event not in self._callbacks:
|
1726 |
+
return
|
1727 |
+
for cb_func in list(self._callbacks[event].keys()):
|
1728 |
+
cb_func(*args)
|
1729 |
+
|
1730 |
+
|
1731 |
+
#######################################################################
|
1732 |
+
# Edge Rules
|
1733 |
+
#######################################################################
|
1734 |
+
# These versions of the chart rules apply only to a specific edge.
|
1735 |
+
# This lets the user select an edge, and then apply a rule.
|
1736 |
+
|
1737 |
+
|
1738 |
+
class EdgeRule:
    """
    To create an edge rule, make an empty base class that uses
    EdgeRule as the first base class, and the basic rule as the
    second base class.  (Order matters!)

    The methods below look up the basic rule as the *second* entry of
    the concrete class's ``__bases__`` and delegate to it, always
    supplying the edge given at construction time as the last edge.
    """

    def __init__(self, edge):
        """
        :param edge: The edge this rule is restricted to.
        """
        base_rule = self.__class__.__bases__[1]
        self._edge = edge
        # One of the basic rule's edges is fixed to ``edge``.
        self.NUM_EDGES = base_rule.NUM_EDGES - 1

    def apply(self, chart, grammar, *edges):
        """
        Apply the basic rule to ``edges`` followed by this rule's own
        edge, yielding whatever the basic rule yields.
        """
        base_rule = self.__class__.__bases__[1]
        edges += (self._edge,)
        yield from base_rule.apply(self, chart, grammar, *edges)

    def __str__(self):
        """Return the basic rule's string representation."""
        base_rule = self.__class__.__bases__[1]
        return base_rule.__str__(self)
|
1758 |
+
|
1759 |
+
|
1760 |
+
class TopDownPredictEdgeRule(EdgeRule, TopDownPredictRule):
    """``TopDownPredictRule`` restricted to a single, pre-selected edge."""
|
1762 |
+
|
1763 |
+
|
1764 |
+
class BottomUpEdgeRule(EdgeRule, BottomUpPredictRule):
    """``BottomUpPredictRule`` restricted to a single, pre-selected edge."""
|
1766 |
+
|
1767 |
+
|
1768 |
+
class BottomUpLeftCornerEdgeRule(EdgeRule, BottomUpPredictCombineRule):
    """``BottomUpPredictCombineRule`` restricted to a single, pre-selected edge."""
|
1770 |
+
|
1771 |
+
|
1772 |
+
class FundamentalEdgeRule(EdgeRule, SingleEdgeFundamentalRule):
    """``SingleEdgeFundamentalRule`` restricted to a single, pre-selected edge."""
|
1774 |
+
|
1775 |
+
|
1776 |
+
#######################################################################
|
1777 |
+
# Chart Parser Application
|
1778 |
+
#######################################################################
|
1779 |
+
|
1780 |
+
|
1781 |
+
class ChartParserApp:
|
1782 |
+
def __init__(self, grammar, tokens, title="Chart Parser Application"):
    """
    Build the chart parser application window.

    :param grammar: The grammar handed to the parser.
    :param tokens: The tokens to parse.
    :param title: The title of the root window.

    If anything fails while the window is being built, the window is
    destroyed and the exception is re-raised.
    """
    # Initialize the parser
    self._init_parser(grammar, tokens)

    self._root = None
    try:
        # Create the root window.
        root = self._root = Tk()
        root.title(title)
        root.bind("<Control-q>", self.destroy)

        # Set up some frames.  They are packed bottom-up, so the
        # chart frame ends up on top and the button frame at the bottom.
        button_frame = Frame(root)
        rule_frame = Frame(root)
        chart_frame = Frame(root)
        button_frame.pack(side="bottom", fill="none")
        rule_frame.pack(side="bottom", fill="x")
        chart_frame.pack(side="bottom", fill="both", expand=1)

        self._init_fonts(root)
        self._init_animation()
        self._init_chartview(chart_frame)
        self._init_rulelabel(rule_frame)
        self._init_buttons(button_frame)
        self._init_menubar()

        self._matrix = None
        self._results = None

        # Set up keyboard bindings.
        self._init_bindings()

    except BaseException:
        # Clean up the half-built window, then let the error propagate.
        print("Error creating Tree View")
        self.destroy()
        raise
|
1818 |
+
|
1819 |
+
def destroy(self, *args):
    """
    Destroy the root window, if there is one.  Extra arguments (such
    as a Tk event) are ignored; calling this again is a no-op.
    """
    root = self._root
    if root is not None:
        root.destroy()
        self._root = None
|
1824 |
+
|
1825 |
+
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Returns immediately, without entering the loop, when
    ``in_idle()`` is true.
    """
    if not in_idle():
        self._root.mainloop(*args, **kwargs)
|
1835 |
+
|
1836 |
+
# ////////////////////////////////////////////////////////////
|
1837 |
+
# Initialization Helpers
|
1838 |
+
# ////////////////////////////////////////////////////////////
|
1839 |
+
|
1840 |
+
def _init_parser(self, grammar, tokens):
|
1841 |
+
self._grammar = grammar
|
1842 |
+
self._tokens = tokens
|
1843 |
+
self._reset_parser()
|
1844 |
+
|
1845 |
+
def _reset_parser(self):
    """
    Create a fresh stepping chart parser for the stored grammar and
    tokens, and reset the chart, the step iterator and the selection.
    """
    parser = SteppingChartParser(self._grammar)
    self._cp = parser
    parser.initialize(self._tokens)
    self._chart = parser.chart()

    # Insert LeafEdges before the parsing starts.  The generator is
    # only exhausted; the edges it yields are not used here.
    for _leaf_edge in LeafInitRule().apply(self._chart, self._grammar):
        pass

    # The step iterator -- use this to generate new edges
    self._cpstep = parser.step()

    # The currently selected edge
    self._selection = None
|
1859 |
+
|
1860 |
+
def _init_fonts(self, root):
    """
    Create the application's fonts and the ``IntVar`` holding the
    font size, and install the system font as the ``*Font`` option
    on ``root``.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    sysfont = Font(font=Button()["font"])
    self._sysfont = sysfont
    root.option_add("*Font", sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(sysfont.cget("size"))

    self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
    self._font = Font(family="helvetica", size=self._size.get())
|
1871 |
+
|
1872 |
+
def _init_animation(self):
    """
    Create the Tk variables controlling stepping and animation speed,
    and clear the "currently animating" flag.
    """
    root = self._root

    # Are we stepping? (default=yes)
    self._step = IntVar(root)
    self._step.set(1)

    # What's our animation speed (default=fast)
    self._animate = IntVar(root)
    self._animate.set(3)  # Default speed = fast

    # Are we currently animating?
    self._animating = 0
|
1883 |
+
|
1884 |
+
def _init_chartview(self, parent):
    """
    Create the chart view (with tree and sentence canvases) inside
    ``parent`` and route its "select" events to ``_click_cv_edge``.
    """
    view = ChartView(self._chart, parent, draw_tree=1, draw_sentence=1)
    self._cv = view
    view.add_callback("select", self._click_cv_edge)
|
1887 |
+
|
1888 |
+
def _init_rulelabel(self, parent):
    """Build the "Last edge generated by:" caption, its value label and the Step box."""
    bold = self._boldfont
    self._rulelabel1 = Label(parent, text="Last edge generated by:", font=bold)
    self._rulelabel2 = Label(
        parent, width=40, relief="groove", anchor="w", font=bold
    )
    # Caption first, then the value label, both packed from the left.
    for label in (self._rulelabel1, self._rulelabel2):
        label.pack(side="left")
    Checkbutton(parent, variable=self._step, text="Step").pack(side="right")
|
1899 |
+
|
1900 |
+
def _init_buttons(self, parent):
    """Build the two button rows: single rules on top, whole strategies below."""
    strategy_row = Frame(parent)
    rule_row = Frame(parent)
    strategy_row.pack(side="bottom", fill="x")
    rule_row.pack(side="top", fill="none")

    def add_button(row, text, color, command, side="left"):
        # All buttons share the black foreground; only colour and side vary.
        Button(
            row, text=text, background=color, foreground="black", command=command
        ).pack(side=side)

    add_button(strategy_row, "Reset\nParser", "#90c0d0", self.reset, side="right")
    # Button(frame1, text='Pause',
    #        background='#90c0d0', foreground='black',
    #        command=self.pause).pack(side='left')

    strategy_buttons = (
        ("Top Down\nStrategy", self.top_down_strategy),
        ("Bottom Up\nStrategy", self.bottom_up_strategy),
        ("Bottom Up\nLeft-Corner Strategy", self.bottom_up_leftcorner_strategy),
    )
    for text, command in strategy_buttons:
        add_button(strategy_row, text, "#90c0d0", command)

    # A ``None`` entry stands for a width=20 spacer frame between button groups.
    rule_buttons = (
        ("Top Down Init\nRule", self.top_down_init),
        ("Top Down Predict\nRule", self.top_down_predict),
        None,
        ("Bottom Up Predict\nRule", self.bottom_up),
        None,
        ("Bottom Up Left-Corner\nPredict Rule", self.bottom_up_leftcorner),
        None,
        ("Fundamental\nRule", self.fundamental),
    )
    for entry in rule_buttons:
        if entry is None:
            Frame(rule_row, width=20).pack(side="left")
        else:
            add_button(rule_row, entry[0], "#90f090", entry[1])
|
1980 |
+
|
1981 |
+
def _init_bindings(self):
    """Register every keyboard shortcut of the application on the root window."""
    root, view = self._root, self._cv

    shortcuts = (
        # Scrolling, quitting and help.
        ("<Up>", view.scroll_up),
        ("<Down>", view.scroll_down),
        ("<Prior>", view.page_up),
        ("<Next>", view.page_down),
        ("<Control-q>", self.destroy),
        ("<Control-x>", self.destroy),
        ("<F1>", self.help),
        # Chart file handling.
        ("<Control-s>", self.save_chart),
        ("<Control-o>", self.load_chart),
        ("<Control-r>", self.reset),
        # Strategies and animation stop.
        ("t", self.top_down_strategy),
        ("b", self.bottom_up_strategy),
        ("c", self.bottom_up_leftcorner_strategy),
        ("<space>", self._stop_animation),
        # Editing.
        ("<Control-g>", self.edit_grammar),
        ("<Control-t>", self.edit_sentence),
    )
    for sequence, handler in shortcuts:
        root.bind(sequence, handler)

    # Animation speed control; the values are bound as defaults so each
    # lambda keeps its own speed.
    for key, value in (("-", 1), ("=", 2), ("+", 3)):
        root.bind(key, lambda e, a=self._animate, v=value: a.set(v))

    # Step control
    root.bind("s", lambda e, s=self._step: s.set(not s.get()))
|
2009 |
+
|
2010 |
+
def _init_menubar(self):
    """Build the File/Edit/View/Apply/Animate/Zoom/Help menus and attach them."""
    menubar = Menu(self._root)

    def new_menu():
        # Every drop-down is a non-tearoff child of the menubar.
        return Menu(menubar, tearoff=0)

    def attach(label, menu, underline=0):
        menubar.add_cascade(label=label, underline=underline, menu=menu)

    # --- File ---------------------------------------------------------
    filemenu = new_menu()
    for label, command, accel in (
        ("Save Chart", self.save_chart, "Ctrl-s"),
        ("Load Chart", self.load_chart, "Ctrl-o"),
        ("Reset Chart", self.reset, "Ctrl-r"),
    ):
        filemenu.add_command(
            label=label, underline=0, command=command, accelerator=accel
        )
    filemenu.add_separator()
    filemenu.add_command(label="Save Grammar", command=self.save_grammar)
    filemenu.add_command(label="Load Grammar", command=self.load_grammar)
    filemenu.add_separator()
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
    )
    attach("File", filemenu)

    # --- Edit ---------------------------------------------------------
    editmenu = new_menu()
    for label, command, accel in (
        ("Edit Grammar", self.edit_grammar, "Ctrl-g"),
        ("Edit Text", self.edit_sentence, "Ctrl-t"),
    ):
        editmenu.add_command(
            label=label, underline=5, command=command, accelerator=accel
        )
    attach("Edit", editmenu)

    # --- View ---------------------------------------------------------
    viewmenu = new_menu()
    viewmenu.add_command(
        label="Chart Matrix", underline=6, command=self.view_matrix
    )
    viewmenu.add_command(label="Results", underline=0, command=self.view_results)
    attach("View", viewmenu)

    # --- Apply --------------------------------------------------------
    rulemenu = new_menu()
    for label, command, accel in (
        ("Top Down Strategy", self.top_down_strategy, "t"),
        ("Bottom Up Strategy", self.bottom_up_strategy, "b"),
        ("Bottom Up Left-Corner Strategy", self.bottom_up_leftcorner_strategy, "c"),
    ):
        rulemenu.add_command(
            label=label, underline=0, command=command, accelerator=accel
        )
    rulemenu.add_separator()
    for label, command in (
        ("Bottom Up Rule", self.bottom_up),
        ("Bottom Up Left-Corner Rule", self.bottom_up_leftcorner),
        ("Top Down Init Rule", self.top_down_init),
        ("Top Down Predict Rule", self.top_down_predict),
        ("Fundamental Rule", self.fundamental),
    ):
        rulemenu.add_command(label=label, command=command)
    attach("Apply", rulemenu)

    # --- Animate ------------------------------------------------------
    animatemenu = new_menu()
    animatemenu.add_checkbutton(
        label="Step", underline=0, variable=self._step, accelerator="s"
    )
    animatemenu.add_separator()
    # "No Animation" is the only speed entry without a keyboard accelerator.
    animatemenu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    for label, value, accel in (
        ("Slow Animation", 1, "-"),
        ("Normal Animation", 2, "="),
        ("Fast Animation", 3, "+"),
    ):
        animatemenu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=value,
            accelerator=accel,
        )
    attach("Animate", animatemenu, underline=1)

    # --- Zoom ---------------------------------------------------------
    zoommenu = new_menu()
    for label, value in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        zoommenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=value,
            command=self.resize,
        )
    attach("Zoom", zoommenu)

    # --- Help ---------------------------------------------------------
    helpmenu = new_menu()
    helpmenu.add_command(label="About", underline=0, command=self.about)
    helpmenu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    attach("Help", helpmenu)

    self._root.config(menu=menubar)
|
2168 |
+
|
2169 |
+
# ////////////////////////////////////////////////////////////
|
2170 |
+
# Selection Handling
|
2171 |
+
# ////////////////////////////////////////////////////////////
|
2172 |
+
|
2173 |
+
def _click_cv_edge(self, edge):
    """Handle a click on *edge* in the chart view.

    A click on a different edge selects it; a repeated click on the
    current selection cycles through that edge's trees instead.
    """
    if edge != self._selection:
        # Clicking on a new edge selects it.
        self._select_edge(edge)
        return
    # Repeated clicks on one edge cycle its trees.
    self._cv.cycle_tree()
    # [XX] this can get confused if animation is running
    # faster than the callbacks...
|
2182 |
+
|
2183 |
+
def _select_matrix_edge(self, edge):
    """Select *edge* (callback of the matrix view) and bring it into view in the chart."""
    self._select_edge(edge)
    chart_view = self._cv
    chart_view.view_edge(edge)
|
2186 |
+
|
2187 |
+
def _select_edge(self, edge):
    """Make *edge* the current selection and highlight it in every open view."""
    self._selection = edge

    # Update the chart view.
    chart_view = self._cv
    chart_view.markonly_edge(edge, "#f00")
    chart_view.draw_tree(edge)

    # Update the matrix view (only when one is open).
    matrix = self._matrix
    if matrix:
        matrix.markonly_edge(edge)
        matrix.view_edge(edge)
|
2197 |
+
|
2198 |
+
def _deselect_edge(self):
    """Clear the current selection and remove its highlight from every open view."""
    self._selection = None

    # Update the chart view.
    chart_view = self._cv
    chart_view.unmark_edge()
    chart_view.erase_tree()

    # Update the matrix view
    matrix = self._matrix
    if matrix:
        matrix.unmark_edge()
|
2206 |
+
|
2207 |
+
def _show_new_edge(self, edge):
    """Display a freshly generated *edge* and the rule that produced it in all views."""
    self._display_rule(self._cp.current_chartrule())

    # Update the chart view.
    chart_view = self._cv
    chart_view.update()
    chart_view.draw_tree(edge)
    chart_view.markonly_edge(edge, "#0df")
    chart_view.view_edge(edge)

    # Update the matrix view.
    matrix = self._matrix
    if matrix:
        matrix.update()
        matrix.markonly_edge(edge)
        matrix.view_edge(edge)

    # Update the results view.
    results = self._results
    if results:
        results.update(edge)
|
2224 |
+
|
2225 |
+
# ////////////////////////////////////////////////////////////
|
2226 |
+
# Help/usage
|
2227 |
+
# ////////////////////////////////////////////////////////////
|
2228 |
+
|
2229 |
+
def help(self, *e):
    """Stop any running animation and show the module docstring in a help window.

    :param e: ignored; lets the method be used as an event binding or menu command.
    """
    self._animating = 0
    title = "Help: Chart Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._root, title, text, width=75, font="fixed")
    except Exception:
        # Fall back to the default font when the 'fixed' attempt fails.
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        ShowText(self._root, title, text, width=75)
|
2247 |
+
|
2248 |
+
def about(self, *e):
    """Show the "About" dialog; any event arguments are ignored."""
    message = "NLTK Chart Parser Application\nWritten by Edward Loper"
    showinfo("About: Chart Parser Application", message)
|
2251 |
+
|
2252 |
+
# ////////////////////////////////////////////////////////////
|
2253 |
+
# File Menu
|
2254 |
+
# ////////////////////////////////////////////////////////////
|
2255 |
+
|
2256 |
+
# (description, pattern) pairs passed as ``filetypes`` to the chart file dialogs.
CHART_FILE_TYPES = [
    ("Pickle file", ".pickle"),
    ("All files", "*"),
]
# Grammar dialogs offer plain-text ``.cfg`` files first, then the same
# pickle / all-files entries as the chart dialogs.
GRAMMAR_FILE_TYPES = [("Plaintext grammar file", ".cfg")] + CHART_FILE_TYPES
|
2262 |
+
|
2263 |
+
def load_chart(self, *args):
    """Load a chart from a pickle file chosen by the user.

    The loaded chart replaces the current one in the chart view, in the
    matrix and results views when they are open, and in the parser.
    If anything goes wrong an error dialog is shown instead of raising.

    :param args: ignored; lets the method be used as an event binding.
    """
    filename = askopenfilename(
        filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
    )
    if not filename:
        return
    try:
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # chart files that come from a trusted source.
        with open(filename, "rb") as infile:
            chart = pickle.load(infile)
        self._chart = chart
        self._cv.update(chart)
        if self._matrix:
            self._matrix.set_chart(chart)
            self._matrix.deselect_cell()
        if self._results:
            self._results.set_chart(chart)
        self._cp.set_chart(chart)
    except Exception:
        # A stray ``raise`` used to sit here and made this dialog unreachable.
        showerror("Error Loading Chart", "Unable to open file: %r" % filename)
|
2285 |
+
|
2286 |
+
def save_chart(self, *args):
    """Save the current chart to a pickle file chosen by the user.

    If the file cannot be written an error dialog is shown instead of raising.

    :param args: ignored; lets the method be used as an event binding.
    """
    filename = asksaveasfilename(
        filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
    )
    if not filename:
        return
    try:
        with open(filename, "wb") as outfile:
            pickle.dump(self._chart, outfile)
    except Exception:
        # A stray ``raise`` used to sit here and made this dialog unreachable.
        showerror("Error Saving Chart", "Unable to open file: %r" % filename)
|
2299 |
+
|
2300 |
+
def load_grammar(self, *args):
    """Load a grammar from a file chosen by the user and install it.

    Files ending in ``.pickle`` are unpickled; any other file is read as
    text and parsed with ``CFG.fromstring``.  On failure an error dialog
    is shown.

    :param args: ignored; lets the method be used as a menu command.
    """
    filename = askopenfilename(
        filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
    )
    if not filename:
        return
    try:
        if not filename.endswith(".pickle"):
            with open(filename) as infile:
                grammar = CFG.fromstring(infile.read())
        else:
            with open(filename, "rb") as infile:
                grammar = pickle.load(infile)
        self.set_grammar(grammar)
    except Exception:
        showerror("Error Loading Grammar", "Unable to open file: %r" % filename)
|
2317 |
+
|
2318 |
+
def save_grammar(self, *args):
    """Save the current grammar to a file chosen by the user.

    Files ending in ``.pickle`` receive the pickled grammar object, which
    is what ``load_grammar`` expects to read back.  Any other file gets
    one production per line, with the productions of the start symbol
    first.  On failure an error dialog is shown.

    :param args: ignored; lets the method be used as a menu command.
    """
    filename = asksaveasfilename(
        filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
    )
    if not filename:
        return
    try:
        if filename.endswith(".pickle"):
            with open(filename, "wb") as outfile:
                # Pickle the grammar itself: the previous code dumped
                # (chart, tokens), which load_grammar cannot use as a grammar.
                pickle.dump(self._grammar, outfile)
        else:
            with open(filename, "w") as outfile:
                start_symbol = self._grammar.start()
                # sorted() is stable, so start-symbol productions move to the
                # front while the relative order inside each group is kept.
                ordered = sorted(
                    self._grammar.productions(),
                    key=lambda prod: prod.lhs() != start_symbol,
                )
                outfile.writelines("%s\n" % prod for prod in ordered)
    except Exception:
        showerror("Error Saving Grammar", "Unable to open file: %r" % filename)
|
2339 |
+
|
2340 |
+
def reset(self, *args):
    """Stop animating, rebuild the parser and show its new chart in every open view.

    :param args: ignored; lets the method be used as an event binding.
    """
    self._animating = 0
    self._reset_parser()

    # ``_reset_parser`` replaces ``self._chart``, so read it only afterwards.
    chart = self._chart
    self._cv.update(chart)

    matrix = self._matrix
    if matrix:
        matrix.set_chart(chart)
        matrix.deselect_cell()

    results = self._results
    if results:
        results.set_chart(chart)
|
2350 |
+
|
2351 |
+
# ////////////////////////////////////////////////////////////
|
2352 |
+
# Edit
|
2353 |
+
# ////////////////////////////////////////////////////////////
|
2354 |
+
|
2355 |
+
def edit_grammar(self, *e):
    """Open a ``CFGEditor`` on the current grammar, with ``set_grammar`` as its callback."""
    CFGEditor(self._root, self._grammar, self.set_grammar)
|
2357 |
+
|
2358 |
+
def set_grammar(self, grammar):
    """Install *grammar* on the application, the parser and the open results view."""
    self._grammar = grammar
    self._cp.set_grammar(grammar)
    results = self._results
    if results:
        results.set_grammar(grammar)
|
2363 |
+
|
2364 |
+
def edit_sentence(self, *e):
    """Open an entry dialog pre-filled with the current tokens joined by spaces.

    ``set_sentence`` is handed to the dialog as its callback.
    """
    EntryDialog(
        self._root,
        " ".join(self._tokens),
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
|
2369 |
+
|
2370 |
+
def set_sentence(self, sentence):
    """Split *sentence* on whitespace into the new token list and reset the parser."""
    self._tokens = sentence.split()
    self.reset()
|
2373 |
+
|
2374 |
+
# ////////////////////////////////////////////////////////////
|
2375 |
+
# View Menu
|
2376 |
+
# ////////////////////////////////////////////////////////////
|
2377 |
+
|
2378 |
+
def view_matrix(self, *e):
    """Open a chart matrix view, destroying the previous one if there is any."""
    previous = self._matrix
    if previous is not None:
        previous.destroy()
    self._matrix = matrix = ChartMatrixView(self._root, self._chart)
    matrix.add_callback("select", self._select_matrix_edge)
|
2383 |
+
|
2384 |
+
def view_results(self, *e):
    """Open a results view, destroying the previous one if there is any."""
    previous = self._results
    if previous is not None:
        previous.destroy()
    self._results = ChartResultsView(self._root, self._chart, self._grammar)
|
2388 |
+
|
2389 |
+
# ////////////////////////////////////////////////////////////
|
2390 |
+
# Zoom Menu
|
2391 |
+
# ////////////////////////////////////////////////////////////
|
2392 |
+
|
2393 |
+
def resize(self):
    """Stop animating and apply the size currently held by ``self._size``."""
    self._animating = 0
    chosen = self._size.get()
    self.set_font_size(chosen)
|
2396 |
+
|
2397 |
+
def set_font_size(self, size):
    """Apply *size* to the chart view and to the normal, bold and system fonts.

    The fonts are configured with ``-abs(size)``.
    NOTE(review): Tk treats a negative font size as pixels rather than
    points -- confirm against the Tk ``font`` documentation.
    """
    self._cv.set_font_size(size)
    negated = -abs(size)
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=negated)
|
2402 |
+
|
2403 |
+
def get_font_size(self):
    """Return the absolute value of the size held by ``self._size``."""
    current = self._size.get()
    return abs(current)
|
2405 |
+
|
2406 |
+
# ////////////////////////////////////////////////////////////
|
2407 |
+
# Parsing
|
2408 |
+
# ////////////////////////////////////////////////////////////
|
2409 |
+
|
2410 |
+
def apply_strategy(self, strategy, edge_strategy=None):
    """Run *strategy* according to the current step/animation settings.

    :param strategy: list of chart rules handed to the parser.
    :param edge_strategy: optional callable that builds a rule from the
        selected edge; used only in step mode when an edge is selected.

    While an animation is running, the call only stops that animation.
    """
    # If we're animating, then stop.
    if self._animating:
        self._animating = 0
        return

    # Clear the rule display & mark.
    self._display_rule(None)
    # self._cv.unmark_edge()

    parser = self._cp

    if self._step.get():
        selected = self._selection
        if selected is None or edge_strategy is None:
            # Plain step: generate a single edge with the whole strategy.
            parser.set_strategy(strategy)
            self._apply_strategy()
            return
        # Apply the given strategy to the selected edge.
        parser.set_strategy([edge_strategy(selected)])
        # If it failed, then clear the selection.
        if self._apply_strategy() is None:
            self._cv.unmark_edge()
            self._selection = None
        return

    parser.set_strategy(strategy)
    if self._animate.get():
        self._animating = 1
        self._animate_strategy()
        return

    # No stepping and no animation: drain the step iterator up to its first
    # ``None`` and refresh every open view once at the end.
    for edge in self._cpstep:
        if edge is None:
            break
    self._cv.update()
    if self._matrix:
        self._matrix.update()
    if self._results:
        self._results.update()
|
2449 |
+
|
2450 |
+
def _stop_animation(self, *e):
    """Clear the animating flag; any event arguments are ignored."""
    self._animating = 0
|
2452 |
+
|
2453 |
+
def _animate_strategy(self, speed=1):
    """Generate one edge and, while animating, schedule the next call.

    :param speed: unused; kept for interface compatibility.

    Nothing is scheduled when no edge was produced, when animation is
    set to 0, or when step mode is on.
    """
    if self._animating == 0 or self._apply_strategy() is None:
        return
    setting = self._animate.get()
    if setting == 0 or self._step.get() == 1:
        return
    # Delay in milliseconds: 1 (slow) -> 3000, 2 (normal) -> 1000, else 20.
    delay = {1: 3000, 2: 1000}.get(setting, 20)
    self._root.after(delay, self._animate_strategy)
|
2465 |
+
|
2466 |
+
def _apply_strategy(self):
    """Take the next item from the step iterator, display it if it is an edge, and return it."""
    edge = next(self._cpstep)
    if edge is None:
        return None
    self._show_new_edge(edge)
    return edge
|
2472 |
+
|
2473 |
+
def _display_rule(self, rule):
    """Show *rule* in the "Last edge generated by:" label.

    :param rule: the rule to display, or ``None`` to clear the label;
        anything else is shown via ``str(rule)``.
    """
    # The old body also fetched the chart view's font size into an unused
    # local; that dead lookup has been removed.
    self._rulelabel2["text"] = "" if rule is None else str(rule)
|
2480 |
+
|
2481 |
+
# ////////////////////////////////////////////////////////////
|
2482 |
+
# Parsing Strategies
|
2483 |
+
# ////////////////////////////////////////////////////////////
|
2484 |
+
|
2485 |
+
# Basic rules:
|
2486 |
+
# Each basic rule is kept as a one-element list so it can be passed to
# ``apply_strategy`` directly or combined into a complete strategy below.
_TD_INIT = [TopDownInitRule()]
_TD_PREDICT = [TopDownPredictRule()]
_BU_RULE = [BottomUpPredictRule()]
_BU_LC_RULE = [BottomUpPredictCombineRule()]
_FUNDAMENTAL = [SingleEdgeFundamentalRule()]

# Complete strategies: a prediction rule set followed by the fundamental rule.
_TD_STRATEGY = [*_TD_INIT, *_TD_PREDICT, *_FUNDAMENTAL]
_BU_STRATEGY = [*_BU_RULE, *_FUNDAMENTAL]
_BU_LC_STRATEGY = [*_BU_LC_RULE, *_FUNDAMENTAL]
|
2496 |
+
|
2497 |
+
# Button callback functions:
|
2498 |
+
def top_down_init(self, *e):
    """Callback: apply the top-down init rule; it has no per-edge variant."""
    self.apply_strategy(self._TD_INIT, edge_strategy=None)
|
2500 |
+
|
2501 |
+
def top_down_predict(self, *e):
    """Callback: apply the top-down predict rule, with ``TopDownPredictEdgeRule`` as the per-edge rule."""
    self.apply_strategy(self._TD_PREDICT, edge_strategy=TopDownPredictEdgeRule)
|
2503 |
+
|
2504 |
+
def bottom_up(self, *e):
    """Callback: apply the bottom-up predict rule, with ``BottomUpEdgeRule`` as the per-edge rule."""
    self.apply_strategy(self._BU_RULE, edge_strategy=BottomUpEdgeRule)
|
2506 |
+
|
2507 |
+
def bottom_up_leftcorner(self, *e):
    """Callback: apply the bottom-up left-corner rule, with ``BottomUpLeftCornerEdgeRule`` as the per-edge rule."""
    self.apply_strategy(
        self._BU_LC_RULE, edge_strategy=BottomUpLeftCornerEdgeRule
    )
|
2509 |
+
|
2510 |
+
def fundamental(self, *e):
    """Callback: apply the fundamental rule, with ``FundamentalEdgeRule`` as the per-edge rule."""
    self.apply_strategy(self._FUNDAMENTAL, edge_strategy=FundamentalEdgeRule)
|
2512 |
+
|
2513 |
+
def bottom_up_strategy(self, *e):
    """Callback: run the complete bottom-up strategy, with ``BottomUpEdgeRule`` as the per-edge rule."""
    self.apply_strategy(self._BU_STRATEGY, edge_strategy=BottomUpEdgeRule)
|
2515 |
+
|
2516 |
+
def bottom_up_leftcorner_strategy(self, *e):
    """Callback: run the complete bottom-up left-corner strategy, with ``BottomUpLeftCornerEdgeRule`` as the per-edge rule."""
    self.apply_strategy(
        self._BU_LC_STRATEGY, edge_strategy=BottomUpLeftCornerEdgeRule
    )
|
2518 |
+
|
2519 |
+
def top_down_strategy(self, *e):
    """Callback: run the complete top-down strategy, with ``TopDownPredictEdgeRule`` as the per-edge rule."""
    self.apply_strategy(self._TD_STRATEGY, edge_strategy=TopDownPredictEdgeRule)
|
2521 |
+
|
2522 |
+
|
2523 |
+
def app():
    """Run the chart parser demo on a small toy grammar and example sentence.

    Prints the grammar productions and the tokens, then opens a
    ``ChartParserApp`` window and enters its main loop.
    """
    grammar = CFG.fromstring(
        """
    # Grammatical productions.
        S -> NP VP
        VP -> VP PP | V NP | V
        NP -> Det N | NP PP
        PP -> P NP
    # Lexical productions.
        NP -> 'John' | 'I'
        Det -> 'the' | 'my' | 'a'
        N -> 'dog' | 'cookie' | 'table' | 'cake' | 'fork'
        V -> 'ate' | 'saw'
        P -> 'on' | 'under' | 'with'
    """
    )

    # A longer alternative: "John ate the cake on the table with a fork"
    sent = "John ate the cake on the table"
    tokens = sent.split()

    print("grammar= (")
    for rule in grammar.productions():
        # Print the text itself; the old call passed a tuple to print(), so
        # its repr -- parentheses and quotes included -- was displayed.
        print("    ", repr(rule) + ",")
    print(")")
    print("tokens = %r" % tokens)
    print('Calling "ChartParserApp(grammar, tokens)"...')
    ChartParserApp(grammar, tokens).mainloop()
|
2551 |
+
|
2552 |
+
|
2553 |
+
# Launch the demo application when this file is run as a script.
if __name__ == "__main__":
    app()
|
2555 |
+
|
2556 |
+
# Chart comparer:
|
2557 |
+
# charts = ['/tmp/earley.pickle',
|
2558 |
+
# '/tmp/topdown.pickle',
|
2559 |
+
# '/tmp/bottomup.pickle']
|
2560 |
+
# ChartComparer(*charts).mainloop()
|
2561 |
+
|
2562 |
+
# import profile
|
2563 |
+
# profile.run('demo2()', '/tmp/profile.out')
|
2564 |
+
# import pstats
|
2565 |
+
# p = pstats.Stats('/tmp/profile.out')
|
2566 |
+
# p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
|
2567 |
+
# p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
|
2568 |
+
|
2569 |
+
# Only the demo entry point is exported by ``from ... import *``.
__all__ = ["app"]
|
pipeline/nltk/app/chunkparser_app.py
ADDED
@@ -0,0 +1,1500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Regexp Chunk Parser Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Edward Loper <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
"""
|
9 |
+
A graphical tool for exploring the regular expression based chunk
|
10 |
+
parser ``nltk.chunk.RegexpChunkParser``.
|
11 |
+
"""
|
12 |
+
|
13 |
+
# Todo: Add a way to select the development set from the menubar. This
|
14 |
+
# might just need to be a selection box (conll vs treebank etc) plus
|
15 |
+
# configuration parameters to select what's being chunked (eg VP vs NP)
|
16 |
+
# and what part of the data is being used as the development set.
|
17 |
+
|
18 |
+
import random
|
19 |
+
import re
|
20 |
+
import textwrap
|
21 |
+
import time
|
22 |
+
from tkinter import (
|
23 |
+
Button,
|
24 |
+
Canvas,
|
25 |
+
Checkbutton,
|
26 |
+
Frame,
|
27 |
+
IntVar,
|
28 |
+
Label,
|
29 |
+
Menu,
|
30 |
+
Scrollbar,
|
31 |
+
Text,
|
32 |
+
Tk,
|
33 |
+
)
|
34 |
+
from tkinter.filedialog import askopenfilename, asksaveasfilename
|
35 |
+
from tkinter.font import Font
|
36 |
+
|
37 |
+
from nltk.chunk import ChunkScore, RegexpChunkParser
|
38 |
+
from nltk.chunk.regexp import RegexpChunkRule
|
39 |
+
from nltk.corpus import conll2000, treebank_chunk
|
40 |
+
from nltk.draw.util import ShowText
|
41 |
+
from nltk.tree import Tree
|
42 |
+
from nltk.util import in_idle
|
43 |
+
|
44 |
+
|
45 |
+
class RegexpChunkApp:
|
46 |
+
"""
|
47 |
+
A graphical tool for exploring the regular expression based chunk
|
48 |
+
parser ``nltk.chunk.RegexpChunkParser``.
|
49 |
+
|
50 |
+
See ``HELP`` for instructional text.
|
51 |
+
"""
|
52 |
+
|
53 |
+
##/////////////////////////////////////////////////////////////////
|
54 |
+
## Help Text
|
55 |
+
##/////////////////////////////////////////////////////////////////
|
56 |
+
|
57 |
+
#: A dictionary mapping from part of speech tags to descriptions,
|
58 |
+
#: which is used in the help text. (This should probably live with
|
59 |
+
#: the conll and/or treebank corpus instead.)
|
60 |
+
# Penn-Treebank-style part-of-speech tags and the human-readable
# descriptions shown on the "Tags" help page.
TAGSET = {
    "CC": "Coordinating conjunction",
    "PRP$": "Possessive pronoun",
    "CD": "Cardinal number",
    "RB": "Adverb",
    "DT": "Determiner",
    "RBR": "Adverb, comparative",
    "EX": "Existential there",
    "RBS": "Adverb, superlative",
    "FW": "Foreign word",
    "RP": "Particle",
    "JJ": "Adjective",
    "TO": "to",
    "JJR": "Adjective, comparative",
    "UH": "Interjection",
    "JJS": "Adjective, superlative",
    "VB": "Verb, base form",
    "LS": "List item marker",
    "VBD": "Verb, past tense",
    "MD": "Modal",
    "NNS": "Noun, plural",
    "NN": "Noun, singular or mass",
    "VBN": "Verb, past participle",
    "VBZ": "Verb, 3rd ps. sing. present",
    "NNP": "Proper noun, singular",
    "NNPS": "Proper noun plural",
    "WDT": "wh-determiner",
    "PDT": "Predeterminer",
    "WP": "wh-pronoun",
    "POS": "Possessive ending",
    "WP$": "Possessive wh-pronoun",
    "PRP": "Personal pronoun",
    "WRB": "wh-adverb",
    "(": "open parenthesis",
    ")": "close parenthesis",
    "``": "open quote",
    ",": "comma",
    "''": "close quote",
    ".": "period",
    "#": "pound sign (currency marker)",
    "$": "dollar sign (currency marker)",
    "IN": "Preposition/subord. conjunction",
    "SYM": "Symbol (mathematical or scientific)",
    "VBG": "Verb, gerund/present participle",
    "VBP": "Verb, non-3rd ps. sing. present",
    ":": "colon",
}
|
107 |
+
|
108 |
+
#: Contents for the help box.  This is a list of tuples, one for
#: each help page, where each tuple has three elements:
#:   - A title (displayed as a tab)
#:   - A string description of tabstops (see Tkinter.Text for details)
#:   - The text contents for the help page.  You can use expressions
#:     like <red>...</red> to colorize the text; see ``HELP_AUTOTAG``
#:     for a list of tags you can use for colorizing.
HELP = [
    (
        "Help",
        "20",
        "Welcome to the regular expression chunk-parser grammar editor. "
        "You can use this editor to develop and test chunk parser grammars "
        "based on NLTK's RegexpChunkParser class.\n\n"
        # Help box.
        "Use this box ('Help') to learn more about the editor; click on the "
        "tabs for help on specific topics:"
        "<indent>\n"
        "Rules: grammar rule types\n"
        "Regexps: regular expression syntax\n"
        "Tags: part of speech tags\n</indent>\n"
        # Grammar.
        "Use the upper-left box ('Grammar') to edit your grammar. "
        "Each line of your grammar specifies a single 'rule', "
        "which performs an action such as creating a chunk or merging "
        "two chunks.\n\n"
        # Dev set.
        "The lower-left box ('Development Set') runs your grammar on the "
        "development set, and displays the results. "
        "Your grammar's chunks are <highlight>highlighted</highlight>, and "
        "the correct (gold standard) chunks are "
        "<underline>underlined</underline>. If they "
        "match, they are displayed in <green>green</green>; otherwise, "
        "they are displayed in <red>red</red>. The box displays a single "
        "sentence from the development set at a time; use the scrollbar or "
        "the next/previous buttons to view additional sentences.\n\n"
        # Performance
        "The lower-right box ('Evaluation') tracks the performance of "
        "your grammar on the development set. The 'precision' axis "
        "indicates how many of your grammar's chunks are correct; and "
        "the 'recall' axis indicates how many of the gold standard "
        "chunks your system generated. Typically, you should try to "
        "design a grammar that scores high on both metrics. The "
        "exact precision and recall of the current grammar, as well "
        "as their harmonic mean (the 'f-score'), are displayed in "
        "the status bar at the bottom of the window.",
    ),
    (
        "Rules",
        "10",
        "<h1>{...regexp...}</h1>"
        "<indent>\nChunk rule: creates new chunks from words matching "
        "regexp.</indent>\n\n"
        "<h1>}...regexp...{</h1>"
        "<indent>\nStrip rule: removes words matching regexp from existing "
        "chunks.</indent>\n\n"
        "<h1>...regexp1...}{...regexp2...</h1>"
        "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
        "regexp2 in two.</indent>\n\n"
        # The header names both patterns so that it agrees with the
        # description below it (and with the split rule's header).
        "<h1>...regexp1...{}...regexp2...</h1>"
        "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
        "and regexp2</indent>\n",
    ),
    (
        "Regexps",
        "10 60",
        # "Regular Expression Syntax Summary:\n\n"
        "<h1>Pattern\t\tMatches...</h1>\n"
        "<hangindent>"
        "\t<<var>T</var>>\ta word with tag <var>T</var> "
        "(where <var>T</var> may be a regexp).\n"
        "\t<var>x</var>?\tan optional <var>x</var>\n"
        "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
        "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
        "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
        "\t.\tmatches any character\n"
        "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
        "\t# <var>x...</var>\tTreats <var>x...</var> "
        "(to the end of the line) as a comment\n"
        "\t\\<var>C</var>\tmatches character <var>C</var> "
        "(useful when <var>C</var> is a special character "
        "like + or #)\n"
        "</hangindent>"
        "\n<h1>Examples:</h1>\n"
        "<hangindent>"
        "\t<regexp><NN></regexp>\n"
        '\t\tMatches <match>"cow/NN"</match>\n'
        '\t\tMatches <match>"green/NN"</match>\n'
        "\t<regexp><VB.*></regexp>\n"
        '\t\tMatches <match>"eating/VBG"</match>\n'
        '\t\tMatches <match>"ate/VBD"</match>\n'
        "\t<regexp><IN><DT><NN></regexp>\n"
        '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
        "\t<regexp><RB>?<VBD></regexp>\n"
        '\t\tMatches <match>"ran/VBD"</match>\n'
        '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
        # Not a raw string: the leading tab and trailing newline must be
        # real control characters; only the backslash before '#' is literal.
        "\t<regexp><\\#><CD> # This is a comment...</regexp>\n"
        '\t\tMatches <match>"#/# 100/CD"</match>\n'
        "</hangindent>",
    ),
    (
        "Tags",
        "10 60",
        "<h1>Part of Speech Tags:</h1>\n"
        + "<hangindent>"
        + "<<TAGSET>>"
        + "</hangindent>\n",  # this gets auto-substituted w/ self.TAGSET
    ),
]
|
217 |
+
|
218 |
+
# Markup tags usable inside the ``HELP`` page texts, each paired with the
# text-tag options it stands for.
HELP_AUTOTAG = [
    ("red", {"foreground": "#a00"}),
    ("green", {"foreground": "#080"}),
    ("highlight", {"background": "#ddd"}),
    ("underline", {"underline": True}),
    ("h1", {"underline": True}),
    ("indent", {"lmargin1": 20, "lmargin2": 20}),
    ("hangindent", {"lmargin1": 0, "lmargin2": 60}),
    ("var", {"foreground": "#88f"}),
    ("regexp", {"foreground": "#ba7"}),
    ("match", {"foreground": "#6a6"}),
]
|
230 |
+
|
231 |
+
##/////////////////////////////////////////////////////////////////
|
232 |
+
## Config Parameters
|
233 |
+
##/////////////////////////////////////////////////////////////////
|
234 |
+
|
235 |
+
#: If the user has not pressed any key for this amount of time (in
#: seconds), and the current grammar has not been evaluated, then
#: the eval demon will evaluate it.
_EVAL_DELAY = 1

#: The number of sentences that should be evaluated by the eval
#: demon each time it runs.
_EVAL_CHUNK = 15

#: The frequency (in seconds) at which the eval demon is run.
_EVAL_FREQ = 0.2

#: The minimum amount of time that the eval demon should take each time
#: it runs -- if it takes less than this time, _EVAL_CHUNK will be
#: modified upwards.
_EVAL_DEMON_MIN = 0.02

#: The maximum amount of time that the eval demon should take each time
#: it runs -- if it takes more than this time, _EVAL_CHUNK will be
#: modified downwards.
_EVAL_DEMON_MAX = 0.04

# Widget option sets, passed as keyword arguments when the corresponding
# widgets are created.
_GRAMMARBOX_PARAMS = {
    "width": 40,
    "height": 12,
    "background": "#efe",
    "highlightbackground": "#efe",
    "highlightthickness": 1,
    "relief": "groove",
    "border": 2,
    "wrap": "word",
}
_HELPBOX_PARAMS = {
    "width": 15,
    "height": 15,
    "background": "#efe",
    "highlightbackground": "#efe",
    "foreground": "#555",
    "highlightthickness": 1,
    "relief": "groove",
    "border": 2,
    "wrap": "word",
}
_DEVSETBOX_PARAMS = {
    "width": 70,
    "height": 10,
    "background": "#eef",
    "highlightbackground": "#eef",
    "highlightthickness": 1,
    "relief": "groove",
    "border": 2,
    "wrap": "word",
    "tabs": (30,),
}
_STATUS_PARAMS = {"background": "#9bb", "relief": "groove", "border": 2}
_FONT_PARAMS = {"family": "helvetica", "size": -20}
_FRAME_PARAMS = {"background": "#777", "padx": 2, "pady": 2, "border": 3}
_EVALBOX_PARAMS = {
    "background": "#eef",
    "highlightbackground": "#eef",
    "highlightthickness": 1,
    "relief": "groove",
    "border": 2,
    "width": 300,
    "height": 280,
}
_BUTTON_PARAMS = {
    "background": "#777",
    "activebackground": "#777",
    "highlightbackground": "#777",
}

# Colors and spacing for the help tabs.
_HELPTAB_BG_COLOR = "#aba"
_HELPTAB_FG_COLOR = "#efe"

_HELPTAB_FG_PARAMS = {"background": "#efe"}
_HELPTAB_BG_PARAMS = {"background": "#aba"}
_HELPTAB_SPACER = 6
|
307 |
+
|
308 |
+
def normalize_grammar(self, grammar):
    """
    Return a canonical form of a grammar string.

    Comments (an unescaped ``#`` through the end of its line) are
    removed, runs of spaces are collapsed to one space, whitespace that
    follows a newline is dropped, and the result is stripped.  Finally,
    every ``$`` that follows a character other than a backslash is
    backslash-escaped.

    :param grammar: The grammar text to normalize.
    :return: The normalized grammar text.
    """
    rewrites = (
        # Strip comments ("\#" is an escaped hash, not a comment start).
        (r"((\\.|[^#])*)(#.*)?", r"\1"),
        # Normalize whitespace.
        (" +", " "),
        (r"\n\s+", r"\n"),
    )
    text = grammar
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text)
    text = text.strip()
    # [xx] Hack: automatically backslash $!
    return re.sub(r"([^\\])\$", r"\1\\$", text)
|
318 |
+
|
319 |
+
def __init__(
    self,
    devset_name="conll2000",
    devset=None,
    grammar="",
    chunk_label="NP",
    tagset=None,
):
    """
    Build the application window and show the first development sentence.

    :param devset_name: The name of the development set; used for
        display & for save files.  If either the name 'treebank'
        or the name 'conll2000' is used, and devset is None, then
        devset will be set automatically.
    :param devset: A list of chunked sentences
    :param grammar: The initial grammar to display.
    :param chunk_label: The chunk label that is scored (passed to
        ``ChunkScore``).
    :param tagset: Dictionary from tags to string descriptions, used
        for the help page.  Defaults to ``self.TAGSET``.
    :raise ValueError: If ``devset`` is None and ``devset_name`` is
        neither 'conll2000' nor 'treebank'.
    """
    self._chunk_label = chunk_label

    if tagset is None:
        tagset = self.TAGSET
    self.tagset = tagset

    # Named development sets:
    if devset is None:
        if devset_name == "conll2000":
            devset = conll2000.chunked_sents("train.txt")  # [:100]
        # Compare the *name*: ``devset`` itself is always None in this
        # branch, so testing it could never select the treebank corpus.
        elif devset_name == "treebank":
            devset = treebank_chunk.chunked_sents()  # [:100]
        else:
            raise ValueError("Unknown development set %s" % devset_name)

    self.chunker = None
    """The chunker built from the grammar string"""

    self.grammar = grammar
    """The unparsed grammar string"""

    self.normalized_grammar = None
    """A normalized version of ``self.grammar``."""

    self.grammar_changed = 0
    """The last time() that the grammar was changed."""

    self.devset = devset
    """The development set -- a list of chunked sentences."""

    self.devset_name = devset_name
    """The name of the development set (for save files)."""

    self.devset_index = -1
    """The index into the development set of the first instance
       that's currently being viewed."""

    self._last_keypress = 0
    """The time() when a key was most recently pressed"""

    self._history = []
    """A list of (grammar, precision, recall, fscore) tuples for
       grammars that the user has already tried."""

    self._history_index = 0
    """When the user is scrolling through previous grammars, this
       is used to keep track of which grammar they're looking at."""

    self._eval_grammar = None
    """The grammar that is being currently evaluated by the eval
       demon."""

    self._eval_normalized_grammar = None
    """A normalized copy of ``_eval_grammar``."""

    self._eval_index = 0
    """The index of the next sentence in the development set that
       should be looked at by the eval demon."""

    self._eval_score = ChunkScore(chunk_label=chunk_label)
    """The ``ChunkScore`` object that's used to keep track of the score
    of the current grammar on the development set."""

    # Set up the main window.
    top = self.top = Tk()
    top.geometry("+50+50")
    top.title("Regexp Chunk Parser App")
    top.bind("<Control-q>", self.destroy)

    # Variable that restricts how much of the devset we look at.
    self._devset_size = IntVar(top)
    self._devset_size.set(100)

    # Set up all the tkinter widgets
    self._init_fonts(top)
    self._init_widgets(top)
    self._init_bindings(top)
    self._init_menubar(top)
    self.grammarbox.focus()

    # If a grammar was given, then display it.
    if grammar:
        self.grammarbox.insert("end", grammar + "\n")
        self.grammarbox.mark_set("insert", "1.0")

    # Display the first item in the development set
    self.show_devset(0)
    self.update()
|
425 |
+
|
426 |
+
def _init_bindings(self, top):
    """
    Install the keyboard shortcuts and the plot-resize handler.

    :param top: The top-level window that receives the global bindings.
    """
    window_bindings = (
        ("<Control-n>", self._devset_next),
        ("<Control-p>", self._devset_prev),
        ("<Control-t>", self.toggle_show_trace),
        ("<KeyPress>", self.update),
        ("<Control-s>", lambda e: self.save_grammar()),
        ("<Control-o>", lambda e: self.load_grammar()),
    )
    for sequence, handler in window_bindings:
        top.bind(sequence, handler)

    # The grammar editor gets its own copies of the navigation shortcuts.
    editor_bindings = (
        ("<Control-t>", self.toggle_show_trace),
        ("<Control-n>", self._devset_next),
        ("<Control-p>", self._devset_prev),
    )
    for sequence, handler in editor_bindings:
        self.grammarbox.bind(sequence, handler)

    # Redraw the eval graph when the window size changes
    self.evalbox.bind("<Configure>", self._eval_plot)
|
439 |
+
|
440 |
+
def _init_fonts(self, top):
    """
    Create the size variable and the two fonts used by the widgets.

    :param top: The window that owns the ``IntVar`` holding the size.
    """
    # What's our font size?  (Stored positive; handed to ``Font`` negated.)
    self._size = IntVar(top)
    self._size.set(20)
    base_size = self._size.get()
    self._font = Font(family="helvetica", size=-base_size)
    # The small font is 14/20 of the regular one.
    small_size = int(base_size * 14 // 20)
    self._smallfont = Font(family="helvetica", size=-small_size)
|
448 |
+
|
449 |
+
def _init_menubar(self, parent):
    """
    Build the File / View / Development-Set / Help menus and attach the
    resulting menubar to ``parent``.

    :param parent: The window that the menubar is installed on.
    """
    menubar = Menu(parent)

    # File menu
    filemenu = Menu(menubar, tearoff=0)
    file_entries = (
        dict(label="Reset Application", underline=0, command=self.reset),
        dict(
            label="Save Current Grammar",
            underline=0,
            accelerator="Ctrl-s",
            command=self.save_grammar,
        ),
        dict(
            label="Load Grammar",
            underline=0,
            accelerator="Ctrl-o",
            command=self.load_grammar,
        ),
        dict(label="Save Grammar History", underline=13, command=self.save_history),
        dict(label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"),
    )
    for entry in file_entries:
        filemenu.add_command(**entry)
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # View menu: one radio button per font size, all sharing self._size.
    viewmenu = Menu(menubar, tearoff=0)
    font_sizes = (
        ("Tiny", 10),
        ("Small", 16),
        ("Medium", 20),
        ("Large", 24),
        ("Huge", 34),
    )
    for label, size in font_sizes:
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=size,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # Development-Set menu: how many sentences are evaluated.
    devsetmenu = Menu(menubar, tearoff=0)
    for sentence_count in (50, 100, 200, 500):
        devsetmenu.add_radiobutton(
            label="%d sentences" % sentence_count,
            variable=self._devset_size,
            value=sentence_count,
            command=self.set_devset_size,
        )
    menubar.add_cascade(label="Development-Set", underline=0, menu=devsetmenu)

    # Help menu
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
|
546 |
+
|
547 |
+
def toggle_show_trace(self, *e):
    """
    Switch between the trace view and the development-set view.

    :param e: Ignored; accepts the event object when used as a binding.
    :return: The string ``"break"``.
    """
    switch_view = self.show_devset if self._showing_trace else self.show_trace
    switch_view()
    return "break"
|
553 |
+
|
554 |
+
_SCALE_N = 5 # center on the last 5 examples.
|
555 |
+
_DRAW_LINES = False
|
556 |
+
|
557 |
+
def _eval_plot(self, *e, **config):
    """
    Redraw the precision/recall plot of the grammar history on
    ``self.evalbox``, and show the scores of the currently selected
    history entry in the status bar.

    :param e: Ignored; accepts the event object when used as a binding.
    :param config: Optional ``width`` and ``height`` overrides; when
        omitted, the canvas's current size is used.
    """
    canvas = self.evalbox
    width = config.get("width", canvas.winfo_width())
    height = config.get("height", canvas.winfo_height())

    # Clear the canvas
    canvas.delete("all")

    # Draw the precision & recall labels.  Their bounding boxes determine
    # the left and bottom edges of the plot area.
    tag = canvas.create_text(
        10, height // 2 - 10, justify="left", anchor="w", text="Precision"
    )
    left, right = canvas.bbox(tag)[2] + 5, width - 10
    tag = canvas.create_text(
        left + (width - left) // 2,
        height - 10,
        anchor="s",
        text="Recall",
        justify="center",
    )
    top, bot = 10, canvas.bbox(tag)[1] - 10

    # Draw masks for clipping the plot.
    bg = self._EVALBOX_PARAMS["background"]
    canvas.lower(canvas.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg))
    canvas.lower(canvas.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg))

    # Calculate the plot's scale.
    if self._autoscale.get() and len(self._history) > 1:
        max_precision = max_recall = 0
        min_precision = min_recall = 1
        # Center on the most recent _SCALE_N entries.  Slicing (rather
        # than an index range) also covers the oldest entry when the
        # history holds fewer than _SCALE_N + 1 items.
        for _, precision, recall, _ in self._history[-self._SCALE_N :]:
            min_precision = min(precision, min_precision)
            min_recall = min(recall, min_recall)
            max_precision = max(precision, max_precision)
            max_recall = max(recall, max_recall)
        # Pad by one percentage point, clamped to [0, 1]; this also keeps
        # each axis range non-empty.
        min_precision = max(min_precision - 0.01, 0)
        min_recall = max(min_recall - 0.01, 0)
        max_precision = min(max_precision + 0.01, 1)
        max_recall = min(max_recall + 0.01, 1)
    else:
        min_precision = min_recall = 0
        max_precision = max_recall = 1

    # Draw the axis lines & grid lines
    for i in range(11):
        x = left + (right - left) * (
            (i / 10.0 - min_recall) / (max_recall - min_recall)
        )
        y = bot - (bot - top) * (
            (i / 10.0 - min_precision) / (max_precision - min_precision)
        )
        if left < x < right:
            canvas.create_line(x, top, x, bot, fill="#888")
        if top < y < bot:
            canvas.create_line(left, y, right, y, fill="#888")
    canvas.create_line(left, top, left, bot)
    canvas.create_line(left, bot, right, bot)

    # Display the plot's scale
    canvas.create_text(
        left - 3,
        bot,
        justify="right",
        anchor="se",
        text="%d%%" % (100 * min_precision),
    )
    canvas.create_text(
        left - 3,
        top,
        justify="right",
        anchor="ne",
        text="%d%%" % (100 * max_precision),
    )
    canvas.create_text(
        left,
        bot + 3,
        justify="center",
        anchor="nw",
        text="%d%%" % (100 * min_recall),
    )
    canvas.create_text(
        right,
        bot + 3,
        justify="center",
        anchor="ne",
        text="%d%%" % (100 * max_recall),
    )

    # Display the scores.
    prev_x = prev_y = None
    for i, (_, precision, recall, fscore) in enumerate(self._history):
        x = left + (right - left) * (
            (recall - min_recall) / (max_recall - min_recall)
        )
        y = bot - (bot - top) * (
            (precision - min_precision) / (max_precision - min_precision)
        )
        if i == self._history_index:
            # The selected entry is drawn on top and reported in the
            # status bar.
            canvas.create_oval(
                x - 2, y - 2, x + 2, y + 2, fill="#0f0", outline="#000"
            )
            self.status["text"] = (
                "Precision: %.2f%%\t" % (precision * 100)
                + "Recall: %.2f%%\t" % (recall * 100)
                + "F-score: %.2f%%" % (fscore * 100)
            )
        else:
            canvas.lower(
                canvas.create_oval(
                    x - 2, y - 2, x + 2, y + 2, fill="#afa", outline="#8c8"
                )
            )
        if prev_x is not None and self._eval_lines.get():
            canvas.lower(canvas.create_line(prev_x, prev_y, x, y, fill="#8c8"))
        prev_x, prev_y = x, y
|
691 |
+
|
692 |
+
_eval_demon_running = False
|
693 |
+
|
694 |
+
def _eval_demon(self):
    """
    Score the current grammar against the development set, one batch of
    ``_EVAL_CHUNK`` sentences per call, rescheduling itself through
    ``self.top.after`` until the whole set has been scored.

    A finished evaluation is appended to ``self._history`` and plotted.
    """
    if self.top is None:
        return
    if self.chunker is None:
        self._eval_demon_running = False
        return

    # Note our starting time.
    started = time.time()

    grammar_differs = self.normalized_grammar != self._eval_normalized_grammar

    # If the user is still typing, then wait for them to finish.
    if grammar_differs and time.time() - self._last_keypress < self._EVAL_DELAY:
        self._eval_demon_running = True
        return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)

    # If the grammar changed, restart the evaluation.
    if grammar_differs:
        # Check if we've seen this grammar already.  If so, then
        # just use the old evaluation values.
        for (old_grammar, precision, recall, fscore) in self._history:
            if self.normalized_grammar == self.normalize_grammar(old_grammar):
                self._history.append((old_grammar, precision, recall, fscore))
                self._history_index = len(self._history) - 1
                self._eval_plot()
                self._eval_demon_running = False
                self._eval_normalized_grammar = None
                return
        self._eval_index = 0
        self._eval_score = ChunkScore(chunk_label=self._chunk_label)
        self._eval_grammar = self.grammar
        self._eval_normalized_grammar = self.normalized_grammar

    # If the grammar is empty, then don't bother evaluating it, or
    # recording it in history -- the score will just be 0.
    if self.normalized_grammar.strip() == "":
        self._eval_demon_running = False
        return

    # Score the next set of examples
    batch_end = min(self._eval_index + self._EVAL_CHUNK, self._devset_size.get())
    for gold in self.devset[self._eval_index : batch_end]:
        guess = self._chunkparse(gold.leaves())
        self._eval_score.score(gold, guess)

    # update our index in the devset.
    self._eval_index += self._EVAL_CHUNK

    # Not done yet: report progress and schedule the next batch.
    if self._eval_index < self._devset_size.get():
        progress = 100 * self._eval_index / self._devset_size.get()
        self.status["text"] = "Evaluating on Development Set (%d%%)" % progress
        self._eval_demon_running = True
        self._adaptively_modify_eval_chunk(time.time() - started)
        self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)
        return

    # Done: record the final scores and redraw the plot.
    self._history.append(
        (
            self._eval_grammar,
            self._eval_score.precision(),
            self._eval_score.recall(),
            self._eval_score.f_measure(),
        )
    )
    self._history_index = len(self._history) - 1
    self._eval_plot()
    self._eval_demon_running = False
    self._eval_normalized_grammar = None
|
768 |
+
|
769 |
+
def _adaptively_modify_eval_chunk(self, t):
    """
    Modify _EVAL_CHUNK to try to keep the amount of time that the
    eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

    The chunk size changes by at least 1 and at most 10 per call, and is
    only shrunk while it is larger than 5.

    :param t: The amount of time that the eval demon took.  A
        non-positive value (e.g. from a coarse clock) is treated as
        "too fast to measure" and grows the chunk by the maximum step.
    """
    chunk = self._EVAL_CHUNK
    if t > self._EVAL_DEMON_MAX and chunk > 5:
        # Too slow: scale the chunk down proportionally.
        scaled = int(chunk * (self._EVAL_DEMON_MAX / t))
        self._EVAL_CHUNK = min(chunk - 1, max(scaled, chunk - 10))
    elif t < self._EVAL_DEMON_MIN:
        # Too fast: scale the chunk up proportionally.  Guard the division
        # so that a zero elapsed time cannot raise ZeroDivisionError.
        if t > 0:
            scaled = int(chunk * (self._EVAL_DEMON_MIN / t))
        else:
            scaled = chunk + 10
        self._EVAL_CHUNK = max(chunk + 1, min(scaled, chunk + 10))
|
792 |
+
|
793 |
+
def _init_widgets(self, top):
    """
    Create and lay out every widget of the chunk-parser window.

    One grid frame holds the grammar editor with its history buttons,
    the help box with its tabs, the development-set box with its scroll
    bars and navigation buttons, the evaluation canvas with its option
    buttons, and the status label.  The text tags used to colour the
    devset box and the grammar box are configured last.

    :param top: The parent widget that the main frame is packed into.
    """
    bg = self._FRAME_PARAMS["background"]

    # Main grid frame; columns 0 and 3 and rows 1 and 5 absorb extra space.
    frame0 = Frame(top, **self._FRAME_PARAMS)
    for column, weight in ((0, 4), (3, 2)):
        frame0.grid_columnconfigure(column, weight=weight)
    for row in (1, 5):
        frame0.grid_rowconfigure(row, weight=1)

    # The grammar editor and its label.
    self.grammarbox = Text(frame0, font=self._font, **self._GRAMMARBOX_PARAMS)
    self.grammarlabel = Label(
        frame0,
        font=self._font,
        text="Grammar:",
        highlightcolor="black",
        background=self._GRAMMARBOX_PARAMS["background"],
    )
    self.grammarlabel.grid(column=0, row=0, sticky="SW")
    self.grammarbox.grid(column=0, row=1, sticky="NEWS")

    # Vertical scroll bar for the grammar editor.
    grammar_scrollbar = Scrollbar(frame0, command=self.grammarbox.yview)
    grammar_scrollbar.grid(column=1, row=1, sticky="NWS")
    self.grammarbox.config(yscrollcommand=grammar_scrollbar.set)

    # Buttons that step through the grammar history.
    history_frame = Frame(frame0, background=bg)
    history_frame.grid(column=0, row=2, sticky="EW")
    for caption, callback in (
        ("Prev Grammar", self._history_prev),
        ("Next Grammar", self._history_next),
    ):
        Button(
            history_frame, text=caption, command=callback, **self._BUTTON_PARAMS
        ).pack(side="left")

    # The help box, with one clickable tab label per HELP entry.
    self.helpbox = Text(frame0, font=self._smallfont, **self._HELPBOX_PARAMS)
    self.helpbox.grid(column=3, row=1, sticky="NEWS")
    self.helptabs = {}
    helptab_frame = Frame(frame0, background=bg)
    helptab_frame.grid(column=3, row=0, sticky="SW")
    for position, (tab, _tabstops, _text) in enumerate(self.HELP):
        tab_label = Label(helptab_frame, text=tab, font=self._smallfont)
        tab_label.grid(column=position * 2, row=0, sticky="S")
        tab_label.bind("<ButtonPress>", lambda e, tab=tab: self.show_help(tab))
        self.helptabs[tab] = tab_label
        # Fixed-width spacer in the odd column after each tab.
        spacer = Frame(
            helptab_frame, height=1, width=self._HELPTAB_SPACER, background=bg
        )
        spacer.grid(column=position * 2 + 1, row=0)
    first_tab = self.HELP[0][0]
    self.helptabs[first_tab].configure(font=self._font)
    self.helpbox.tag_config("elide", elide=True)
    for tag, params in self.HELP_AUTOTAG:
        self.helpbox.tag_config("tag-%s" % tag, **params)
    self.show_help(first_tab)

    # Vertical scroll bar for the help box.
    help_scrollbar = Scrollbar(frame0, command=self.helpbox.yview)
    self.helpbox.config(yscrollcommand=help_scrollbar.set)
    help_scrollbar.grid(column=4, row=1, sticky="NWS")

    # The development-set box lives in its own frame together with its
    # horizontal scroll bar.
    devset_frame = Frame(frame0, background=bg)
    self.devsetbox = Text(devset_frame, font=self._font, **self._DEVSETBOX_PARAMS)
    self.devsetbox.pack(expand=True, fill="both")
    self.devsetlabel = Label(
        frame0,
        font=self._font,
        text="Development Set:",
        justify="right",
        background=self._DEVSETBOX_PARAMS["background"],
    )
    self.devsetlabel.grid(column=0, row=4, sticky="SW")
    devset_frame.grid(column=0, row=5, sticky="NEWS")

    # Scroll bars for the development set: the vertical one is routed
    # through self._devset_scroll, the horizontal one scrolls the text.
    self.devset_scroll = Scrollbar(frame0, command=self._devset_scroll)
    self.devset_scroll.grid(column=1, row=5, sticky="NWS")
    self.devset_xscroll = Scrollbar(
        devset_frame, command=self.devsetbox.xview, orient="horiz"
    )
    self.devsetbox["xscrollcommand"] = self.devset_xscroll.set
    self.devset_xscroll.pack(side="bottom", fill="x")

    # Development-set navigation and view buttons.
    nav_frame = Frame(frame0, background=bg)
    nav_frame.grid(column=0, row=7, sticky="EW")
    for caption, callback in (
        ("Prev Example (Ctrl-p)", self._devset_prev),
        ("Next Example (Ctrl-n)", self._devset_next),
    ):
        Button(
            nav_frame, text=caption, command=callback, **self._BUTTON_PARAMS
        ).pack(side="left")
    self.devset_button = Button(
        nav_frame,
        text="Show example",
        command=self.show_devset,
        state="disabled",
        **self._BUTTON_PARAMS,
    )
    self.devset_button.pack(side="right")
    self.trace_button = Button(
        nav_frame, text="Show trace", command=self.show_trace, **self._BUTTON_PARAMS
    )
    self.trace_button.pack(side="right")

    # The evaluation canvas and its label.
    self.evalbox = Canvas(frame0, **self._EVALBOX_PARAMS)
    eval_label = Label(
        frame0,
        font=self._font,
        text="Evaluation:",
        justify="right",
        background=self._EVALBOX_PARAMS["background"],
    )
    eval_label.grid(column=3, row=4, sticky="SW")
    self.evalbox.grid(column=3, row=5, sticky="NEWS", columnspan=2)

    # Evaluation option buttons; both check buttons redraw the plot.
    eval_frame = Frame(frame0, background=bg)
    eval_frame.grid(column=3, row=7, sticky="EW")
    self._autoscale = IntVar(self.top)
    self._autoscale.set(False)
    Checkbutton(
        eval_frame,
        variable=self._autoscale,
        command=self._eval_plot,
        text="Zoom",
        **self._BUTTON_PARAMS,
    ).pack(side="left")
    self._eval_lines = IntVar(self.top)
    self._eval_lines.set(False)
    Checkbutton(
        eval_frame,
        variable=self._eval_lines,
        command=self._eval_plot,
        text="Lines",
        **self._BUTTON_PARAMS,
    ).pack(side="left")
    Button(eval_frame, text="History", **self._BUTTON_PARAMS).pack(side="right")

    # The status label spans the whole bottom row.
    self.status = Label(frame0, font=self._font, **self._STATUS_PARAMS)
    self.status.grid(column=0, row=9, sticky="NEW", padx=3, pady=2, columnspan=5)

    # Help box & devset box can't be edited.
    self.helpbox["state"] = "disabled"
    self.devsetbox["state"] = "disabled"

    # Spacers: (height, width, column, row).
    for height, width, column, row in ((10, 0, 0, 3), (0, 10, 2, 0), (6, 0, 0, 8)):
        Frame(frame0, height=height, width=width, background=bg).grid(
            column=column, row=row
        )

    # pack the frame.
    frame0.pack(fill="both", expand=True)

    # Colours for the devset box (created in this order).
    devset_tags = (
        ("true-pos", dict(background="#afa", underline="True")),
        ("false-neg", dict(underline="True", foreground="#800")),
        ("false-pos", dict(background="#faa")),
        ("trace", dict(foreground="#666", wrap="none")),
        ("wrapindent", dict(lmargin2=30, wrap="none")),
        ("error", dict(foreground="#800")),
    )
    for tag_name, options in devset_tags:
        self.devsetbox.tag_config(tag_name, **options)

    # And for the grammar box.
    grammar_tags = (
        ("error", dict(background="#fec")),
        ("comment", dict(foreground="#840")),
        ("angle", dict(foreground="#00f")),
        ("brace", dict(foreground="#0a0")),
        ("hangindent", dict(lmargin1=0, lmargin2=40)),
    )
    for tag_name, options in grammar_tags:
        self.grammarbox.tag_config(tag_name, **options)
|
981 |
+
|
982 |
+
_showing_trace = False
|
983 |
+
|
984 |
+
def show_trace(self, *e):
    """
    Show, in the devset box, how the grammar's rules chunk the current
    development-set sentence one rule at a time.

    The box is cleared and filled with one header line and one tag-sequence
    line per step: "Start:" for no rules, then "Apply <rule>:" for each
    rule of the current chunker.  Step ``i`` is parsed with a chunker built
    from the first ``i`` rules only, and its chunks are coloured as
    true-pos / false-neg / false-pos against the gold tree.  If there is no
    valid chunker, only a waiting message is shown.

    :param e: Ignored; allows the method to be used as an event callback.
    """
    self._showing_trace = True
    self.trace_button["state"] = "disabled"
    self.devset_button["state"] = "normal"

    self.devsetbox["state"] = "normal"
    self.devsetbox.delete("1.0", "end")
    self.devsetlabel["text"] = "Development Set (%d/%d)" % (
        (self.devset_index + 1, self._devset_size.get())
    )

    if self.chunker is None:
        self.devsetbox.insert("1.0", "Trace: waiting for a valid grammar.")
        self.devsetbox.tag_add("error", "1.0", "end")
        return  # can't do anything more

    gold_tree = self.devset[self.devset_index]
    rules = self.chunker.rules()
    num_steps = len(rules) + 1

    # Calculate the tag sequence, recording the column at which each
    # tag starts (charnum[j]) so chunks can be mapped to text positions.
    tagseq = "\t"
    charnum = [1]
    for _word, pos in gold_tree.leaves():
        tagseq += "%s " % pos
        charnum.append(len(tagseq))
    self.charnum = {
        (i, j): charnum[j] for i in range(num_steps) for j in range(len(charnum))
    }
    # Step i's tag sequence sits on text line 2*i + 2 (after its header).
    self.linenum = {i: i * 2 + 2 for i in range(num_steps)}

    gold_chunks = self._chunks(gold_tree)
    for i in range(num_steps):
        if i == 0:
            self.devsetbox.insert("end", "Start:\n")
        else:
            self.devsetbox.insert("end", "Apply %s:\n" % rules[i - 1])
        self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
        # Display the tag sequence.
        self.devsetbox.insert("end", tagseq + "\n")
        self.devsetbox.tag_add("wrapindent", "end -2c linestart", "end -2c")
        # Run a partial parser (first i rules only).  Previously the
        # partial parser was constructed but the full chunker was used,
        # so every step displayed the final result.
        words = gold_tree.leaves()
        try:
            test_tree = RegexpChunkParser(rules[:i]).parse(words)
        except (ValueError, IndexError):
            # Same fallback as _chunkparse: flag the grammar and treat
            # the sentence as containing no chunks.
            self.grammarbox.tag_add("error", "1.0", "end")
            test_tree = words
        test_chunks = self._chunks(test_tree)
        # Compare them.
        for chunk in gold_chunks.intersection(test_chunks):
            self._color_chunk(i, chunk, "true-pos")
        for chunk in gold_chunks - test_chunks:
            self._color_chunk(i, chunk, "false-neg")
        for chunk in test_chunks - gold_chunks:
            self._color_chunk(i, chunk, "false-pos")
    self.devsetbox.insert("end", "Finished.\n")
    self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")

    # This is a hack, because the x-scrollbar isn't updating its
    # position right -- I'm not sure what the underlying cause is
    # though.  (This is on OS X w/ python 2.5)
    self.top.after(100, self.devset_xscroll.set, 0, 0.3)
|
1046 |
+
|
1047 |
+
def show_help(self, tab):
    """
    Display the help page named ``tab`` in the help box.

    The selected tab's label gets the foreground parameters and every
    other tab's label the background parameters.  In the selected page,
    "<<TAGSET>>" is replaced by a tab-separated listing of ``self.tagset``
    (tags beginning with a word character first), and for every
    ``HELP_AUTOTAG`` entry the ``<tag>``/``</tag>`` markers are elided
    while the text between them is tagged ``tag-<tag>``.

    :param tab: The name of the help page to show.
    """

    def tag_order(entry):
        # Word-like tags sort before the others; ties broken by tag.
        tag = entry[0]
        return (0 if re.match(r"\w+", tag) else 1, tag)

    self.helpbox["state"] = "normal"
    self.helpbox.delete("1.0", "end")
    for name, tabstops, text in self.HELP:
        if name != tab:
            self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
            continue

        listing = "\n".join(
            "\t%s\t%s" % item for item in sorted(self.tagset.items(), key=tag_order)
        )
        text = text.replace("<<TAGSET>>", listing)

        self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
        self.helpbox.config(tabs=tabstops)
        self.helpbox.insert("1.0", text + "\n" * 20)
        offset = "1.0 + %d chars"
        for tag, _params in self.HELP_AUTOTAG:
            pattern = f"(?s)(<{tag}>)(.*?)(</{tag}>)"
            for m in re.finditer(pattern, text):
                # Hide the opening and closing markers, style the content.
                for group, tag_name in (
                    (1, "elide"),
                    (2, "tag-%s" % tag),
                    (3, "elide"),
                ):
                    self.helpbox.tag_add(
                        tag_name, offset % m.start(group), offset % m.end(group)
                    )
    self.helpbox["state"] = "disabled"
|
1080 |
+
|
1081 |
+
def _history_prev(self, *e):
    """
    View the grammar one step earlier in the history.

    :param e: Ignored; allows use as an event callback.
    :return: The string ``"break"``.
    """
    target = self._history_index - 1
    self._view_history(target)
    return "break"
|
1084 |
+
|
1085 |
+
def _history_next(self, *e):
    """
    View the grammar one step later in the history.

    :param e: Ignored; allows use as an event callback.
    :return: The string ``"break"``.
    """
    target = self._history_index + 1
    self._view_history(target)
    return "break"
|
1088 |
+
|
1089 |
+
def _view_history(self, index):
    """
    Load the history entry at ``index`` into the grammar editor.

    ``index`` is clamped to the valid range.  Nothing happens when the
    history is empty or the entry is already being viewed.  Otherwise the
    editor text, the syntax highlighting, the normalized grammar, the
    chunker, the evaluation plot, the devset highlighting (and the trace,
    if shown) and the grammar label are all refreshed.

    :param index: Position in ``self._history`` of the grammar to view.
    """
    history = self._history
    if not history:
        return
    # Bounds checking.
    index = max(0, min(len(history) - 1, index))
    # Already viewing the requested history item?
    if index == self._history_index:
        return

    # Show the requested grammar.  It will get added to _history
    # only if they edit it (causing self.update() to get run.)
    grammar = history[index][0]
    self.grammarbox["state"] = "normal"
    self.grammarbox.delete("1.0", "end")
    self.grammarbox.insert("end", grammar)
    self.grammarbox.mark_set("insert", "1.0")
    self._history_index = index
    self._syntax_highlight_grammar(grammar)

    # Record the normalized grammar & regenerate the chunker.
    self.normalized_grammar = self.normalize_grammar(grammar)
    rules = []
    if self.normalized_grammar:
        rules = [
            RegexpChunkRule.fromstring(line)
            for line in self.normalized_grammar.split("\n")
        ]
    self.chunker = RegexpChunkParser(rules)

    # Show the score, then update the devset box.
    self._eval_plot()
    self._highlight_devset()
    if self._showing_trace:
        self.show_trace()

    # The label shows a position only while an older entry is viewed.
    if self._history_index >= len(self._history) - 1:
        self.grammarlabel["text"] = "Grammar:"
    else:
        self.grammarlabel["text"] = "Grammar {}/{}:".format(
            self._history_index + 1,
            len(self._history),
        )
|
1129 |
+
|
1130 |
+
def _devset_next(self, *e):
    """
    Move forward by one page in the development set.

    :param e: Ignored; allows use as an event callback.
    :return: The string ``"break"``.
    """
    step = 1
    self._devset_scroll("scroll", step, "page")
    return "break"
|
1133 |
+
|
1134 |
+
def _devset_prev(self, *e):
    """
    Move back by one page in the development set.

    :param e: Ignored; allows use as an event callback.
    :return: The string ``"break"``.
    """
    step = -1
    self._devset_scroll("scroll", step, "page")
    return "break"
|
1137 |
+
|
1138 |
+
def destroy(self, *e):
    """
    Destroy the top-level window, if there still is one.

    ``self.top`` is set to None afterwards, so repeated calls are no-ops.

    :param e: Ignored; allows use as an event callback.
    """
    window = self.top
    if window is not None:
        window.destroy()
        self.top = None
|
1143 |
+
|
1144 |
+
def _devset_scroll(self, command, *args):
    """
    Handle a scroll command by showing a different devset sentence.

    ``("scroll", n, "unit...")`` and ``("scroll", n, "page...")`` move by
    ``n`` sentences (a page is one sentence); ``("moveto", fraction)``
    jumps to that fraction of the devset size.  If the trace was showing
    when the command arrived, it is shown again afterwards.

    :param command: ``"scroll"`` or ``"moveto"``.
    :param args: The arguments that accompany the command.
    """
    page = 1  # size of a page -- one sentence.
    was_tracing = self._showing_trace
    if command == "moveto":
        self.show_devset(int(float(args[0]) * self._devset_size.get()))
    elif command == "scroll" and args[1].startswith("unit"):
        self.show_devset(self.devset_index + int(args[0]))
    elif command == "scroll" and args[1].startswith("page"):
        self.show_devset(self.devset_index + page * int(args[0]))
    else:
        assert 0, f"bad scroll command {command} {args}"
    if was_tracing:
        self.show_trace()
|
1157 |
+
|
1158 |
+
def show_devset(self, index=None):
    """
    Show one development-set sentence in the devset box.

    ``index`` is clamped to the devset size.  Nothing happens if that
    sentence is already displayed and the trace is not showing.
    Otherwise the box is rewritten with the sentence as ``word/pos``
    tokens, ``self.charnum`` and ``self.linenum`` are rebuilt for chunk
    colouring, the chunks are highlighted if a chunker exists, and the
    vertical scroll bar is repositioned.

    :param index: Sentence to show; defaults to the current one.
    """
    if index is None:
        index = self.devset_index

    # Bounds checking
    total = self._devset_size.get()
    index = min(max(0, index), total - 1)

    if index == self.devset_index and not self._showing_trace:
        return
    self.devset_index = index

    self._showing_trace = False
    self.trace_button["state"] = "normal"
    self.devset_button["state"] = "disabled"

    # Clear the text box.
    box = self.devsetbox
    box["state"] = "normal"
    box["wrap"] = "word"
    box.delete("1.0", "end")
    self.devsetlabel["text"] = "Development Set (%d/%d)" % (index + 1, total)

    # Add the sentence, remembering the column where each word starts.
    self.charnum = {}
    self.linenum = {0: 1}
    for sentnum, sent in enumerate(self.devset[index : index + 1]):
        tokens = []
        column = 0
        for wordnum, (word, pos) in enumerate(sent.leaves()):
            token = f"{word}/{pos} "
            self.charnum[sentnum, wordnum] = column
            column += len(token)
            self.charnum[sentnum, wordnum + 1] = column
            tokens.append(token)
        # Drop the trailing space before the blank separator line.
        box.insert("end", "".join(tokens)[:-1] + "\n\n")

    # Highlight chunks in the dev set
    if self.chunker is not None:
        self._highlight_devset()
    box["state"] = "disabled"

    # Update the scrollbar
    first = index / total
    last = (index + 2) / total
    self.devset_scroll.set(first, last)
|
1202 |
+
|
1203 |
+
def _chunks(self, tree):
    """
    Collect the word spans of the chunks directly under ``tree``.

    Children that are ``Tree`` instances advance the word position by
    their length; any other child counts as one word.  Only subtrees
    whose label equals ``self._chunk_label`` are reported.

    :param tree: An iterable of subtrees and/or plain tokens.
    :return: A set of ``(start, end)`` word-index pairs.
    """
    spans = set()
    position = 0
    for node in tree:
        if not isinstance(node, Tree):
            position += 1
            continue
        width = len(node)
        if node.label() == self._chunk_label:
            spans.add((position, position + width))
        position += width
    return spans
|
1214 |
+
|
1215 |
+
def _syntax_highlight_grammar(self, grammar):
    """
    Re-apply the comment / angle / brace colour tags to the grammar box.

    Does nothing once the window has been destroyed (``self.top`` is
    None).  In each non-blank line, an unescaped ``#`` starts a comment
    that runs to the end of the line; ``<``/``>`` and ``{``/``}`` before
    the comment are tagged "angle" and "brace" respectively.

    :param grammar: The grammar text, as shown in the grammar box.
    """
    if self.top is None:
        return
    box = self.grammarbox
    for stale in ("comment", "angle", "brace"):
        box.tag_remove(stale, "1.0", "end")
    box.tag_add("hangindent", "1.0", "end")
    for row, line in enumerate(grammar.split("\n"), start=1):
        if not line.strip():
            continue
        # Group 2 is the comment, if the line has one.
        comment = re.match(r"(\\.|[^#])*(#.*)?", line)
        comment_start = None
        if comment.group(2):
            comment_start = comment.start(2)
            box.tag_add(
                "comment",
                "%d.%d" % (row, comment.start(2)),
                "%d.%d" % (row, comment.end(2)),
            )
        for bracket in re.finditer("[<>{}]", line):
            # Brackets inside the comment are left alone.
            if comment_start is not None and bracket.start() >= comment_start:
                break
            tag = "angle" if bracket.group() in "<>" else "brace"
            box.tag_add(
                tag,
                "%d.%d" % (row, bracket.start()),
                "%d.%d" % (row, bracket.end()),
            )
|
1241 |
+
|
1242 |
+
def _grammarcheck(self, grammar):
    """
    Mark every grammar line that fails to parse with the "error" tag.

    Does nothing once the window has been destroyed (``self.top`` is
    None).  Comments and surrounding whitespace are stripped from each
    line before it is handed to ``RegexpChunkRule.fromstring``; blank
    lines are skipped.  The status text is cleared at the end.

    :param grammar: The un-normalized grammar text.
    """
    if self.top is None:
        return
    self.grammarbox.tag_remove("error", "1.0", "end")
    self._grammarcheck_errs = []
    for row, raw_line in enumerate(grammar.split("\n"), start=1):
        rule_text = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", raw_line).strip()
        if not rule_text:
            continue
        try:
            RegexpChunkRule.fromstring(rule_text)
        except ValueError:
            self.grammarbox.tag_add("error", "%s.0" % row, "%s.0 lineend" % row)
    self.status["text"] = ""
|
1258 |
+
|
1259 |
+
def update(self, *event):
    """
    Re-read the grammar box and, if the grammar changed, rebuild the
    chunker and refresh everything that depends on it.

    When called with an event, the time of the call is recorded in
    ``self._last_keypress``.  If the normalized grammar equals the stored
    one, nothing else happens.  If the new grammar does not parse, the
    offending lines are flagged via ``_grammarcheck`` and the chunker is
    set to None.  Otherwise the trace or devset highlighting is redrawn
    and the eval demon is started unless it is already running.

    :param event: Optional event arguments.
    """
    # Record when update was called (for grammarcheck)
    if event:
        self._last_keypress = time.time()

    # Read the grammar from the Text box.
    grammar = self.grammarbox.get("1.0", "end")
    self.grammar = grammar

    # If the grammar hasn't changed, do nothing.
    normalized_grammar = self.normalize_grammar(grammar)
    if normalized_grammar == self.normalized_grammar:
        return
    self.normalized_grammar = normalized_grammar

    # If the grammar has changed, and we're looking at history,
    # then stop looking at history.
    if self._history_index < len(self._history) - 1:
        self.grammarlabel["text"] = "Grammar:"

    self._syntax_highlight_grammar(grammar)

    # The grammar has changed; try parsing it.
    # Note: the normalized grammar has no blank lines.
    try:
        rules = (
            [
                RegexpChunkRule.fromstring(line)
                for line in normalized_grammar.split("\n")
            ]
            if normalized_grammar
            else []
        )
    except ValueError:
        # Use the un-normalized grammar for error highlighting.
        self._grammarcheck(grammar)
        self.chunker = None
        return

    self.chunker = RegexpChunkParser(rules)
    self.grammarbox.tag_remove("error", "1.0", "end")
    self.grammar_changed = time.time()

    # Display the results
    if self._showing_trace:
        self.show_trace()
    else:
        self._highlight_devset()

    # Start the eval demon
    if not self._eval_demon_running:
        self._eval_demon()
|
1309 |
+
|
1310 |
+
def _highlight_devset(self, sample=None):
    """
    Colour the chunks of the displayed devset sentence(s).

    Existing true-pos / false-neg / false-pos tags are removed first.
    Each sentence is then chunked with ``_chunkparse`` and its chunks are
    compared with the gold chunks: spans in both are "true-pos", spans
    only in the gold tree "false-neg", spans only in the parse
    "false-pos".

    :param sample: The gold trees to highlight; defaults to the single
        sentence at ``self.devset_index``.
    """
    if sample is None:
        sample = self.devset[self.devset_index : self.devset_index + 1]

    for stale in ("true-pos", "false-neg", "false-pos"):
        self.devsetbox.tag_remove(stale, "1.0", "end")

    # Run the grammar on the test cases.
    for sentnum, gold_tree in enumerate(sample):
        test_tree = self._chunkparse(gold_tree.leaves())
        gold = self._chunks(gold_tree)
        test = self._chunks(test_tree)
        verdicts = (
            ("true-pos", gold.intersection(test)),
            ("false-neg", gold - test),
            ("false-pos", test - gold),
        )
        for tag, spans in verdicts:
            for span in spans:
                self._color_chunk(sentnum, span, tag)
|
1332 |
+
|
1333 |
+
def _chunkparse(self, words):
    """
    Parse ``words`` with the current chunker.

    If the chunker raises ValueError or IndexError, the whole grammar box
    is tagged as an error and ``words`` is returned unchanged, i.e. the
    sentence is treated as containing no chunks.

    :param words: The tagged tokens to chunk.
    :return: The chunker's parse, or ``words`` on failure.
    """
    try:
        parsed = self.chunker.parse(words)
    except (ValueError, IndexError):
        # There's an error somewhere in the grammar, but we're not sure
        # exactly where, so just mark the whole grammar as bad.
        # E.g., this is caused by: "({<NN>})"
        self.grammarbox.tag_add("error", "1.0", "end")
        # Treat it as tagging nothing:
        return words
    return parsed
|
1343 |
+
|
1344 |
+
def _color_chunk(self, sentnum, chunk, tag):
    """
    Apply ``tag`` to the devset-box text covered by one chunk.

    The text line comes from ``self.linenum[sentnum]``; the start and end
    columns come from ``self.charnum``, with the end pulled back by one
    character.

    :param sentnum: Index of the sentence (or trace step) being coloured.
    :param chunk: A ``(start, end)`` pair of word indices.
    :param tag: Name of the text tag to add.
    """
    first_word, last_word = chunk
    line = self.linenum[sentnum]
    start_col = self.charnum[sentnum, first_word]
    end_col = self.charnum[sentnum, last_word] - 1
    self.devsetbox.tag_add(tag, f"{line}.{start_col}", f"{line}.{end_col}")
|
1351 |
+
|
1352 |
+
def reset(self):
    """
    Return the application to its initial, empty-grammar state.

    The chunker, grammar, normalized grammar and history are cleared,
    then the grammar box is emptied, the first devset sentence is shown
    and ``update`` is run.
    """
    # Clear various variables
    self.chunker = self.grammar = self.normalized_grammar = None
    self.grammar_changed = 0
    self._history, self._history_index = [], 0
    # Update the on-screen display.
    self.grammarbox.delete("1.0", "end")
    self.show_devset(0)
    self.update()
|
1364 |
+
# self._eval_plot()
|
1365 |
+
|
1366 |
+
SAVE_GRAMMAR_TEMPLATE = (
|
1367 |
+
"# Regexp Chunk Parsing Grammar\n"
|
1368 |
+
"# Saved %(date)s\n"
|
1369 |
+
"#\n"
|
1370 |
+
"# Development set: %(devset)s\n"
|
1371 |
+
"# Precision: %(precision)s\n"
|
1372 |
+
"# Recall: %(recall)s\n"
|
1373 |
+
"# F-score: %(fscore)s\n\n"
|
1374 |
+
"%(grammar)s\n"
|
1375 |
+
)
|
1376 |
+
|
1377 |
+
def save_grammar(self, filename=None):
    """
    Write the current grammar, with a header, to a file.

    The header is produced from ``SAVE_GRAMMAR_TEMPLATE``.  The scores
    come from the last history entry when the current grammar matches
    it; otherwise a message says that the grammar is not well formed or
    has not finished evaluating.

    :param filename: File to write.  If empty, the user is asked with a
        save dialog; cancelling the dialog saves nothing.
    """
    if not filename:
        # Dialog label spelling fixed (was "Chunk Gramamr").
        ftypes = [("Chunk Grammar", ".chunk"), ("All files", "*")]
        filename = asksaveasfilename(filetypes=ftypes, defaultextension=".chunk")
        if not filename:
            return
    if self._history and self.normalized_grammar == self.normalize_grammar(
        self._history[-1][0]
    ):
        # The current grammar is the most recently evaluated one.
        precision, recall, fscore = (
            "%.2f%%" % (100 * v) for v in self._history[-1][1:]
        )
    elif self.chunker is None:
        precision = recall = fscore = "Grammar not well formed"
    else:
        precision = recall = fscore = "Not finished evaluation yet"

    with open(filename, "w") as outfile:
        outfile.write(
            self.SAVE_GRAMMAR_TEMPLATE
            % dict(
                date=time.ctime(),
                devset=self.devset_name,
                precision=precision,
                recall=recall,
                fscore=fscore,
                grammar=self.grammar.strip(),
            )
        )
|
1406 |
+
|
1407 |
+
def load_grammar(self, filename=None):
    """
    Replace the contents of the grammar box with a grammar read from a file.

    A header written by ``save_grammar`` (from the "# Regexp Chunk Parsing
    Grammar" line through the first "F-score:" line) is removed before the
    text is inserted.  ``update`` is run once on the emptied box and once
    on the loaded grammar.

    :param filename: File to read.  If empty, the user is asked with an
        open dialog; cancelling the dialog loads nothing.
    """
    if not filename:
        # Dialog label spelling fixed (was "Chunk Gramamr").
        ftypes = [("Chunk Grammar", ".chunk"), ("All files", "*")]
        filename = askopenfilename(filetypes=ftypes, defaultextension=".chunk")
        if not filename:
            return
    # Read first, so an unreadable file does not wipe the current grammar.
    with open(filename) as infile:
        grammar = infile.read()
    # Non-greedy: stop at the header's own F-score line, so a later
    # "F-score:" inside the grammar's comments is not swallowed.
    grammar = re.sub(
        r"^\# Regexp Chunk Parsing Grammar[\s\S]*?F-score:.*\n", "", grammar
    ).lstrip()
    self.grammarbox.delete("1.0", "end")
    self.update()
    self.grammarbox.insert("1.0", grammar)
    self.update()
|
1422 |
+
|
1423 |
+
def save_history(self, filename=None):
    """
    Write every grammar in the history, with its scores, to a text file.

    Each history entry is written as a header line giving its position,
    precision, recall and f-score, followed by the grammar with one rule
    per line.  If the current grammar is not the last history entry, it
    is appended as "Current Grammar", marked as not well-formed or not
    evaluated.

    :param filename: File to write.  If empty, the user is asked with a
        save dialog; cancelling the dialog saves nothing.
    """
    if not filename:
        # Dialog label spelling fixed (was "Chunk Gramamr History").
        ftypes = [("Chunk Grammar History", ".txt"), ("All files", "*")]
        filename = asksaveasfilename(filetypes=ftypes, defaultextension=".txt")
        if not filename:
            return

    def indented(grammar):
        # One output line per grammar line.  Splitting on all whitespace,
        # as before, broke each rule and its comment into separate words.
        return "".join(" %s\n" % line for line in grammar.strip().splitlines())

    with open(filename, "w") as outfile:
        outfile.write("# Regexp Chunk Parsing Grammar History\n")
        outfile.write("# Saved %s\n" % time.ctime())
        outfile.write("# Development set: %s\n" % self.devset_name)
        total = len(self._history)
        for i, (g, p, r, f) in enumerate(self._history):
            hdr = (
                "Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, "
                "fscore=%.2f%%)" % (i + 1, total, p * 100, r * 100, f * 100)
            )
            outfile.write("\n%s\n" % hdr)
            outfile.write(indented(g))

        current_is_last = bool(
            self._history
            and self.normalized_grammar
            == self.normalize_grammar(self._history[-1][0])
        )
        if not current_is_last:
            if self.chunker is None:
                outfile.write("\nCurrent Grammar (not well-formed)\n")
            else:
                outfile.write("\nCurrent Grammar (not evaluated)\n")
            outfile.write(indented(self.grammar))
|
1455 |
+
|
1456 |
+
def about(self, *e):
    """
    Show the "About" information for the application.

    A ``tkinter.messagebox.Message`` dialog is tried first; if importing
    or showing it fails, the text is displayed with ``ShowText`` instead.

    :param e: Ignored; allows use as an event callback.
    """
    ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper"
    TITLE = "About: Regular Expression Chunk Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Was a bare ``except:``, which also intercepted KeyboardInterrupt
        # and SystemExit; only ordinary failures should trigger the fallback.
        ShowText(self.top, TITLE, ABOUT)
|
1465 |
+
|
1466 |
+
def set_devset_size(self, size=None):
    """
    Set how many development-set sentences are in use.

    The stored size is capped at the number of sentences in
    ``self.devset``.  Sentence 1 and then sentence 0 are shown afterwards.

    :param size: The requested size; if None, the stored size is only
        re-capped.
    """
    size_var = self._devset_size
    if size is not None:
        size_var.set(size)
    size_var.set(min(len(self.devset), size_var.get()))
    for sentence in (1, 0):
        self.show_devset(sentence)
    # what about history?  Evaluated at diff dev set sizes!
|
1473 |
+
|
1474 |
+
def resize(self, size=None):
    """
    Change the size of the application's fonts.

    The main font is configured with the negated absolute size; the small
    font with 14/20 of that (floor division), but never closer to zero
    than -10.

    :param size: New font size; if None, the stored size is re-applied.
    """
    if size is not None:
        self._size.set(size)
    magnitude = abs(self._size.get())
    main_size = -magnitude
    small_size = min(-10, main_size * 14 // 20)
    self._font.configure(size=main_size)
    self._smallfont.configure(size=small_size)
|
1480 |
+
|
1481 |
+
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Nothing is done when ``in_idle()`` reports true.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
|
1491 |
+
|
1492 |
+
|
1493 |
+
def app():
    """Create a ``RegexpChunkApp`` and enter its mainloop."""
    window = RegexpChunkApp()
    window.mainloop()
|
1495 |
+
|
1496 |
+
|
1497 |
+
if __name__ == "__main__":
|
1498 |
+
app()
|
1499 |
+
|
1500 |
+
__all__ = ["app"]
|
pipeline/nltk/app/collocations_app.py
ADDED
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Collocations Application
|
2 |
+
# Much of the GUI code is imported from concordance.py; We intend to merge these tools together
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Sumukh Ghodke <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
#
|
8 |
+
|
9 |
+
|
10 |
+
import queue as q
|
11 |
+
import threading
|
12 |
+
from tkinter import (
|
13 |
+
END,
|
14 |
+
LEFT,
|
15 |
+
SUNKEN,
|
16 |
+
Button,
|
17 |
+
Frame,
|
18 |
+
IntVar,
|
19 |
+
Label,
|
20 |
+
Menu,
|
21 |
+
OptionMenu,
|
22 |
+
Scrollbar,
|
23 |
+
StringVar,
|
24 |
+
Text,
|
25 |
+
Tk,
|
26 |
+
)
|
27 |
+
from tkinter.font import Font
|
28 |
+
|
29 |
+
from nltk.corpus import (
|
30 |
+
alpino,
|
31 |
+
brown,
|
32 |
+
cess_cat,
|
33 |
+
cess_esp,
|
34 |
+
floresta,
|
35 |
+
indian,
|
36 |
+
mac_morpho,
|
37 |
+
machado,
|
38 |
+
nps_chat,
|
39 |
+
sinica_treebank,
|
40 |
+
treebank,
|
41 |
+
)
|
42 |
+
from nltk.probability import FreqDist
|
43 |
+
from nltk.util import in_idle
|
44 |
+
|
45 |
+
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
|
46 |
+
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
|
47 |
+
POLL_INTERVAL = 100
|
48 |
+
|
49 |
+
_DEFAULT = "English: Brown Corpus (Humor)"
|
50 |
+
_CORPORA = {
|
51 |
+
"Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
|
52 |
+
"English: Brown Corpus": lambda: brown.words(),
|
53 |
+
"English: Brown Corpus (Press)": lambda: brown.words(
|
54 |
+
categories=["news", "editorial", "reviews"]
|
55 |
+
),
|
56 |
+
"English: Brown Corpus (Religion)": lambda: brown.words(categories="religion"),
|
57 |
+
"English: Brown Corpus (Learned)": lambda: brown.words(categories="learned"),
|
58 |
+
"English: Brown Corpus (Science Fiction)": lambda: brown.words(
|
59 |
+
categories="science_fiction"
|
60 |
+
),
|
61 |
+
"English: Brown Corpus (Romance)": lambda: brown.words(categories="romance"),
|
62 |
+
"English: Brown Corpus (Humor)": lambda: brown.words(categories="humor"),
|
63 |
+
"English: NPS Chat Corpus": lambda: nps_chat.words(),
|
64 |
+
"English: Wall Street Journal Corpus": lambda: treebank.words(),
|
65 |
+
"Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
|
66 |
+
"Dutch: Alpino Corpus": lambda: alpino.words(),
|
67 |
+
"Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
|
68 |
+
"Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
|
69 |
+
"Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
|
70 |
+
"Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
|
71 |
+
"Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
|
72 |
+
}
|
73 |
+
|
74 |
+
|
75 |
+
class CollocationsView:
    """Tkinter window that lists the collocations of a selectable corpus.

    The view owns a ``CollocationsModel`` and a queue shared with it.  The
    model loads corpora on a background thread and reports the outcome by
    putting an event name on the queue; ``_poll`` reads that queue from the
    Tk event loop every ``POLL_INTERVAL`` and updates the widgets.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white

    def __init__(self):
        """Create the window, build all widgets and start loading the default corpus."""
        self.queue = q.Queue()
        self.model = CollocationsModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        # Handle of the pending poll; ``destroy`` cancels it.
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Configure geometry, title and the close/quit bindings of *top*."""
        top.geometry("550x650+50+50")
        top.title("NLTK Collocations List")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(550, 650)

    def _init_widgets(self, parent):
        """Create the main frame and every widget group inside it."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    def _init_corpus_select(self, parent):
        """Build the "Corpus:" label and the drop-down used to choose a corpus."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")

        # The default corpus is the first entry; the remaining entries are
        # whatever the model returns from non_default_corpora().
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            *self.model.non_default_corpora(),
            command=self.corpus_selected,
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label shown under the paging buttons."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_menubar(self):
        """Build the File and Edit menus and select the initial result count."""
        self._result_size = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        for size in (20, 50, 100):
            rescntmenu.add_radiobutton(
                label=str(size),
                variable=self._result_size,
                underline=0,
                value=size,
                command=self.set_result_size,
            )
        # Invoke the second entry (50) so the model gets its result count.
        rescntmenu.invoke(1)
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the result count chosen in the menu into the model."""
        self.model.result_count = self._result_size.get()

    def _init_results_box(self, parent):
        """Create the read-only text box for results together with its scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # Spacer label: with the pack layout manager this is the way used here
        # to keep the two scrollbars from overlapping in the corner.
        Label(i2, text=" ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the (initially disabled) Previous/Next buttons."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.prev.pack(side="left", anchor="center")
        self.next = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.next.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        self.reset_current_page()

    def reset_current_page(self):
        """Mark that no page is displayed (page index -1)."""
        self.current_page = -1

    def _poll(self):
        """Handle at most one queued model event, then schedule the next poll."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def handle_error_loading_corpus(self, event):
        """Report a failed load: clear the results and leave paging disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        # No unfreeze here: the model holds no collocations after a failed
        # load, so asking it for paging state would fail, and the buttons
        # have to end up disabled anyway.
        self.reset_current_page()
        self.clear_results_box()
        self.freeze_editable()

    def handle_corpus_loaded(self, event):
        """Show the first page of the freshly loaded corpus."""
        self.status["text"] = self.var.get() + " is loaded"
        self.clear_results_box()
        self.reset_current_page()
        collocations = self.model.next(self.current_page + 1)
        self.write_results(collocations)
        self.current_page += 1
        # Enable the buttons only now, so their state reflects the page that
        # is displayed rather than the page of the previous corpus.
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Callback of the corpus drop-down: load the newly selected corpus."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def previous(self):
        """Display the page before the current one."""
        self.freeze_editable()
        collocations = self.model.prev(self.current_page - 1)
        self.current_page = self.current_page - 1
        self.clear_results_box()
        self.write_results(collocations)
        self.unfreeze_editable()

    def __next__(self):
        """Display the page after the current one (callback of the Next button)."""
        self.freeze_editable()
        collocations = self.model.next(self.current_page + 1)
        self.clear_results_box()
        self.write_results(collocations)
        self.current_page += 1
        self.unfreeze_editable()

    def load_corpus(self, selection):
        """Ask the model to load *selection* unless it is already the selected corpus."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def freeze_editable(self):
        """Disable both paging buttons."""
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def clear_results_box(self):
        """Delete all text from the results box, leaving it read-only."""
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def fire_event(self, event):
        """Generate the Tk virtual event *event* on the top-level window."""
        # Firing an event so that rendering of widgets happens in the mainloop thread
        self.top.event_generate(event, when="tail")

    def destroy(self, *e):
        """Cancel the pending poll and close the window; safe to call twice."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop, unless ``in_idle()`` reports true."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)

    def unfreeze_editable(self):
        """Re-enable whichever paging buttons are valid for the current page."""
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable Previous/Next according to the current page and the model."""
        if self.current_page == -1 or self.current_page == 0:
            self.prev["state"] = "disabled"
        else:
            self.prev["state"] = "normal"
        if self.model.is_last_page(self.current_page):
            self.next["state"] = "disabled"
        else:
            self.next["state"] = "normal"

    def write_results(self, results):
        """Write one "word1 word2" line per pair in *results* into the results box."""
        self.results_box["state"] = "normal"
        for row, each in enumerate(results, start=1):
            self.results_box.insert(str(row) + ".0", each[0] + " " + each[1] + "\n")
        self.results_box["state"] = "disabled"
|
347 |
+
|
348 |
+
|
349 |
+
class CollocationsModel:
    """Ranked collocations of the selected corpus, served page by page.

    Corpora are loaded by a ``LoadCorpus`` thread.  When it finishes it stores
    the ranked word pairs in ``collocations`` and puts ``CORPUS_LOADED_EVENT``
    on the queue given to the constructor; on failure it puts
    ``ERROR_LOADING_CORPUS_EVENT`` instead.
    """

    def __init__(self, queue):
        """Create an empty model that reports load results on *queue*."""
        self.result_count = None  # page size; set by the view
        self.selected_corpus = None
        self.collocations = None  # None until a corpus has been loaded
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.queue = queue
        self.reset_results()

    def reset_results(self):
        """Forget every page handed out so far."""
        self.result_pages = []
        self.results_returned = 0

    def load_corpus(self, name):
        """Start loading the corpus called *name* on a background thread."""
        self.selected_corpus = name
        self.collocations = None
        runner_thread = self.LoadCorpus(name, self)
        runner_thread.start()
        self.reset_results()

    def non_default_corpora(self):
        """Return the sorted corpus names, excluding the default corpus."""
        names = list(self.CORPORA)
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def is_last_page(self, number):
        """Return True if no collocations remain after page *number*.

        With no corpus loaded every page counts as the last one.  A page that
        has later pages already cached is never the last.  Otherwise the page
        is the last one when the collocations consumed up to and including it
        cover the whole list.
        """
        if self.collocations is None:
            return True
        cached = len(self.result_pages)
        if number < cached - 1:
            return False
        # Pages that would still have to be cut to reach page `number`
        # (zero when it is the most recently cached page).
        pending_pages = number - cached + 1
        consumed = self.results_returned + pending_pages * self.result_count
        return consumed >= len(self.collocations)

    def next(self, page):
        """Return page *page*, cutting and caching any pages not built yet."""
        if (len(self.result_pages) - 1) < page:
            for _ in range(page - (len(self.result_pages) - 1)):
                start = self.results_returned
                self.result_pages.append(
                    self.collocations[start : start + self.result_count]
                )
                self.results_returned += self.result_count
        return self.result_pages[page]

    def prev(self, page):
        """Return the cached page *page*, or an empty list for page -1."""
        if page == -1:
            return []
        return self.result_pages[page]

    class LoadCorpus(threading.Thread):
        """Thread that loads one corpus and ranks its word pairs."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Score adjacent word pairs and publish them on the model.

            Words of one or two characters are dropped first.  Each pair is
            scored as count(pair)**3 / (count(w1) * count(w2)) and the pairs
            are stored best first.  Any exception is printed and reported to
            the view as ``ERROR_LOADING_CORPUS_EVENT``.
            """
            try:
                words = self.model.CORPORA[self.name]()
                from operator import itemgetter

                text = [w for w in words if len(w) > 2]
                fd = FreqDist(zip(text, text[1:]))
                vocab = FreqDist(text)
                scored = [
                    ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
                    for w1, w2 in fd
                ]
                scored.sort(key=itemgetter(1), reverse=True)
                self.model.collocations = list(map(itemgetter(0), scored))
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
|
424 |
+
|
425 |
+
|
426 |
+
# def collocations():
|
427 |
+
# colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
|
428 |
+
|
429 |
+
|
430 |
+
def app():
    """Open the collocations window and run its main loop."""
    CollocationsView().mainloop()
|
433 |
+
|
434 |
+
|
435 |
+
if __name__ == "__main__":
|
436 |
+
app()
|
437 |
+
|
438 |
+
__all__ = ["app"]
|
pipeline/nltk/app/concordance_app.py
ADDED
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Concordance Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Sumukh Ghodke <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
import queue as q
|
9 |
+
import re
|
10 |
+
import threading
|
11 |
+
from tkinter import (
|
12 |
+
END,
|
13 |
+
LEFT,
|
14 |
+
SUNKEN,
|
15 |
+
Button,
|
16 |
+
Entry,
|
17 |
+
Frame,
|
18 |
+
IntVar,
|
19 |
+
Label,
|
20 |
+
Menu,
|
21 |
+
OptionMenu,
|
22 |
+
Scrollbar,
|
23 |
+
StringVar,
|
24 |
+
Text,
|
25 |
+
Tk,
|
26 |
+
)
|
27 |
+
from tkinter.font import Font
|
28 |
+
|
29 |
+
from nltk.corpus import (
|
30 |
+
alpino,
|
31 |
+
brown,
|
32 |
+
cess_cat,
|
33 |
+
cess_esp,
|
34 |
+
floresta,
|
35 |
+
indian,
|
36 |
+
mac_morpho,
|
37 |
+
nps_chat,
|
38 |
+
sinica_treebank,
|
39 |
+
treebank,
|
40 |
+
)
|
41 |
+
from nltk.draw.util import ShowText
|
42 |
+
from nltk.util import in_idle
|
43 |
+
|
44 |
+
# Regular-expression fragments used when building search patterns.
WORD_OR_TAG = "[^/ ]+"
BOUNDARY = r"\b"

# Event names exchanged between the model and the view through the queue.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay passed to ``Tk.after`` between two polls of the event queue.
POLL_INTERVAL = 50

# NB: every corpus must be wrapped in a lambda expression so that it is not
# loaded when this module is imported.

# Corpus selected when the application starts; must be a key of _CORPORA.
_DEFAULT = "English: Brown Corpus (Humor, simplified)"

# Display name -> zero-argument callable returning the tagged corpus.
# The "(simplified)" entries request tagset="universal".
_CORPORA = {
    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus": lambda: brown.tagged_sents(),
    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus (Press, simplified)": lambda: brown.tagged_sents(
        categories=["news", "editorial", "reviews"], tagset="universal"
    ),
    "English: Brown Corpus (Religion, simplified)": lambda: brown.tagged_sents(
        categories="religion", tagset="universal"
    ),
    "English: Brown Corpus (Learned, simplified)": lambda: brown.tagged_sents(
        categories="learned", tagset="universal"
    ),
    "English: Brown Corpus (Science Fiction, simplified)": lambda: brown.tagged_sents(
        categories="science_fiction", tagset="universal"
    ),
    "English: Brown Corpus (Romance, simplified)": lambda: brown.tagged_sents(
        categories="romance", tagset="universal"
    ),
    "English: Brown Corpus (Humor, simplified)": lambda: brown.tagged_sents(
        categories="humor", tagset="universal"
    ),
    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
        tagset="universal"
    ),
    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
        tagset="universal"
    ),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
        tagset="universal"
    ),
    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
        tagset="universal"
    ),
    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
        files="hindi.pos", tagset="universal"
    ),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
        tagset="universal"
    ),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
        tagset="universal"
    ),
    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
        tagset="universal"
    ),
}
|
116 |
+
|
117 |
+
|
118 |
+
class ConcordanceSearchView:
    """Tkinter window for running concordance searches over a corpus."""

    _BACKGROUND_COLOUR = "#FFF"  # white

    # Text tag and colour used for the highlighted words in the results
    _HIGHLIGHT_WORD_COLOUR = "#F00"  # red
    _HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"

    # Text tag and colour used for the highlighted labels in the results
    _HIGHLIGHT_LABEL_COLOUR = "#C0C0C0"  # light grey
    _HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"

    # Fraction (0..1) passed to xview_moveto after results are written
    _FRACTION_LEFT_TEXT = 0.30
|
130 |
+
|
131 |
+
def __init__(self):
    """Create the window, build all widgets and start loading the default corpus."""
    event_queue = q.Queue()
    self.queue = event_queue
    self.model = ConcordanceSearchModel(event_queue)
    self.top = Tk()
    self._init_top(self.top)
    self._init_menubar()
    self._init_widgets(self.top)
    self.load_corpus(self.model.DEFAULT_CORPUS)
    # Keep the handle of the scheduled poll of the event queue.
    self.after = self.top.after(POLL_INTERVAL, self._poll)
|
140 |
+
|
141 |
+
def _init_top(self, top):
    """Configure geometry, title and the close/quit bindings of *top*."""
    width, height = 950, 680
    top.geometry("950x680+50+50")
    top.title("NLTK Concordance Search")
    # Ctrl-q and the window-manager close button both end the application.
    top.bind("<Control-q>", self.destroy)
    top.protocol("WM_DELETE_WINDOW", self.destroy)
    top.minsize(width, height)
|
147 |
+
|
148 |
+
def _init_widgets(self, parent):
    """Create the main frame and every widget group inside it."""
    frame_options = {
        "background": self._BACKGROUND_COLOUR,
        "padx": 1,
        "pady": 1,
        "border": 1,
    }
    self.main_frame = Frame(parent, frame_options)
    # The groups are packed top to bottom in this order.
    for build in (
        self._init_corpus_select,
        self._init_query_box,
        self._init_results_box,
        self._init_paging,
        self._init_status,
    ):
        build(self.main_frame)
    self.main_frame.pack(fill="both", expand=True)
|
158 |
+
|
159 |
+
def _init_menubar(self):
    """Build the File and Edit menus.

    The Edit menu offers the result count per page and the number of
    context characters shown before and after a match.  In each radio group
    the second entry is invoked, which also runs the matching setter.
    """
    self._result_size = IntVar(self.top)
    self._cntx_bf_len = IntVar(self.top)
    self._cntx_af_len = IntVar(self.top)
    menubar = Menu(self.top)

    filemenu = Menu(menubar, tearoff=0, borderwidth=0)
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    editmenu = Menu(menubar, tearoff=0)

    # Edit > Result Count
    rescntmenu = Menu(editmenu, tearoff=0)
    for size in (20, 50, 100):
        rescntmenu.add_radiobutton(
            label=str(size),
            variable=self._result_size,
            underline=0,
            value=size,
            command=self.set_result_size,
        )
    rescntmenu.invoke(1)
    editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

    # Edit > Context > Before
    cntxmenu = Menu(editmenu, tearoff=0)
    cntxbfmenu = Menu(cntxmenu, tearoff=0)
    for length in (60, 80, 100):
        cntxbfmenu.add_radiobutton(
            label=str(length) + " characters",
            variable=self._cntx_bf_len,
            underline=0,
            value=length,
            command=self.set_cntx_bf_len,
        )
    cntxbfmenu.invoke(1)
    cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)

    # Edit > Context > After
    cntxafmenu = Menu(cntxmenu, tearoff=0)
    for length in (70, 90, 110):
        cntxafmenu.add_radiobutton(
            label=str(length) + " characters",
            variable=self._cntx_af_len,
            underline=0,
            value=length,
            command=self.set_cntx_af_len,
        )
    cntxafmenu.invoke(1)
    cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)

    editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)

    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    self.top.config(menu=menubar)
|
253 |
+
|
254 |
+
def set_result_size(self, **kwargs):
    """Copy the result count chosen in the menu into the model."""
    chosen = self._result_size.get()
    self.model.result_count = chosen
|
256 |
+
|
257 |
+
def set_cntx_af_len(self, **kwargs):
    """Store the "characters after" value chosen in the Context menu."""
    chosen = self._cntx_af_len.get()
    self._char_after = chosen
|
259 |
+
|
260 |
+
def set_cntx_bf_len(self, **kwargs):
    """Store the "characters before" value chosen in the Context menu."""
    chosen = self._cntx_bf_len.get()
    self._char_before = chosen
|
262 |
+
|
263 |
+
def _init_corpus_select(self, parent):
    """Build the "Corpus:" label and the drop-down used to choose a corpus.

    The drop-down is bound to ``self.var`` and calls ``corpus_selected``
    whenever an entry is picked.
    """
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
    self.var = StringVar(innerframe)
    self.var.set(self.model.DEFAULT_CORPUS)
    Label(
        innerframe,
        justify=LEFT,
        text=" Corpus: ",
        background=self._BACKGROUND_COLOUR,
        padx=2,
        pady=1,
        border=0,
    ).pack(side="left")

    # The default corpus is the first entry; the remaining entries are
    # whatever the model returns from non_default_corpora().
    om = OptionMenu(
        innerframe,
        self.var,
        self.model.DEFAULT_CORPUS,
        *self.model.non_default_corpora(),
        command=self.corpus_selected,
    )
    om["borderwidth"] = 0
    om["highlightthickness"] = 1
    om.pack(side="left")
    innerframe.pack(side="top", fill="x", anchor="n")
|
291 |
+
|
292 |
+
def _init_status(self, parent):
    """Create the status label and pack it at the bottom-left of *parent*."""
    label_options = dict(
        justify=LEFT,
        relief=SUNKEN,
        background=self._BACKGROUND_COLOUR,
        border=0,
        padx=1,
        pady=0,
    )
    self.status = Label(parent, **label_options)
    self.status.pack(side="top", anchor="sw")
|
303 |
+
|
304 |
+
def _init_query_box(self, parent):
    """Create the query entry and the Search button next to it."""
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
    row = Frame(innerframe, background=self._BACKGROUND_COLOUR)

    self.query_box = Entry(row, width=60)
    self.query_box.pack(side="left", fill="x", pady=25, anchor="center")

    self.search_button = Button(
        row,
        text="Search",
        command=self.search,
        borderwidth=1,
        highlightthickness=1,
    )
    self.search_button.pack(side="left", fill="x", pady=25, anchor="center")

    # Pressing Return inside the entry triggers the same search.
    self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)

    row.pack()
    innerframe.pack(side="top", fill="x", anchor="n")
|
320 |
+
|
321 |
+
def search_enter_keypress_handler(self, *event):
    """Key-binding callback: run a search, ignoring the Tk event argument."""
    self.search()
|
323 |
+
|
324 |
+
def _init_results_box(self, parent):
    """Create the read-only results text box, its highlight tags and scrollbars."""
    innerframe = Frame(parent)
    upper = Frame(innerframe)  # text box + vertical scrollbar
    lower = Frame(innerframe)  # horizontal scrollbar + corner spacer
    vscrollbar = Scrollbar(upper, borderwidth=1)
    hscrollbar = Scrollbar(lower, borderwidth=1, orient="horiz")

    self.results_box = Text(
        upper,
        font=Font(family="courier", size="16"),
        state="disabled",
        borderwidth=1,
        yscrollcommand=vscrollbar.set,
        xscrollcommand=hscrollbar.set,
        wrap="none",
        width="40",
        height="20",
        exportselection=1,
    )
    self.results_box.pack(side="left", fill="both", expand=True)

    # Foreground colours for the two highlight tags used by write_results.
    self.results_box.tag_config(
        self._HIGHLIGHT_WORD_TAG, foreground=self._HIGHLIGHT_WORD_COLOUR
    )
    self.results_box.tag_config(
        self._HIGHLIGHT_LABEL_TAG, foreground=self._HIGHLIGHT_LABEL_COLOUR
    )

    vscrollbar.pack(side="left", fill="y", anchor="e")
    vscrollbar.config(command=self.results_box.yview)
    hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
    hscrollbar.config(command=self.results_box.xview)

    # Spacer label: with the pack layout manager this is the way used here
    # to keep the two scrollbars from overlapping in the corner.
    spacer = Label(lower, text=" ", background=self._BACKGROUND_COLOUR)
    spacer.pack(side="left", anchor="e")

    upper.pack(side="top", fill="both", expand=True, anchor="n")
    lower.pack(side="bottom", fill="x", anchor="s")
    innerframe.pack(side="top", fill="both", expand=True)
|
360 |
+
|
361 |
+
def _init_paging(self, parent):
    """Create the (initially disabled) Previous/Next buttons and reset the page."""
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)

    prev_button = Button(
        innerframe,
        text="Previous",
        command=self.previous,
        width="10",
        borderwidth=1,
        highlightthickness=1,
        state="disabled",
    )
    self.prev = prev_button
    prev_button.pack(side="left", anchor="center")

    next_button = Button(
        innerframe,
        text="Next",
        command=self.__next__,
        width="10",
        borderwidth=1,
        highlightthickness=1,
        state="disabled",
    )
    self.next = next_button
    next_button.pack(side="right", anchor="center")

    innerframe.pack(side="top", fill="y")
    self.current_page = 0
|
385 |
+
|
386 |
+
def previous(self):
    """Clear the results and ask the model for the page before the current one."""
    self.clear_results_box()
    self.freeze_editable()
    wanted_page = self.current_page - 1
    self.model.prev(wanted_page)
|
390 |
+
|
391 |
+
def __next__(self):
    """Clear the results and ask the model for the page after the current one."""
    self.clear_results_box()
    self.freeze_editable()
    wanted_page = self.current_page + 1
    self.model.next(wanted_page)
|
395 |
+
|
396 |
+
def about(self, *e):
    """Show the "About" text in a message box.

    If the message box cannot be imported or shown, the text is displayed
    in a ``ShowText`` window instead.
    """
    ABOUT = "NLTK Concordance Search Demo\n"
    TITLE = "About: NLTK Concordance Search Demo"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
    except Exception:
        # Best-effort fallback; KeyboardInterrupt/SystemExit still propagate.
        ShowText(self.top, TITLE, ABOUT)
|
405 |
+
|
406 |
+
def _bind_event_handlers(self):
    """Bind each virtual event name to its handler on the top-level window."""
    bindings = (
        (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
        (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
        (SEARCH_ERROR_EVENT, self.handle_search_error),
        (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
    )
    for event_name, handler in bindings:
        self.top.bind(event_name, handler)
|
411 |
+
|
412 |
+
def _poll(self):
    """Handle at most one queued model event, then schedule the next poll."""
    try:
        event = self.queue.get(block=False)
    except q.Empty:
        event = None
        pending = False
    else:
        pending = True

    if pending:
        # Only the first matching handler runs; unknown events are ignored.
        if event == CORPUS_LOADED_EVENT:
            self.handle_corpus_loaded(event)
        elif event == SEARCH_TERMINATED_EVENT:
            self.handle_search_terminated(event)
        elif event == SEARCH_ERROR_EVENT:
            self.handle_search_error(event)
        elif event == ERROR_LOADING_CORPUS_EVENT:
            self.handle_error_loading_corpus(event)

    self.after = self.top.after(POLL_INTERVAL, self._poll)
|
427 |
+
|
428 |
+
def handle_error_loading_corpus(self, event):
    """Show a load-error message in the status bar and reset the widgets."""
    corpus_name = self.var.get()
    self.status["text"] = "Error in loading " + corpus_name
    self.unfreeze_editable()
    self.clear_all()
    self.freeze_editable()
|
433 |
+
|
434 |
+
def handle_corpus_loaded(self, event):
    """Announce the loaded corpus, reset the widgets and focus the query box."""
    corpus_name = self.var.get()
    self.status["text"] = corpus_name + " is loaded"
    self.unfreeze_editable()
    self.clear_all()
    self.query_box.focus_set()
|
439 |
+
|
440 |
+
def handle_search_terminated(self, event):
    """Display the results of a finished search and update status and paging."""
    # todo: refactor the model such that it is less state sensitive
    results = self.model.get_results()
    self.write_results(results)
    self.status["text"] = ""
    if len(results) > 0:
        # Only a non-empty result set moves the view to the requested page.
        self.current_page = self.model.last_requested_page
    else:
        self.status["text"] = "No results found for " + self.model.query
    self.unfreeze_editable()
    self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)
|
451 |
+
|
452 |
+
def handle_search_error(self, event):
    """Show the offending query in the status line and re-enable input.

    :param event: the triggering event (unused)
    """
    failed_query = self.model.query
    self.status["text"] = "Error in query " + failed_query
    self.unfreeze_editable()
|
455 |
+
|
456 |
+
def corpus_selected(self, *args):
    """Load whichever corpus name ``self.var`` currently holds.

    Any positional arguments are accepted and ignored.
    """
    self.load_corpus(self.var.get())
|
459 |
+
|
460 |
+
def load_corpus(self, selection):
    """Ask the model to load ``selection`` unless it is already selected.

    While loading, the status line shows a progress message and the
    editable widgets are frozen.

    :param selection: name of the corpus to load
    """
    if self.model.selected_corpus == selection:
        return
    self.status["text"] = "Loading " + selection + "..."
    self.freeze_editable()
    self.model.load_corpus(selection)
|
465 |
+
|
466 |
+
def search(self):
    """Start a fresh search for the text in the query box.

    Paging and previous results are always reset first; a query that is
    empty or only whitespace then stops without contacting the model.
    """
    self.current_page = 0
    self.clear_results_box()
    self.model.reset_results()
    query = self.query_box.get()
    if not query.strip():
        return
    self.status["text"] = "Searching for " + query
    self.freeze_editable()
    first_page = self.current_page + 1
    self.model.search(query, first_page)
|
476 |
+
|
477 |
+
def write_results(self, results):
    """Insert one page of concordance lines into the results box.

    Each result is a ``(sentence, start, end)`` triple.  The visible
    window runs from ``self._char_before`` characters before the match to
    ``self._char_after`` characters after its start; sentences whose match
    starts too early are left-padded first.  Matched words and tags are
    then tagged for highlighting.

    :param results: sequence of ``(sentence, start, end)`` triples
    """
    box = self.results_box
    box["state"] = "normal"
    total = len(results)
    row = 1
    for entry in results:
        sent, pos1, pos2 = entry[0].strip(), entry[1], entry[2]
        if len(sent) == 0:
            # Blank sentences produce no output and do not consume a row.
            continue
        if pos1 < self._char_before:
            sent, pos1, pos2 = self.pad(sent, pos1, pos2)
        line = sent[pos1 - self._char_before : pos1 + self._char_after]
        if row != total:
            line += "\n"
        box.insert(str(row) + ".0", line)
        word_markers, label_markers = self.words_and_labels(sent, pos1, pos2)
        tagged_spans = (
            (self._HIGHLIGHT_WORD_TAG, word_markers),
            (self._HIGHLIGHT_LABEL_TAG, label_markers),
        )
        for tag, markers in tagged_spans:
            for first, last in ((m[0], m[1]) for m in markers):
                box.tag_add(
                    tag,
                    str(row) + "." + str(first),
                    str(row) + "." + str(last),
                )
        row += 1
    box["state"] = "disabled"
|
504 |
+
|
505 |
+
def words_and_labels(self, sentence, pos1, pos2):
    """Locate the words and tags inside the matched span for highlighting.

    ``sentence[pos1:pos2]`` is treated as space-separated ``word/TAG``
    tokens.  The returned columns are offset by ``self._char_before``.

    :param sentence: the (possibly padded) tagged sentence
    :param pos1: start offset of the match within ``sentence``
    :param pos2: end offset of the match within ``sentence``
    :return: ``(words, labels)``: two lists of ``(start, end)`` column pairs
    """
    words, labels = [], []
    base = self._char_before
    index = 0  # offset of the current token within the matched span
    for token in sentence[pos1:pos2].split(" "):
        if token:
            # Split on the LAST slash so a word that itself contains "/"
            # (e.g. "1/2/CD") no longer breaks the two-way unpacking.
            word, sep, label = token.rpartition("/")
            if not sep:
                # No tag present: highlight the whole token as a word.
                word = token
            start = base + index
            words.append((start, start + len(word)))
            if sep:
                label_start = start + len(word) + 1
                labels.append((label_start, label_start + len(label)))
        # Every token, empty or not, is followed by exactly one space.
        index += len(token) + 1
    return words, labels
|
525 |
+
|
526 |
+
def pad(self, sent, hstart, hend):
    """Left-pad ``sent`` so the highlight starts at ``self._char_before``.

    :param sent: the sentence text
    :param hstart: start offset of the highlighted span
    :param hend: end offset of the highlighted span
    :return: ``(sent, hstart, hend)``, unchanged when no padding is
        needed, otherwise with both offsets shifted by the padding width
    """
    shortfall = self._char_before - hstart
    if shortfall <= 0:
        return sent, hstart, hend
    return " " * shortfall + sent, hstart + shortfall, hend + shortfall
|
532 |
+
|
533 |
+
def destroy(self, *e):
    """Cancel the pending poll and tear down the window; safe to repeat.

    Extra positional arguments are accepted and ignored.
    """
    window = self.top
    if window is None:
        return
    window.after_cancel(self.after)
    window.destroy()
    self.top = None
|
539 |
+
|
540 |
+
def clear_all(self):
    """Empty the query box, reset the model's query and clear the results."""
    entry = self.query_box
    entry.delete(0, END)
    self.model.reset_query()
    self.clear_results_box()
|
544 |
+
|
545 |
+
def clear_results_box(self):
    """Delete all text from the results box.

    The box is switched to ``"normal"`` for the deletion and back to
    ``"disabled"`` afterwards.
    """
    box = self.results_box
    box["state"] = "normal"
    box.delete("1.0", END)
    box["state"] = "disabled"
|
549 |
+
|
550 |
+
def freeze_editable(self):
    """Disable the query box, the search button and both paging buttons."""
    for widget in (self.query_box, self.search_button, self.prev, self.next):
        widget["state"] = "disabled"
|
555 |
+
|
556 |
+
def unfreeze_editable(self):
    """Re-enable the query box and search button, then refresh paging."""
    for widget in (self.query_box, self.search_button):
        widget["state"] = "normal"
    self.set_paging_button_states()
|
560 |
+
|
561 |
+
def set_paging_button_states(self):
    """Enable or disable the paging buttons for the current page.

    "Previous" is disabled on page 0 or 1; "next" is enabled only while
    the model reports more pages after the current one.
    """
    on_first_page = self.current_page in (0, 1)
    self.prev["state"] = "disabled" if on_first_page else "normal"
    more_pages = self.model.has_more_pages(self.current_page)
    self.next["state"] = "normal" if more_pages else "disabled"
|
570 |
+
|
571 |
+
def fire_event(self, event):
    """Queue ``event`` on the top-level window at the tail of its event queue.

    Firing an event so that rendering of widgets happens in the mainloop
    thread.

    :param event: the event sequence to generate
    """
    window = self.top
    window.event_generate(event, when="tail")
|
574 |
+
|
575 |
+
def mainloop(self, *args, **kwargs):
    """Run the window's main loop unless ``in_idle()`` reports true.

    All arguments are forwarded to ``self.top.mainloop``.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
|
579 |
+
|
580 |
+
|
581 |
+
class ConcordanceSearchModel:
    """Search state and background workers for the concordance tool.

    The model records the selected corpus, the current query and a list of
    per-page result sets.  Corpus loading and searching run on worker
    threads, which report their outcome by putting an event name on
    ``queue``.
    """

    def __init__(self, queue):
        """Create an empty model that reports its events on ``queue``."""
        self.queue = queue
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.selected_corpus = None
        self.reset_query()
        self.reset_results()
        self.result_count = None
        self.last_sent_searched = 0

    def non_default_corpora(self):
        """Return the sorted corpus names, excluding the default corpus."""
        names = list(self.CORPORA.keys())
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def load_corpus(self, name):
        """Select ``name`` and load its tagged sentences on a worker thread."""
        self.selected_corpus = name
        self.tagged_sents = []
        loader = self.LoadCorpus(name, self)
        loader.start()

    def search(self, query, page):
        """Record the query and requested page, then search on a worker thread."""
        self.query = query
        self.last_requested_page = page
        searcher = self.SearchCorpus(self, page, self.result_count)
        searcher.start()

    def next(self, page):
        """Request ``page``, searching only if it has not been computed yet."""
        self.last_requested_page = page
        if len(self.results) >= page:
            # Already cached: just tell the view to redraw.
            self.queue.put(SEARCH_TERMINATED_EVENT)
        else:
            self.search(self.query, page)

    def prev(self, page):
        """Request an earlier page and signal the view to redraw."""
        self.last_requested_page = page
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def reset_results(self):
        """Discard all cached pages and restart from the first sentence."""
        self.last_sent_searched = 0
        self.results = []
        self.last_page = None

    def reset_query(self):
        """Forget the current query."""
        self.query = None

    def set_results(self, page, resultset):
        """Insert ``resultset`` at the list position for 1-based ``page``."""
        self.results.insert(page - 1, resultset)

    def get_results(self):
        """Return the result set of the most recently requested page."""
        return self.results[self.last_requested_page - 1]

    def has_more_pages(self, page):
        """Tell whether pages beyond ``page`` may exist.

        False when nothing has been found; True while the last page is
        still unknown; otherwise whether ``page`` precedes the last page.
        """
        if self.results == [] or self.results[0] == []:
            return False
        return self.last_page is None or page < self.last_page

    class LoadCorpus(threading.Thread):
        """Worker thread that loads one corpus into the model."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model = model
            self.name = name

        def run(self):
            """Load the corpus and queue a success or failure event."""
            try:
                tagged = self.model.CORPORA[self.name]()
                lines = []
                for sent in tagged:
                    # Each sentence becomes one "word/TAG word/TAG ..." string.
                    lines.append(" ".join(w + "/" + t for (w, t) in sent))
                self.model.tagged_sents = lines
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)

    class SearchCorpus(threading.Thread):
        """Worker thread that collects one page of matching sentences."""

        def __init__(self, model, page, count):
            self.model = model
            self.count = count
            self.page = page
            threading.Thread.__init__(self)

        def run(self):
            """Scan from the last searched sentence and queue the outcome."""
            model = self.model
            pattern = self.processed_query()
            hits = []
            scanned = 0
            for sent in model.tagged_sents[model.last_sent_searched :]:
                try:
                    found = re.search(pattern, sent)
                except re.error:
                    model.reset_results()
                    model.queue.put(SEARCH_ERROR_EVENT)
                    return
                if found:
                    hits.append((sent, found.start(), found.end()))
                    # One hit beyond the page size proves another page exists.
                    if len(hits) > self.count:
                        model.last_sent_searched += scanned - 1
                        break
                scanned += 1
            if self.count >= len(hits):
                # The corpus ran out first, so this is the final page.
                model.last_sent_searched += scanned - 1
                model.last_page = self.page
                model.set_results(self.page, hits)
            else:
                # Drop the surplus hit; it belongs to the next page.
                model.set_results(self.page, hits[:-1])
            model.queue.put(SEARCH_TERMINATED_EVENT)

        def processed_query(self):
            """Translate the user's query into a regex over ``word/TAG`` text.

            A ``.`` matches any character except ``/`` and space.  An
            all-uppercase term is matched against tags, a term containing
            ``/`` is used as given, and any other term is matched against
            words.
            """
            parts = []
            for term in self.model.query.split():
                term = re.sub(r"\.", r"[^/ ]", term)
                if re.match("[A-Z]+$", term):
                    piece = BOUNDARY + WORD_OR_TAG + "/" + term + BOUNDARY
                elif "/" in term:
                    piece = BOUNDARY + term + BOUNDARY
                else:
                    piece = BOUNDARY + term + "/" + WORD_OR_TAG + BOUNDARY
                parts.append(piece)
            return " ".join(parts)
|
699 |
+
|
700 |
+
|
701 |
+
def app():
    """Create the concordance search window and run its main loop."""
    view = ConcordanceSearchView()
    view.mainloop()
|
704 |
+
|
705 |
+
|
706 |
+
if __name__ == "__main__":
|
707 |
+
app()
|
708 |
+
|
709 |
+
__all__ = ["app"]
|
pipeline/nltk/app/nemo_app.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06
|
2 |
+
# https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783
|
3 |
+
|
4 |
+
"""
|
5 |
+
Finding (and Replacing) Nemo
|
6 |
+
|
7 |
+
Instant Regular Expressions
|
8 |
+
Created by Aristide Grange
|
9 |
+
"""
|
10 |
+
import itertools
|
11 |
+
import re
|
12 |
+
from tkinter import SEL_FIRST, SEL_LAST, Frame, Label, PhotoImage, Scrollbar, Text, Tk
|
13 |
+
|
14 |
+
windowTitle = "Finding (and Replacing) Nemo"
|
15 |
+
initialFind = r"n(.*?)e(.*?)m(.*?)o"
|
16 |
+
initialRepl = r"M\1A\2K\3I"
|
17 |
+
initialText = """\
|
18 |
+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
|
19 |
+
Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
|
20 |
+
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
|
21 |
+
Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
|
22 |
+
"""
|
23 |
+
images = {
|
24 |
+
"FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg6c
C6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
|
25 |
+
"find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
|
26 |
+
"REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TDSt
5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
|
27 |
+
"repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
|
28 |
+
}
|
29 |
+
colors = ["#FF7B39", "#80F121"]
|
30 |
+
emphColors = ["#DAFC33", "#F42548"]
|
31 |
+
fieldParams = {
|
32 |
+
"height": 3,
|
33 |
+
"width": 70,
|
34 |
+
"font": ("monaco", 14),
|
35 |
+
"highlightthickness": 0,
|
36 |
+
"borderwidth": 0,
|
37 |
+
"background": "white",
|
38 |
+
}
|
39 |
+
textParams = {
|
40 |
+
"bg": "#F7E0D4",
|
41 |
+
"fg": "#2321F1",
|
42 |
+
"highlightthickness": 0,
|
43 |
+
"width": 1,
|
44 |
+
"height": 10,
|
45 |
+
"font": ("verdana", 16),
|
46 |
+
"wrap": "word",
|
47 |
+
}
|
48 |
+
|
49 |
+
|
50 |
+
class Zone:
    """One field-plus-text panel of the window.

    A zone has an icon label, a small editable field (``fld``) and a larger
    text area (``txt``).  Subclasses supply ``substitute``.
    """

    def __init__(self, image, initialField, initialText):
        """Build the panel's widgets inside the module-level ``root``.

        :param image: key of the dimmed icon in ``images``; the upper-cased
            key selects the normal icon
        :param initialField: initial contents of the field
        :param initialText: initial contents of the text area
        """
        field_frame = Frame(root)
        field_frame.config(background="white")
        self.image = PhotoImage(format="gif", data=images[image.upper()])
        self.imageDimmed = PhotoImage(format="gif", data=images[image])
        self.img = Label(field_frame)
        self.img.config(borderwidth=0)
        self.img.pack(side="left")
        self.fld = Text(field_frame, **fieldParams)
        self.initScrollText(field_frame, self.fld, initialField)

        text_frame = Frame(root)
        self.txt = Text(text_frame, **textParams)
        self.initScrollText(text_frame, self.txt, initialText)
        # ``colors`` and ``emphColors`` each hold exactly two entries.
        for background, foreground in zip(colors, emphColors):
            self.txt.tag_config(background, background=background)
            self.txt.tag_config("emph" + background, foreground=foreground)

    def initScrollText(self, frm, txt, contents):
        """Attach a scrollbar to ``txt``, fill it, and pack it in ``frm``.

        A thin ridge separator is packed below the frame.
        """
        scrollbar = Scrollbar(frm)
        scrollbar.config(command=txt.yview)
        scrollbar.pack(side="right", fill="y")
        txt.pack(side="left", expand=True, fill="x")
        txt.config(yscrollcommand=scrollbar.set)
        txt.insert("1.0", contents)
        frm.pack(fill="x")
        separator = Frame(height=2, bd=1, relief="ridge")
        separator.pack(fill="x")

    def refresh(self):
        """Redo the substitution and show the matching icon.

        The normal icon is shown on success, the dimmed one when
        ``substitute`` raises ``re.error``.
        """
        self.colorCycle = itertools.cycle(colors)
        try:
            self.substitute()
        except re.error:
            self.img.config(image=self.imageDimmed)
        else:
            self.img.config(image=self.image)
|
85 |
+
|
86 |
+
|
87 |
+
class FindZone(Zone):
    """The "find" panel: highlights every match of the field's regex."""

    def addTags(self, m):
        """Colour one match, plus its ``emph`` group when there is one.

        Used as the replacement callback of ``re.sub``; it returns nothing.

        :param m: the match object for the current match
        """
        from tkinter import TclError

        color = next(self.colorCycle)
        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
        try:
            self.txt.tag_add(
                "emph" + color, "1.0+%sc" % m.start("emph"), "1.0+%sc" % m.end("emph")
            )
        except (IndexError, TclError):
            # IndexError: the pattern has no "emph" group (nothing selected).
            # TclError: Tk rejected the computed index.  Either way the
            # emphasis is optional, so the match keeps its base colour.
            pass

    def substitute(self, *args):
        """Recompile the field's regex and re-highlight the text.

        When part of the field is selected and is itself a valid regex, it
        is wrapped in a named ``emph`` group so ``addTags`` can emphasise
        it.  A malformed field raises ``re.error`` to the caller, leaving
        ``self.rex`` as the empty pattern.
        """
        from tkinter import TclError

        for color in colors:
            self.txt.tag_remove(color, "1.0", "end")
            self.txt.tag_remove("emph" + color, "1.0", "end")
        self.rex = re.compile("")  # default value in case of malformed regexp
        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
        try:
            # Validate the selection alone before splicing it into the pattern.
            re.compile("(?P<emph>%s)" % self.fld.get(SEL_FIRST, SEL_LAST))
            self.rexSel = re.compile(
                "%s(?P<emph>%s)%s"
                % (
                    self.fld.get("1.0", SEL_FIRST),
                    self.fld.get(SEL_FIRST, SEL_LAST),
                    self.fld.get(SEL_LAST, "end")[:-1],
                ),
                re.MULTILINE,
            )
        except (re.error, TclError):
            # TclError: nothing is selected.  re.error: the selection does
            # not form a valid sub-pattern.  Fall back to plain highlighting.
            self.rexSel = self.rex
        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
|
118 |
+
|
119 |
+
|
120 |
+
class ReplaceZone(Zone):
    """The "replace" panel: shows the find text with each match replaced."""

    def addTags(self, m):
        """Swap one match for its replacement text, coloured.

        ``self.diff`` tracks how far earlier replacements have shifted the
        text, so later match offsets still land in the right place.

        :param m: the match object for the current match
        """
        replacement = sz.rex.sub(self.repl, m.group())
        start = m.start() + self.diff
        end = m.end() + self.diff
        self.txt.delete("1.0+%sc" % start, "1.0+%sc" % end)
        self.txt.insert("1.0+%sc" % start, replacement, next(self.colorCycle))
        matched_length = m.end() - m.start()
        self.diff += len(replacement) - matched_length

    def substitute(self):
        """Copy the find panel's text and apply the replacement to each match.

        Numeric back-references in the field (``\\1``) are rewritten to the
        ``\\g<1>`` form before use.
        """
        self.txt.delete("1.0", "end")
        self.txt.insert("1.0", sz.txt.get("1.0", "end")[:-1])
        self.diff = 0
        template = self.fld.get("1.0", "end")[:-1]
        self.repl = rex0.sub(r"\\g<\1>", template)
        source_text = sz.txt.get("1.0", "end")[:-1]
        sz.rex.sub(self.addTags, source_text)
|
135 |
+
|
136 |
+
|
137 |
+
def launchRefresh(_):
    """Schedule a refresh of the find zone, then the replace zone, at idle.

    The event argument is ignored.
    """
    for zone in (sz, rz):
        zone.fld.after_idle(zone.refresh)
|
140 |
+
|
141 |
+
|
142 |
+
def app():
    """Build the find/replace window and run its main loop.

    The window, both zones and the back-reference regex are stored in
    module globals because the zone classes read them directly.
    """
    global root, sz, rz, rex0
    root = Tk()
    root.resizable(height=False, width=True)
    root.title(windowTitle)
    root.minsize(width=250, height=0)

    sz = FindZone("find", initialFind, initialText)
    # Any mouse activity in the find field may change the selection.
    for sequence in ("<Button-1>", "<ButtonRelease-1>", "<B1-Motion>"):
        sz.fld.bind(sequence, launchRefresh)
    sz.rexSel = re.compile("")

    rz = ReplaceZone("repl", initialRepl, "")
    rex0 = re.compile(r"(?<!\\)\\([0-9]+)")

    root.bind_all("<Key>", launchRefresh)
    launchRefresh(None)
    root.mainloop()
|
158 |
+
|
159 |
+
|
160 |
+
if __name__ == "__main__":
|
161 |
+
app()
|
162 |
+
|
163 |
+
__all__ = ["app"]
|
pipeline/nltk/app/rdparser_app.py
ADDED
@@ -0,0 +1,1052 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Recursive Descent Parser Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Edward Loper <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
"""
|
9 |
+
A graphical tool for exploring the recursive descent parser.
|
10 |
+
|
11 |
+
The recursive descent parser maintains a tree, which records the
|
12 |
+
structure of the portion of the text that has been parsed. It uses
|
13 |
+
CFG productions to expand the fringe of the tree, and matches its
|
14 |
+
leaves against the text. Initially, the tree contains the start
|
15 |
+
symbol ("S"). It is shown in the main canvas, to the right of the
|
16 |
+
list of available expansions.
|
17 |
+
|
18 |
+
The parser builds up a tree structure for the text using three
|
19 |
+
operations:
|
20 |
+
|
21 |
+
- "expand" uses a CFG production to add children to a node on the
|
22 |
+
fringe of the tree.
|
23 |
+
- "match" compares a leaf in the tree to a text token.
|
24 |
+
- "backtrack" returns the tree to its state before the most recent
|
25 |
+
expand or match operation.
|
26 |
+
|
27 |
+
The parser maintains a list of tree locations called a "frontier" to
|
28 |
+
remember which nodes have not yet been expanded and which leaves have
|
29 |
+
not yet been matched against the text. The leftmost frontier node is
|
30 |
+
shown in green, and the other frontier nodes are shown in blue. The
|
31 |
+
parser always performs expand and match operations on the leftmost
|
32 |
+
element of the frontier.
|
33 |
+
|
34 |
+
You can control the parser's operation by using the "expand," "match,"
|
35 |
+
and "backtrack" buttons; or you can use the "step" button to let the
|
36 |
+
parser automatically decide which operation to apply. The parser uses
|
37 |
+
the following rules to decide which operation to apply:
|
38 |
+
|
39 |
+
- If the leftmost frontier element is a token, try matching it.
|
40 |
+
- If the leftmost frontier element is a node, try expanding it with
|
41 |
+
the first untried expansion.
|
42 |
+
- Otherwise, backtrack.
|
43 |
+
|
44 |
+
The "expand" button applies the untried expansion whose CFG production
|
45 |
+
is listed earliest in the grammar. To manually choose which expansion
|
46 |
+
to apply, click on a CFG production from the list of available
|
47 |
+
expansions, on the left side of the main window.
|
48 |
+
|
49 |
+
The "autostep" button will let the parser continue applying
|
50 |
+
applications to the tree until it reaches a complete parse. You can
|
51 |
+
cancel an autostep in progress at any time by clicking on the
|
52 |
+
"autostep" button again.
|
53 |
+
|
54 |
+
Keyboard Shortcuts::
|
55 |
+
[Space]\t Perform the next expand, match, or backtrack operation
|
56 |
+
[a]\t Step through operations until the next complete parse
|
57 |
+
[e]\t Perform an expand operation
|
58 |
+
[m]\t Perform a match operation
|
59 |
+
[b]\t Perform a backtrack operation
|
60 |
+
[Delete]\t Reset the parser
|
61 |
+
[g]\t Show/hide available expansions list
|
62 |
+
[h]\t Help
|
63 |
+
[Ctrl-p]\t Print
|
64 |
+
[q]\t Quit
|
65 |
+
"""
|
66 |
+
|
67 |
+
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
|
68 |
+
from tkinter.font import Font
|
69 |
+
|
70 |
+
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
|
71 |
+
from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
|
72 |
+
from nltk.parse import SteppingRecursiveDescentParser
|
73 |
+
from nltk.tree import Tree
|
74 |
+
from nltk.util import in_idle
|
75 |
+
|
76 |
+
|
77 |
+
class RecursiveDescentApp:
|
78 |
+
"""
|
79 |
+
A graphical tool for exploring the recursive descent parser. The tool
|
80 |
+
displays the parser's tree and the remaining text, and allows the
|
81 |
+
user to control the parser's operation. In particular, the user
|
82 |
+
can expand subtrees on the frontier, match tokens on the frontier
|
83 |
+
against the text, and backtrack. A "step" button simply steps
|
84 |
+
through the parsing process, performing the operations that
|
85 |
+
``RecursiveDescentParser`` would use.
|
86 |
+
"""
|
87 |
+
|
88 |
+
def __init__(self, grammar, sent, trace=0):
|
89 |
+
self._sent = sent
|
90 |
+
self._parser = SteppingRecursiveDescentParser(grammar, trace)
|
91 |
+
|
92 |
+
# Set up the main window.
|
93 |
+
self._top = Tk()
|
94 |
+
self._top.title("Recursive Descent Parser Application")
|
95 |
+
|
96 |
+
# Set up key bindings.
|
97 |
+
self._init_bindings()
|
98 |
+
|
99 |
+
# Initialize the fonts.
|
100 |
+
self._init_fonts(self._top)
|
101 |
+
|
102 |
+
# Animations. animating_lock is a lock to prevent the demo
|
103 |
+
# from performing new operations while it's animating.
|
104 |
+
self._animation_frames = IntVar(self._top)
|
105 |
+
self._animation_frames.set(5)
|
106 |
+
self._animating_lock = 0
|
107 |
+
self._autostep = 0
|
108 |
+
|
109 |
+
# The user can hide the grammar.
|
110 |
+
self._show_grammar = IntVar(self._top)
|
111 |
+
self._show_grammar.set(1)
|
112 |
+
|
113 |
+
# Create the basic frames.
|
114 |
+
self._init_menubar(self._top)
|
115 |
+
self._init_buttons(self._top)
|
116 |
+
self._init_feedback(self._top)
|
117 |
+
self._init_grammar(self._top)
|
118 |
+
self._init_canvas(self._top)
|
119 |
+
|
120 |
+
# Initialize the parser.
|
121 |
+
self._parser.initialize(self._sent)
|
122 |
+
|
123 |
+
# Resize callback
|
124 |
+
self._canvas.bind("<Configure>", self._configure)
|
125 |
+
|
126 |
+
#########################################
|
127 |
+
## Initialization Helpers
|
128 |
+
#########################################
|
129 |
+
|
130 |
+
def _init_fonts(self, root):
    """
    Create the fonts used by the application and the size variable
    that the View menu is tied to.

    :param root: the window whose ``*Font`` option default is set.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    self._sysfont = Font(font=Button()["font"])
    root.option_add("*Font", self._sysfont)

    # Our font size (default: same as the system font).
    self._size = IntVar(root)
    self._size.set(self._sysfont.cget("size"))
    size = self._size.get()

    self._boldfont = Font(family="helvetica", weight="bold", size=size)
    self._font = Font(family="helvetica", size=size)

    # The big font is 2 further from zero, whichever sign the size has.
    big = size - 2 if size < 0 else size + 2
    self._bigfont = Font(family="helvetica", weight="bold", size=big)
|
146 |
+
|
147 |
+
def _init_grammar(self, parent):
    """
    Build the grammar pane: a label plus a listbox holding one row per
    production of the parser's grammar.

    :param parent: the widget the pane is packed into.
    """
    frame = self._prodframe = Frame(parent)
    frame.pack(fill="both", side="left", padx=2)

    self._prodlist_label = Label(
        frame, font=self._boldfont, text="Available Expansions"
    )
    self._prodlist_label.pack()

    listbox_options = {
        "selectmode": "single",
        "relief": "groove",
        "background": "white",
        "foreground": "#909090",
        "font": self._font,
        "selectforeground": "#004040",
        "selectbackground": "#c0f0c0",
    }
    listbox = self._prodlist = Listbox(frame, **listbox_options)
    listbox.pack(side="right", fill="both", expand=1)

    self._productions = list(self._parser.grammar().productions())
    for production in self._productions:
        listbox.insert("end", (" %s" % production))
    count = len(self._productions)
    listbox.config(height=min(count, 25))

    # Only long grammars (more than 25 productions) get a scrollbar.
    if count > 25:
        scrollbar = Scrollbar(frame, orient="vertical")
        listbox.config(yscrollcommand=scrollbar.set)
        scrollbar.config(command=listbox.yview)
        scrollbar.pack(side="left", fill="y")

    # Selecting a production applies it.
    listbox.bind("<<ListboxSelect>>", self._prodlist_select)
|
182 |
+
|
183 |
+
def _init_bindings(self):
    """
    Install the keyboard shortcuts on the top-level window.
    """
    # NOTE(review): the Apply menu advertises "Ctrl-e" for Expand, but no
    # <Control-e> binding is installed here; only plain "e" is bound.
    shortcuts = (
        # Quit.
        ("<Control-q>", self.destroy),
        ("<Control-x>", self.destroy),
        ("<Escape>", self.destroy),
        # Expand.
        ("e", self.expand),
        # Match.
        ("m", self.match),
        ("<Alt-m>", self.match),
        ("<Control-m>", self.match),
        # Backtrack.
        ("b", self.backtrack),
        ("<Alt-b>", self.backtrack),
        ("<Control-b>", self.backtrack),
        ("<Control-z>", self.backtrack),
        ("<BackSpace>", self.backtrack),
        # Stepping.
        ("a", self.autostep),
        ("<Control-space>", self.autostep),
        ("<Control-c>", self.cancel_autostep),
        ("<space>", self.step),
        # Miscellaneous.
        ("<Delete>", self.reset),
        ("<Control-p>", self.postscript),
        ("<Control-h>", self.help),
        ("<F1>", self.help),
        ("<Control-g>", self.edit_grammar),
        ("<Control-t>", self.edit_sentence),
    )
    for sequence, handler in shortcuts:
        self._top.bind(sequence, handler)
|
215 |
+
|
216 |
+
def _init_buttons(self, parent):
    """
    Build the row of operation buttons at the bottom of ``parent``.
    """
    frame = self._buttonframe = Frame(parent)
    frame.pack(fill="none", side="bottom", padx=3, pady=2)

    blue, green, red = "#90c0d0", "#90f090", "#f0a0a0"
    # (label, background, callback, extra Button options)
    specs = (
        ("Step", blue, self.step, {}),
        ("Autostep", blue, self.autostep, {}),
        ("Expand", green, self.expand, {"underline": 0}),
        ("Match", green, self.match, {"underline": 0}),
        ("Backtrack", red, self.backtrack, {"underline": 0}),
    )
    for label, background, callback, extra in specs:
        Button(
            frame,
            text=label,
            background=background,
            foreground="black",
            command=callback,
            **extra
        ).pack(side="left")
|
258 |
+
# Replace autostep...
|
259 |
+
|
260 |
+
# self._autostep_button = Button(buttonframe, text='Autostep',
|
261 |
+
# underline=0, command=self.autostep)
|
262 |
+
# self._autostep_button.pack(side='left')
|
263 |
+
|
264 |
+
def _configure(self, event):
    """
    Canvas resize callback: stop auto-stepping, stretch the scroll
    region's bottom edge to the new height (minus 6) and redraw.
    """
    self._autostep = 0
    left, top, right, _ = self._cframe.scrollregion()
    bottom = event.height - 6
    self._canvas["scrollregion"] = "%d %d %d %d" % (left, top, right, bottom)
    self._redraw()
|
270 |
+
|
271 |
+
def _init_feedback(self, parent):
    """
    Build the "Last Operation" status line at the bottom of ``parent``.

    ``self._lastoper1`` and ``self._lastoper2`` are the two labels the
    operations write their feedback into.
    """
    frame = self._feedbackframe = Frame(parent)
    frame.pack(fill="x", side="bottom", padx=3, pady=3)

    self._lastoper_label = Label(frame, text="Last Operation:", font=self._font)
    self._lastoper_label.pack(side="left")

    sunken = Frame(frame, relief="sunken", border=1)
    sunken.pack(fill="x", side="right", expand=1, padx=5)

    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(sunken, foreground="#007070", **shared)
    self._lastoper2 = Label(
        sunken, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
|
293 |
+
|
294 |
+
def _init_canvas(self, parent):
    """
    Build the canvas frame the tree and the text are drawn on, and
    reset the drawing state.
    """
    frame_options = {
        "background": "white",
        "closeenough": 10,
        "border": 2,
        "relief": "sunken",
    }
    self._cframe = CanvasFrame(parent, **frame_options)
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    self._canvas = self._cframe.canvas()

    # Nothing has been drawn yet: no tree, no text widgets, no rule.
    self._tree = None
    self._textwidgets = []
    self._textline = None
|
310 |
+
|
311 |
+
def _init_menubar(self, parent):
    """
    Build the menu bar (File, Edit, Apply, View, Animate, Help) and
    attach it to ``parent``.
    """
    menubar = Menu(parent)

    def add_commands(menu, entries):
        # Each entry is (label, underline, command, accelerator);
        # ``None`` stands for a separator, and a ``None`` accelerator
        # means the option is not passed at all.
        for entry in entries:
            if entry is None:
                menu.add_separator()
                continue
            label, underline, command, accelerator = entry
            options = {"label": label, "underline": underline, "command": command}
            if accelerator is not None:
                options["accelerator"] = accelerator
            menu.add_command(**options)

    # File
    filemenu = Menu(menubar, tearoff=0)
    add_commands(
        filemenu,
        (
            ("Reset Parser", 0, self.reset, "Del"),
            ("Print to Postscript", 0, self.postscript, "Ctrl-p"),
            ("Exit", 1, self.destroy, "Ctrl-x"),
        ),
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # Edit
    editmenu = Menu(menubar, tearoff=0)
    add_commands(
        editmenu,
        (
            ("Edit Grammar", 5, self.edit_grammar, "Ctrl-g"),
            ("Edit Text", 5, self.edit_sentence, "Ctrl-t"),
        ),
    )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    # Apply
    rulemenu = Menu(menubar, tearoff=0)
    add_commands(
        rulemenu,
        (
            ("Step", 1, self.step, "Space"),
            None,
            ("Match", 0, self.match, "Ctrl-m"),
            ("Expand", 0, self.expand, "Ctrl-e"),
            None,
            ("Backtrack", 0, self.backtrack, "Ctrl-b"),
        ),
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    # View: grammar toggle plus the font-size radio group.
    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    viewmenu.add_separator()
    for label, value in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=value,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # Animate: radio group over the number of animation frames.
    animatemenu = Menu(menubar, tearoff=0)
    for label, value, accelerator in (
        ("No Animation", 0, None),
        ("Slow Animation", 10, "-"),
        ("Normal Animation", 5, "="),
        ("Fast Animation", 2, "+"),
    ):
        options = {
            "label": label,
            "underline": 0,
            "variable": self._animation_frames,
            "value": value,
        }
        if accelerator is not None:
            options["accelerator"] = accelerator
        animatemenu.add_radiobutton(**options)
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    # Help
    helpmenu = Menu(menubar, tearoff=0)
    add_commands(
        helpmenu,
        (
            ("About", 0, self.about, None),
            ("Instructions", 0, self.help, "F1"),
        ),
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
|
441 |
+
|
442 |
+
#########################################
|
443 |
+
## Helper
|
444 |
+
#########################################
|
445 |
+
|
446 |
+
def _get(self, widget, treeloc):
    """
    Follow ``treeloc`` (a sequence of child indices) down from
    ``widget`` and return the widget found there.  When that widget is
    a ``TreeSegmentWidget``, its label widget is returned instead.
    """
    node = widget
    for index in treeloc:
        node = node.subtrees()[index]
    return node.label() if isinstance(node, TreeSegmentWidget) else node
|
452 |
+
|
453 |
+
#########################################
|
454 |
+
## Main draw procedure
|
455 |
+
#########################################
|
456 |
+
|
457 |
+
def _redraw(self):
    """
    Rebuild the whole canvas from the parser state: the tree, one text
    widget per word, and the dashed rule separating the two.
    """
    canvas = self._canvas

    # Throw away whatever was drawn before.
    if self._tree is not None:
        self._cframe.destroy_widget(self._tree)
    for stale in self._textwidgets:
        self._cframe.destroy_widget(stale)
    if self._textline is not None:
        self._canvas.delete(self._textline)

    # The tree.
    neg_size = -self._size.get()
    self._tree = tree_to_treesegment(
        canvas,
        self._parser.tree(),
        tree_color="#000000",
        tree_width=2,
        node_font=("helvetica", neg_size, "bold"),
        leaf_font=("helvetica", neg_size),
    )
    self._cframe.add_widget(self._tree, 30, 5)

    # The text, placed 5 units above the bottom of the scroll region.
    bottom = self._cframe.scrollregion()[3]
    text_top = bottom
    self._textwidgets = [
        TextWidget(canvas, word, font=self._font) for word in self._sent
    ]
    for twidget in self._textwidgets:
        self._cframe.add_widget(twidget, 0, 0)
        twidget.move(0, bottom - twidget.bbox()[3] - 5)
        text_top = min(text_top, twidget.bbox()[1])

    # A dashed line just above the text separates it from the tree.
    rule_y = text_top - 5
    self._textline = canvas.create_line(-5000, rule_y, 5000, rule_y, dash=".")

    # Highlights, then line the text up with the matched leaves.
    self._highlight_nodes()
    self._highlight_prodlist()
    self._position_text()
|
501 |
+
|
502 |
+
def _redraw_quick(self):
    """
    Lightweight refresh used after an animation: redo the highlights
    and the text placement without rebuilding any widget.
    """
    for refresh in (
        self._highlight_nodes,
        self._highlight_prodlist,
        self._position_text,
    ):
        refresh()
|
507 |
+
|
508 |
+
def _highlight_nodes(self):
    """
    Colour the frontier nodes: the first one (bold, "#20a050") and
    every other one ("#008080").
    """
    bold = ("helvetica", -self._size.get(), "bold")
    for position, treeloc in enumerate(self._parser.frontier()):
        node = self._get(self._tree, treeloc)
        if position == 0:
            node["color"] = "#20a050"
            node["font"] = bold
        else:
            node["color"] = "#008080"
|
516 |
+
|
517 |
+
def _highlight_prodlist(self):
    """
    Refill the production listbox, selecting every production that can
    currently be expanded and tagging the already-tried ones.
    """
    listbox = self._prodlist
    listbox.delete(0, "end")
    expandable = self._parser.expandable_productions()
    untried = self._parser.untried_expandable_productions()
    for index, production in enumerate(self._productions):
        can_expand = production in expandable
        if can_expand and production not in untried:
            row = " %s (TRIED)" % production
        else:
            row = " %s" % production
        listbox.insert(index, row)
        if can_expand:
            listbox.selection_set(index)
|
534 |
+
|
535 |
+
def _position_text(self):
    """
    Lay out the text widgets: matched words sit under their tree
    leaves, unmatched words follow to the right, and matched leaves
    are pulled down towards the text.
    """
    numwords = len(self._sent)
    num_matched = numwords - len(self._parser.remaining_text())
    leaves = self._tree_leaves()[:num_matched]

    # Matched words: same x as the leaf they were matched against.
    xmax = self._tree.bbox()[0]
    for i, leaf in enumerate(leaves):
        word = self._textwidgets[i]
        word["color"] = "#006040"
        leaf["color"] = "#006040"
        word.move(leaf.bbox()[0] - word.bbox()[0], 0)
        xmax = word.bbox()[2] + 10

    # Unmatched words: greyed out, chained left to right 10 units apart.
    for i in range(len(leaves), numwords):
        word = self._textwidgets[i]
        word["color"] = "#a0a0a0"
        word.move(xmax - word.bbox()[0], 0)
        xmax = word.bbox()[2] + 10

    # If we have a complete parse, make everything green :)
    if self._parser.currently_complete():
        for word in self._textwidgets:
            word["color"] = "#00a000"

    # Drop each matched leaf to 10 units above its word, but never
    # closer than 10 units below its parent's label.
    for i, leaf in enumerate(leaves):
        word = self._textwidgets[i]
        dy = word.bbox()[1] - leaf.bbox()[3] - 10.0
        dy = max(dy, leaf.parent().label().bbox()[3] - leaf.bbox()[3] + 10)
        leaf.move(0, dy)
|
568 |
+
|
569 |
+
def _tree_leaves(self, tree=None):
    """
    Return the leaf widgets under ``tree`` in left-to-right order.

    :param tree: widget to start from; defaults to ``self._tree``.
        Anything that is not a ``TreeSegmentWidget`` counts as a leaf.
    """
    if tree is None:
        tree = self._tree
    if not isinstance(tree, TreeSegmentWidget):
        return [tree]
    leaves = []
    for child in tree.subtrees():
        leaves.extend(self._tree_leaves(child))
    return leaves
|
579 |
+
|
580 |
+
#########################################
|
581 |
+
## Button Callbacks
|
582 |
+
#########################################
|
583 |
+
|
584 |
+
def destroy(self, *e):
    """
    Stop auto-stepping and close the window.  Calling it again after
    the window is gone does nothing.
    """
    self._autostep = 0
    if self._top is not None:
        self._top.destroy()
        self._top = None
|
590 |
+
|
591 |
+
def reset(self, *e):
    """
    Stop auto-stepping, restart the parser on the current sentence and
    redraw everything.
    """
    self._autostep = 0
    self._parser.initialize(self._sent)
    for label, text in (
        (self._lastoper1, "Reset Application"),
        (self._lastoper2, ""),
    ):
        label["text"] = text
    self._redraw()
|
597 |
+
|
598 |
+
def autostep(self, *e):
    """
    Toggle auto-stepping.  Turning it on takes the first step right
    away; if animation is switched off (0 frames) it is set to 2 frames
    first.
    """
    frames = self._animation_frames
    if frames.get() == 0:
        frames.set(2)

    if self._autostep:
        self._autostep = 0
        return
    self._autostep = 1
    self._step()
|
606 |
+
|
607 |
+
def cancel_autostep(self, *e):
    """
    Turn auto-stepping off.  Extra positional arguments (e.g. the Tk
    event) are ignored.
    """
    self._autostep = 0
|
610 |
+
|
611 |
+
# Make sure to stop auto-stepping if we get any user input.
|
612 |
+
def step(self, *e):
    """
    Button/key callback: cancel auto-stepping, then perform one step.
    """
    self._autostep = 0
    self._step()
|
615 |
+
|
616 |
+
def match(self, *e):
    """
    Button/key callback: cancel auto-stepping, then try one match.
    """
    self._autostep = 0
    self._match()
|
619 |
+
|
620 |
+
def expand(self, *e):
    """
    Button/key callback: cancel auto-stepping, then try one expansion.
    """
    self._autostep = 0
    self._expand()
|
623 |
+
|
624 |
+
def backtrack(self, *e):
    """
    Button/key callback: cancel auto-stepping, then backtrack once.
    """
    self._autostep = 0
    self._backtrack()
|
627 |
+
|
628 |
+
def _step(self):
    """
    Perform one parser step unless an animation is running: the first
    of expand, match (only when the parser reports an untried match)
    and backtrack that succeeds.  When none does, report "Finished".
    """
    if self._animating_lock:
        return

    # Try expanding, matching, and backtracking (in that order).
    progressed = (
        self._expand()
        or (self._parser.untried_match() and self._match())
        or self._backtrack()
    )
    if not progressed:
        self._lastoper1["text"] = "Finished"
        self._lastoper2["text"] = ""
        self._autostep = 0

    # A complete parse also ends auto-stepping.
    if self._parser.currently_complete():
        self._autostep = 0
        self._lastoper2["text"] += " [COMPLETE PARSE]"
|
648 |
+
|
649 |
+
def _expand(self, *e):
    """
    Ask the parser for its next expansion and animate it.

    :return: ``True`` if a production was applied, ``False`` if the
        parser had none left, ``None`` if an animation is running.
    """
    if self._animating_lock:
        return
    frontier_before = self._parser.frontier()
    applied = self._parser.expand()

    if applied is None:
        self._lastoper1["text"] = "Expand:"
        self._lastoper2["text"] = "(all expansions tried)"
        return False

    self._lastoper1["text"] = "Expand:"
    self._lastoper2["text"] = applied
    # Show the applied production as the only selected row.
    self._prodlist.selection_clear(0, "end")
    self._prodlist.selection_set(self._productions.index(applied))
    self._animate_expand(frontier_before[0])
    return True
|
666 |
+
|
667 |
+
def _match(self, *e):
    """
    Ask the parser to match the first frontier element against the
    text and animate the result.

    :return: ``True`` on a match, ``False`` on failure, ``None`` if an
        animation is running.
    """
    if self._animating_lock:
        return
    frontier_before = self._parser.frontier()
    matched = self._parser.match()

    if matched is None:
        self._lastoper1["text"] = "Match:"
        self._lastoper2["text"] = "(failed)"
        return False

    self._lastoper1["text"] = "Match:"
    self._lastoper2["text"] = matched
    self._animate_match(frontier_before[0])
    return True
|
681 |
+
|
682 |
+
def _backtrack(self, *e):
    """
    Ask the parser to backtrack and animate the undone operation.

    :return: ``True`` if the parser backtracked, ``False`` if it could
        not (reported as "Finished"), ``None`` if an animation is
        running.
    """
    if self._animating_lock:
        return

    if not self._parser.backtrack():
        self._autostep = 0
        self._lastoper1["text"] = "Finished"
        self._lastoper2["text"] = ""
        return False

    # Find the element now at the head of the frontier.
    element = self._parser.tree()
    for index in self._parser.frontier()[0]:
        element = element[index]
    self._lastoper1["text"] = "Backtrack"
    self._lastoper2["text"] = ""

    # A Tree there means an expansion was undone; anything else means a
    # match was undone.
    if isinstance(element, Tree):
        self._animate_backtrack(self._parser.frontier()[0])
    else:
        self._animate_match_backtrack(self._parser.frontier()[0])
    return True
|
701 |
+
|
702 |
+
def about(self, *e):
    """
    Show the "About" dialog.

    A ``tkinter.messagebox.Message`` is tried first; if importing or
    showing it raises, the text is shown in a ``ShowText`` window
    instead.
    """
    ABOUT = (
        "NLTK Recursive Descent Parser Application\n" + "Written by Edward Loper"
    )
    TITLE = "About: Recursive Descent Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Best-effort fallback.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        ShowText(self._top, TITLE, ABOUT)
|
713 |
+
|
714 |
+
def help(self, *e):
    """
    Stop auto-stepping and show the module docstring in a ``ShowText``
    window.
    """
    self._autostep = 0
    title = "Help: Recursive Descent Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Retry without the font.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate.
        ShowText(self._top, title, text, width=75)
|
732 |
+
|
733 |
+
def postscript(self, *e):
    """
    Stop auto-stepping and hand over to the canvas frame's
    ``print_to_file``.
    """
    self._autostep = 0
    self._cframe.print_to_file()
|
736 |
+
|
737 |
+
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Returns immediately if the window has already been destroyed or
    if ``in_idle()`` is true.  Any arguments are forwarded to the
    window's ``mainloop``.
    """
    # destroy() leaves self._top as None; there is no loop to enter then.
    if self._top is None or in_idle():
        return
    self._top.mainloop(*args, **kwargs)
|
747 |
+
|
748 |
+
def resize(self, size=None):
    """
    Apply a font size to all application fonts and redraw.

    :param size: new size; when ``None`` the current value of
        ``self._size`` (set by the View menu) is used.  Only the
        magnitude matters: fonts are configured with ``-abs(size)``.
    """
    if size is not None:
        self._size.set(size)
    magnitude = abs(self._size.get())
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=-magnitude)
    # The big font is always 2 larger in magnitude.  (abs(size + 2)
    # would make it *smaller* than the others for a negative size.)
    self._bigfont.configure(size=-(magnitude + 2))
    self._redraw()
|
757 |
+
|
758 |
+
#########################################
|
759 |
+
## Expand Production Selection
|
760 |
+
#########################################
|
761 |
+
|
762 |
+
def _toggle_grammar(self, *e):
    """
    Show or hide the grammar pane according to ``self._show_grammar``
    and report the change in the feedback line.
    """
    visible = self._show_grammar.get()
    if not visible:
        self._prodframe.pack_forget()
        message = "Hide Grammar"
    else:
        self._prodframe.pack(
            fill="both", side="left", padx=2, after=self._feedbackframe
        )
        message = "Show Grammar"
    self._lastoper1["text"] = message
    self._lastoper2["text"] = ""
|
772 |
+
|
773 |
+
# def toggle_grammar(self, *e):
|
774 |
+
# self._show_grammar = not self._show_grammar
|
775 |
+
# if self._show_grammar:
|
776 |
+
# self._prodframe.pack(fill='both', expand='y', side='left',
|
777 |
+
# after=self._feedbackframe)
|
778 |
+
# self._lastoper1['text'] = 'Show Grammar'
|
779 |
+
# else:
|
780 |
+
# self._prodframe.pack_forget()
|
781 |
+
# self._lastoper1['text'] = 'Hide Grammar'
|
782 |
+
# self._lastoper2['text'] = ''
|
783 |
+
|
784 |
+
def _prodlist_select(self, event):
    """
    Listbox callback: try to expand the frontier with the production
    the user selected.

    If the parser applies it, the expansion is animated; otherwise the
    listbox selection is reset to the currently expandable productions.
    Nothing happens while an animation is running or unless exactly one
    row is selected.
    """
    # Like _expand/_match/_backtrack, never start a new operation while
    # an animation is still running.
    if self._animating_lock:
        return
    selection = self._prodlist.curselection()
    if len(selection) != 1:
        return
    index = int(selection[0])
    old_frontier = self._parser.frontier()
    production = self._parser.expand(self._productions[index])

    if production:
        self._lastoper1["text"] = "Expand:"
        self._lastoper2["text"] = production
        self._prodlist.selection_clear(0, "end")
        self._prodlist.selection_set(index)
        self._animate_expand(old_frontier[0])
    else:
        # Reset the production selections.
        self._prodlist.selection_clear(0, "end")
        for prod in self._parser.expandable_productions():
            self._prodlist.selection_set(self._productions.index(prod))
|
804 |
+
|
805 |
+
#########################################
|
806 |
+
## Animation
|
807 |
+
#########################################
|
808 |
+
|
809 |
+
def _animate_expand(self, treeloc):
    """
    Replace the widget at ``treeloc`` with a widget for the newly
    expanded subtree, make room for it, and start fading it in.

    :param treeloc: tree location (sequence of child indices) of the
        node that was expanded.
    """
    old_label = self._get(self._tree, treeloc)
    old_segment = old_label.parent()
    is_root = not isinstance(old_segment.parent(), TreeSegmentWidget)

    # The expanded subtree, as the parser now sees it.
    subtree = self._parser.tree()
    for index in treeloc:
        subtree = subtree[index]

    # Build it all in white; only the node label is coloured for now.
    new_segment = tree_to_treesegment(
        self._canvas,
        subtree,
        node_font=self._boldfont,
        leaf_color="white",
        tree_width=2,
        tree_color="white",
        node_color="white",
        leaf_font=self._font,
    )
    new_segment.label()["color"] = "#20a050"

    # Put the new label exactly where the old one was.
    (oldx, oldy) = old_segment.label().bbox()[:2]
    (newx, newy) = new_segment.label().bbox()[:2]
    new_segment.move(oldx - newx, oldy - newy)

    if is_root:
        self._cframe.add_widget(new_segment, 0, 5)
        new_segment.move(30 - new_segment.label().bbox()[0], 0)
        self._tree = new_segment
    else:
        old_segment.parent().replace_child(old_segment, new_segment)

    # Move the children over so they don't overlap.
    # Line the children up in a strange way.
    if new_segment.subtrees():
        dx = (
            oldx
            + new_segment.label().width() / 2
            - new_segment.subtrees()[0].bbox()[0] / 2
            - new_segment.subtrees()[0].bbox()[2] / 2
        )
        for child in new_segment.subtrees():
            child.move(dx, 0)

    self._makeroom(new_segment)

    if is_root:
        self._cframe.destroy_widget(old_segment)
    else:
        old_segment.destroy()

    # One grey level per animation frame, lightest first.
    frames = self._animation_frames.get()
    colors = ["gray%d" % (10 * int(10 * x / frames)) for x in range(frames, 0, -1)]

    # Move the text string down, if necessary.
    dy = new_segment.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
    if dy > 0:
        for twidget in self._textwidgets:
            twidget.move(0, dy)
        self._canvas.move(self._textline, 0, dy)

    self._animate_expand_frame(new_segment, colors)
|
873 |
+
|
874 |
+
def _makeroom(self, treeseg):
    """
    Make sure that no sibling tree bbox's overlap.

    Siblings to the right of ``treeseg`` are shifted as a group,
    ``treeseg`` itself is pushed clear of its left neighbour, and the
    same is then done for its parent, up to the root.
    """
    parent = treeseg.parent()
    if not isinstance(parent, TreeSegmentWidget):
        return

    siblings = parent.subtrees()
    position = siblings.index(treeseg)

    # Siblings to the right: keep a 10-unit gap after treeseg.
    right = siblings[position + 1 :]
    if right:
        shift = treeseg.bbox()[2] - right[0].bbox()[0] + 10
        for sibling in right:
            sibling.move(shift, 0)

    # Sibling to the left: only ever push treeseg rightwards.
    if position > 0:
        left = siblings[position - 1]
        shift = max(0, left.bbox()[2] - treeseg.bbox()[0] + 10)
        treeseg.move(shift, 0)

    # Keep working up the tree.
    self._makeroom(parent)
|
899 |
+
|
900 |
+
def _animate_expand_frame(self, widget, colors):
    """
    One frame of the expand animation: paint ``widget`` and its
    children with ``colors[0]`` and schedule the next frame, or, once
    ``colors`` is exhausted, paint them black, release the animation
    lock and continue auto-stepping if it is on.
    """
    animating = len(colors) > 0
    if animating:
        self._animating_lock = 1
    shade = colors[0] if animating else "black"

    widget["color"] = shade
    for child in widget.subtrees():
        target = child.label() if isinstance(child, TreeSegmentWidget) else child
        target["color"] = shade

    if animating:
        self._top.after(50, self._animate_expand_frame, widget, colors[1:])
        return

    self._redraw_quick()
    widget.label()["color"] = "black"
    self._animating_lock = 0
    if self._autostep:
        self._step()
|
922 |
+
|
923 |
+
def _animate_backtrack(self, treeloc):
    """
    Start the animation that undoes an expansion at ``treeloc``: the
    node and its children flash red (when animation is on) and then
    fade through grey levels.
    """
    frames = self._animation_frames.get()

    # Flash red first, if we're animating.
    colors = [] if frames == 0 else ["#a00000", "#000000", "#a00000"]
    colors += [
        "gray%d" % (10 * int(10 * x / frames)) for x in range(1, frames + 1)
    ]

    # The segment itself, followed by each child (or the child's label).
    segment = self._get(self._tree, treeloc).parent()
    widgets = [segment]
    for child in segment.subtrees():
        widgets.append(
            child.label() if isinstance(child, TreeSegmentWidget) else child
        )

    self._animate_backtrack_frame(widgets, colors)
|
942 |
+
|
943 |
+
def _animate_backtrack_frame(self, widgets, colors):
    """
    One frame of the backtrack animation: paint every widget with
    ``colors[0]`` and schedule the next frame, or, once ``colors`` is
    exhausted, remove and destroy the children of ``widgets[0]``,
    release the animation lock and continue auto-stepping if it is on.
    """
    if len(colors) > 0:
        self._animating_lock = 1
        shade, remaining = colors[0], colors[1:]
        for widget in widgets:
            widget["color"] = shade
        self._top.after(50, self._animate_backtrack_frame, widgets, remaining)
        return

    segment = widgets[0]
    for child in segment.subtrees():
        segment.remove_child(child)
        child.destroy()
    self._redraw_quick()
    self._animating_lock = 0
    if self._autostep:
        self._step()
|
957 |
+
|
958 |
+
def _animate_match_backtrack(self, treeloc):
    """
    Start the animation that undoes a match: the leaf at ``treeloc``
    is moved in equal per-frame steps from its current position to 14
    units below the bottom of its parent's label.
    """
    leaf = self._get(self._tree, treeloc)
    parent_label = leaf.parent().label()
    frames = self._animation_frames.get()
    # max(1, ...) avoids dividing by zero when animation is off.
    dy = (parent_label.bbox()[3] - leaf.bbox()[1] + 14) / max(1, frames)
    self._animate_match_backtrack_frame(self._animation_frames.get(), leaf, dy)
|
965 |
+
|
966 |
+
def _animate_match(self, treeloc):
    """
    Start the match animation: the leaf at ``treeloc`` is moved in
    equal per-frame steps to 10 units above the top of the first text
    widget.
    """
    leaf = self._get(self._tree, treeloc)
    frames = self._animation_frames.get()
    distance = self._textwidgets[0].bbox()[1] - leaf.bbox()[3] - 10.0
    # max(1, ...) avoids dividing by zero when animation is off.
    dy = distance / max(1, frames)
    self._animate_match_frame(self._animation_frames.get(), leaf, dy)
|
973 |
+
|
974 |
+
def _animate_match_frame(self, frame, widget, dy):
    """
    One frame of the match animation: move ``widget`` down by ``dy``
    and schedule the next frame, or, when ``frame`` has reached 0,
    colour it as matched, release the animation lock and continue
    auto-stepping if it is on.
    """
    if frame > 0:
        self._animating_lock = 1
        widget.move(0, dy)
        self._top.after(10, self._animate_match_frame, frame - 1, widget, dy)
        return

    widget["color"] = "#006040"
    self._redraw_quick()
    self._animating_lock = 0
    if self._autostep:
        self._step()
|
985 |
+
|
986 |
+
def _animate_match_backtrack_frame(self, frame, widget, dy):
    """
    One frame of the match-backtrack animation: move ``widget`` by
    ``dy`` and schedule the next frame, or, when ``frame`` has reached
    0, detach and destroy it, release the animation lock and continue
    auto-stepping if it is on.
    """
    if frame > 0:
        self._animating_lock = 1
        widget.move(0, dy)
        self._top.after(
            10, self._animate_match_backtrack_frame, frame - 1, widget, dy
        )
        return

    widget.parent().remove_child(widget)
    widget.destroy()
    self._animating_lock = 0
    if self._autostep:
        self._step()
|
999 |
+
|
1000 |
+
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's current grammar, with
    ``set_grammar`` as its callback.
    """
    current = self._parser.grammar()
    CFGEditor(self._top, current, self.set_grammar)
|
1002 |
+
|
1003 |
+
def set_grammar(self, grammar):
    """
    Install ``grammar`` in the parser and refill the production
    listbox from it.
    """
    self._parser.set_grammar(grammar)
    self._productions = list(grammar.productions())

    listbox = self._prodlist
    listbox.delete(0, "end")
    for production in self._productions:
        listbox.insert("end", (" %s" % production))
|
1009 |
+
|
1010 |
+
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence, with
    ``set_sentence`` as its callback.
    """
    current = " ".join(self._sent)
    EntryDialog(
        self._top,
        current,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
|
1015 |
+
|
1016 |
+
def set_sentence(self, sentence):
    """
    Replace the text to parse and reset the demo.

    :param sentence: A string; it is split on whitespace into tokens.
    """
    tokens = sentence.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
|
1019 |
+
|
1020 |
+
|
1021 |
+
def app():
    """
    Launch the recursive descent parser demo with a small built-in
    grammar and the sentence "the dog saw a man in the park".
    """
    from nltk.grammar import CFG

    demo_grammar = CFG.fromstring(
        """
    # Grammatical productions.
        S -> NP VP
        NP -> Det N PP | Det N
        VP -> V NP PP | V NP | V
        PP -> P NP
    # Lexical productions.
        NP -> 'I'
        Det -> 'the' | 'a'
        N -> 'man' | 'park' | 'dog' | 'telescope'
        V -> 'ate' | 'saw'
        P -> 'in' | 'under' | 'with'
    """
    )

    tokens = "the dog saw a man in the park".split()

    demo = RecursiveDescentApp(demo_grammar, tokens)
    demo.mainloop()
|
1047 |
+
|
1048 |
+
|
1049 |
+
if __name__ == "__main__":
|
1050 |
+
app()
|
1051 |
+
|
1052 |
+
__all__ = ["app"]
|
pipeline/nltk/app/srparser_app.py
ADDED
@@ -0,0 +1,937 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Shift-Reduce Parser Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Edward Loper <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
"""
|
9 |
+
A graphical tool for exploring the shift-reduce parser.
|
10 |
+
|
11 |
+
The shift-reduce parser maintains a stack, which records the structure
|
12 |
+
of the portion of the text that has been parsed. The stack is
|
13 |
+
initially empty. Its contents are shown on the left side of the main
|
14 |
+
canvas.
|
15 |
+
|
16 |
+
On the right side of the main canvas is the remaining text. This is
|
17 |
+
the portion of the text which has not yet been considered by the
|
18 |
+
parser.
|
19 |
+
|
20 |
+
The parser builds up a tree structure for the text using two
|
21 |
+
operations:
|
22 |
+
|
23 |
+
- "shift" moves the first token from the remaining text to the top
|
24 |
+
of the stack. In the demo, the top of the stack is its right-hand
|
25 |
+
side.
|
26 |
+
- "reduce" uses a grammar production to combine the rightmost stack
|
27 |
+
elements into a single tree token.
|
28 |
+
|
29 |
+
You can control the parser's operation by using the "shift" and
|
30 |
+
"reduce" buttons; or you can use the "step" button to let the parser
|
31 |
+
automatically decide which operation to apply. The parser uses the
|
32 |
+
following rules to decide which operation to apply:
|
33 |
+
|
34 |
+
- Only shift if no reductions are available.
|
35 |
+
- If multiple reductions are available, then apply the reduction
|
36 |
+
whose CFG production is listed earliest in the grammar.
|
37 |
+
|
38 |
+
The "reduce" button applies the reduction whose CFG production is
|
39 |
+
listed earliest in the grammar. There are two ways to manually choose
|
40 |
+
which reduction to apply:
|
41 |
+
|
42 |
+
- Click on a CFG production from the list of available reductions,
|
43 |
+
on the left side of the main window. The reduction based on that
|
44 |
+
production will be applied to the top of the stack.
|
45 |
+
- Click on one of the stack elements. A popup window will appear,
|
46 |
+
containing all available reductions. Select one, and it will be
|
47 |
+
applied to the top of the stack.
|
48 |
+
|
49 |
+
Note that reductions can only be applied to the top of the stack.
|
50 |
+
|
51 |
+
Keyboard Shortcuts::
|
52 |
+
[Space]\t Perform the next shift or reduce operation
|
53 |
+
[s]\t Perform a shift operation
|
54 |
+
[r]\t Perform a reduction operation
|
55 |
+
[Ctrl-z]\t Undo most recent operation
|
56 |
+
[Delete]\t Reset the parser
|
57 |
+
[g]\t Show/hide available production list
|
58 |
+
[Ctrl-a]\t Toggle animations
|
59 |
+
[h]\t Help
|
60 |
+
[Ctrl-p]\t Print
|
61 |
+
[q]\t Quit
|
62 |
+
|
63 |
+
"""
|
64 |
+
|
65 |
+
from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
|
66 |
+
from tkinter.font import Font
|
67 |
+
|
68 |
+
from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
|
69 |
+
from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
|
70 |
+
from nltk.parse import SteppingShiftReduceParser
|
71 |
+
from nltk.tree import Tree
|
72 |
+
from nltk.util import in_idle
|
73 |
+
|
74 |
+
"""
|
75 |
+
Possible future improvements:
|
76 |
+
- button/window to change and/or select text. Just pop up a window
|
77 |
+
with an entry, and let them modify the text; and then retokenize
|
78 |
+
it? Maybe give a warning if it contains tokens whose types are
|
79 |
+
not in the grammar.
|
80 |
+
- button/window to change and/or select grammar. Select from
|
81 |
+
several alternative grammars? Or actually change the grammar? If
|
82 |
+
the latter, then I'd want to define nltk.draw.cfg, which would be
|
83 |
+
responsible for that.
|
84 |
+
"""
|
85 |
+
|
86 |
+
|
87 |
+
class ShiftReduceApp:
|
88 |
+
"""
|
89 |
+
A graphical tool for exploring the shift-reduce parser. The tool
|
90 |
+
displays the parser's stack and the remaining text, and allows the
|
91 |
+
user to control the parser's operation. In particular, the user
|
92 |
+
can shift tokens onto the stack, and can perform reductions on the
|
93 |
+
top elements of the stack. A "step" button simply steps through
|
94 |
+
the parsing process, performing the operations that
|
95 |
+
``nltk.parse.ShiftReduceParser`` would use.
|
96 |
+
"""
|
97 |
+
|
98 |
+
def __init__(self, grammar, sent, trace=0):
    """
    Build the demo window and reset the parser on ``sent``.

    :param grammar: Grammar handed to ``SteppingShiftReduceParser``.
    :param sent: The tokens to parse; ``reset()`` passes them to the
        parser's ``initialize`` method.
    :param trace: Trace level handed to the parser.
    """
    self._sent = sent
    self._parser = SteppingShiftReduceParser(grammar, trace)

    # Set up the main window.
    root = self._top = Tk()
    root.title("Shift Reduce Parser Application")

    # Animations.  animating_lock is a lock to prevent the demo
    # from performing new operations while it's animating.
    self._animating_lock = 0
    self._animate = IntVar(root, value=10)  # = medium

    # The user can hide the grammar.
    self._show_grammar = IntVar(root, value=1)

    # Fonts first (the widgets below use them), then key bindings.
    self._init_fonts(root)
    self._init_bindings()

    # Create the basic frames, in packing order.
    for build in (
        self._init_menubar,
        self._init_buttons,
        self._init_feedback,
        self._init_grammar,
        self._init_canvas,
    ):
        build(root)

    # A popup menu for reducing.
    self._reduce_menu = Menu(self._canvas, tearoff=0)

    # Reset the demo, and set the feedback frame to empty.
    self.reset()
    self._lastoper1["text"] = ""
|
135 |
+
|
136 |
+
#########################################
|
137 |
+
## Initialization Helpers
|
138 |
+
#########################################
|
139 |
+
|
140 |
+
def _init_fonts(self, root):
    """
    Create the fonts used by the demo and the font-size variable.

    :param root: Window on which the default ``*Font`` option and the
        size variable are registered.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    system_font = Font(font=Button()["font"])
    self._sysfont = system_font
    root.option_add("*Font", system_font)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(system_font.cget("size"))
    points = self._size.get()

    self._boldfont = Font(family="helvetica", weight="bold", size=points)
    self._font = Font(family="helvetica", size=points)
|
151 |
+
|
152 |
+
def _init_grammar(self, parent):
    """
    Build the "Available Reductions" panel: a label, a list box
    holding one row per grammar production, and a scrollbar.

    :param parent: Widget that the panel frame is packed into.
    """
    # Grammar view.
    frame = self._prodframe = Frame(parent)
    frame.pack(fill="both", side="left", padx=2)

    heading = self._prodlist_label = Label(
        frame, font=self._boldfont, text="Available Reductions"
    )
    heading.pack()

    listbox = self._prodlist = Listbox(
        frame,
        selectmode="single",
        relief="groove",
        background="white",
        foreground="#909090",
        font=self._font,
        selectforeground="#004040",
        selectbackground="#c0f0c0",
    )
    listbox.pack(side="right", fill="both", expand=1)

    # One row per production; show at most 25 rows at a time.
    self._productions = list(self._parser.grammar().productions())
    for rule in self._productions:
        listbox.insert("end", (" %s" % rule))
    listbox.config(height=min(len(self._productions), 25))

    # The scrollbar is always added, however many productions exist.
    scrollbar = Scrollbar(frame, orient="vertical")
    listbox.config(yscrollcommand=scrollbar.set)
    scrollbar.config(command=listbox.yview)
    scrollbar.pack(side="left", fill="y")

    # If they select a production, apply it.
    listbox.bind("<<ListboxSelect>>", self._prodlist_select)

    # When they hover over a production, highlight it.
    self._hover = -1
    listbox.bind("<Motion>", self._highlight_hover)
    listbox.bind("<Leave>", self._clear_hover)
|
192 |
+
|
193 |
+
def _init_bindings(self):
|
194 |
+
# Quit
|
195 |
+
self._top.bind("<Control-q>", self.destroy)
|
196 |
+
self._top.bind("<Control-x>", self.destroy)
|
197 |
+
self._top.bind("<Alt-q>", self.destroy)
|
198 |
+
self._top.bind("<Alt-x>", self.destroy)
|
199 |
+
|
200 |
+
# Ops (step, shift, reduce, undo)
|
201 |
+
self._top.bind("<space>", self.step)
|
202 |
+
self._top.bind("<s>", self.shift)
|
203 |
+
self._top.bind("<Alt-s>", self.shift)
|
204 |
+
self._top.bind("<Control-s>", self.shift)
|
205 |
+
self._top.bind("<r>", self.reduce)
|
206 |
+
self._top.bind("<Alt-r>", self.reduce)
|
207 |
+
self._top.bind("<Control-r>", self.reduce)
|
208 |
+
self._top.bind("<Delete>", self.reset)
|
209 |
+
self._top.bind("<u>", self.undo)
|
210 |
+
self._top.bind("<Alt-u>", self.undo)
|
211 |
+
self._top.bind("<Control-u>", self.undo)
|
212 |
+
self._top.bind("<Control-z>", self.undo)
|
213 |
+
self._top.bind("<BackSpace>", self.undo)
|
214 |
+
|
215 |
+
# Misc
|
216 |
+
self._top.bind("<Control-p>", self.postscript)
|
217 |
+
self._top.bind("<Control-h>", self.help)
|
218 |
+
self._top.bind("<F1>", self.help)
|
219 |
+
self._top.bind("<Control-g>", self.edit_grammar)
|
220 |
+
self._top.bind("<Control-t>", self.edit_sentence)
|
221 |
+
|
222 |
+
# Animation speed control
|
223 |
+
self._top.bind("-", lambda e, a=self._animate: a.set(20))
|
224 |
+
self._top.bind("=", lambda e, a=self._animate: a.set(10))
|
225 |
+
self._top.bind("+", lambda e, a=self._animate: a.set(4))
|
226 |
+
|
227 |
+
def _init_buttons(self, parent):
    """
    Build the row of Step / Shift / Reduce / Undo buttons.

    :param parent: Widget that the button frame is packed into.
    """
    # Set up the frames.
    bar = self._buttonframe = Frame(parent)
    bar.pack(fill="none", side="bottom")

    # One dict of options per button, in left-to-right order.  The
    # "Step" button deliberately has no ``underline`` option.
    button_specs = (
        {"text": "Step", "background": "#90c0d0", "command": self.step},
        {
            "text": "Shift",
            "underline": 0,
            "background": "#90f090",
            "command": self.shift,
        },
        {
            "text": "Reduce",
            "underline": 0,
            "background": "#90f090",
            "command": self.reduce,
        },
        {
            "text": "Undo",
            "underline": 0,
            "background": "#f0a0a0",
            "command": self.undo,
        },
    )
    for options in button_specs:
        Button(bar, foreground="black", **options).pack(side="left")
|
262 |
+
|
263 |
+
def _init_menubar(self, parent):
    """
    Build the File / Edit / Apply / View / Animate / Help menus and
    install the menu bar on ``parent``.

    :param parent: Window whose ``menu`` option is set to the menu bar.
    """
    menubar = Menu(parent)

    # --- File ---------------------------------------------------
    file_menu = Menu(menubar, tearoff=0)
    for options in (
        dict(label="Reset Parser", underline=0, command=self.reset, accelerator="Del"),
        dict(
            label="Print to Postscript",
            underline=0,
            command=self.postscript,
            accelerator="Ctrl-p",
        ),
        dict(label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"),
    ):
        file_menu.add_command(**options)
    menubar.add_cascade(label="File", underline=0, menu=file_menu)

    # --- Edit ---------------------------------------------------
    edit_menu = Menu(menubar, tearoff=0)
    for options in (
        dict(
            label="Edit Grammar",
            underline=5,
            command=self.edit_grammar,
            accelerator="Ctrl-g",
        ),
        dict(
            label="Edit Text",
            underline=5,
            command=self.edit_sentence,
            accelerator="Ctrl-t",
        ),
    ):
        edit_menu.add_command(**options)
    menubar.add_cascade(label="Edit", underline=0, menu=edit_menu)

    # --- Apply --------------------------------------------------
    rule_menu = Menu(menubar, tearoff=0)
    rule_menu.add_command(
        label="Step", underline=1, command=self.step, accelerator="Space"
    )
    rule_menu.add_separator()
    for options in (
        dict(label="Shift", underline=0, command=self.shift, accelerator="Ctrl-s"),
        dict(label="Reduce", underline=0, command=self.reduce, accelerator="Ctrl-r"),
    ):
        rule_menu.add_command(**options)
    rule_menu.add_separator()
    rule_menu.add_command(
        label="Undo", underline=0, command=self.undo, accelerator="Ctrl-u"
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rule_menu)

    # --- View ---------------------------------------------------
    view_menu = Menu(menubar, tearoff=0)
    view_menu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    view_menu.add_separator()
    # Font-size choices; each sets self._size and then calls resize().
    for label, points in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        view_menu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=points,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=view_menu)

    # --- Animate ------------------------------------------------
    animate_menu = Menu(menubar, tearoff=0)
    animate_menu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    for label, speed, key in (
        ("Slow Animation", 20, "-"),
        ("Normal Animation", 10, "="),
        ("Fast Animation", 4, "+"),
    ):
        animate_menu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=speed,
            accelerator=key,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animate_menu)

    # --- Help ---------------------------------------------------
    help_menu = Menu(menubar, tearoff=0)
    help_menu.add_command(label="About", underline=0, command=self.about)
    help_menu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=help_menu)

    parent.config(menu=menubar)
|
393 |
+
|
394 |
+
def _init_feedback(self, parent):
    """
    Build the "Last Operation:" feedback strip.

    Creates ``self._lastoper1`` and ``self._lastoper2``, the two
    labels that other methods fill in with the latest operation.

    :param parent: Widget that the feedback frame is packed into.
    """
    outer = self._feedbackframe = Frame(parent)
    outer.pack(fill="x", side="bottom", padx=3, pady=3)

    self._lastoper_label = Label(outer, text="Last Operation:", font=self._font)
    self._lastoper_label.pack(side="left")

    sunken = Frame(outer, relief="sunken", border=1)
    sunken.pack(fill="x", side="right", expand=1, padx=5)

    # Options shared by both feedback labels.
    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(sunken, foreground="#007070", **shared)
    self._lastoper2 = Label(
        sunken, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
|
416 |
+
|
417 |
+
def _init_canvas(self, parent):
    """
    Build the main canvas and the canvas items that stay on it.

    Creates the canvas frame, the empty stack / remaining-text widget
    lists, the title bar rectangle, the two guide lines, and the
    "Stack" and "Remaining Text" labels.  All canvas items are created
    with placeholder coordinates (0, 0, 0, 0); ``_redraw`` positions
    them.

    :param parent: Widget that the canvas frame is packed into.
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        width=525,
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    canvas = self._canvas = self._cframe.canvas()

    self._stackwidgets = []
    self._rtextwidgets = []
    self._titlebar = canvas.create_rectangle(
        0, 0, 0, 0, fill="#c0f0f0", outline="black"
    )
    self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
    self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")
    # (An unused local, ``size = self._size.get() + 4``, was removed
    # here; nothing read it.)
    self._stacklabel = TextWidget(
        canvas, "Stack", color="#004040", font=self._boldfont
    )
    self._rtextlabel = TextWidget(
        canvas, "Remaining Text", color="#004040", font=self._boldfont
    )
    self._cframe.add_widget(self._stacklabel)
    self._cframe.add_widget(self._rtextlabel)
|
445 |
+
|
446 |
+
#########################################
|
447 |
+
## Main draw procedure
|
448 |
+
#########################################
|
449 |
+
|
450 |
+
def _redraw(self):
    """
    Rebuild the canvas from the parser's current state.

    Destroys the existing stack and remaining-text widgets, repositions
    the title bar, guide lines and labels, creates fresh widgets for
    every item on the parser's stack and in its remaining text, and
    finally re-highlights the reducible productions.
    """
    # The canvas "scrollregion" option is read as four whitespace-
    # separated integers; only cx2 (the right edge) is used below.
    scrollregion = self._canvas["scrollregion"].split()
    (cx1, cy1, cx2, cy2) = (int(c) for c in scrollregion)

    # Delete the old stack & rtext widgets.
    for stackwidget in self._stackwidgets:
        self._cframe.destroy_widget(stackwidget)
    self._stackwidgets = []
    for rtextwidget in self._rtextwidgets:
        self._cframe.destroy_widget(rtextwidget)
    self._rtextwidgets = []

    # Position the titlebar & exprline.  ``y`` (the stack label's
    # height plus 10) is also the row where the widgets are added below.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
    self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)

    # Position the titlebar labels: "Stack" at the left, "Remaining
    # Text" against the right edge of the scroll region.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    self._stacklabel.move(5 - x1, 3 - y1)
    (x1, y1, x2, y2) = self._rtextlabel.bbox()
    self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)

    # Draw the stack, left to right.  Tree items become tree widgets,
    # anything else becomes a plain text widget; clicking either one
    # opens the reduce popup.
    stackx = 5
    for tok in self._parser.stack():
        if isinstance(tok, Tree):
            attribs = {
                "tree_color": "#4080a0",
                "tree_width": 2,
                "node_font": self._boldfont,
                "node_color": "#006060",
                "leaf_color": "#006060",
                "leaf_font": self._font,
            }
            widget = tree_to_treesegment(self._canvas, tok, **attribs)
            widget.label()["color"] = "#000000"
        else:
            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        widget.bind_click(self._popup_reduce)
        self._stackwidgets.append(widget)
        self._cframe.add_widget(widget, stackx, y)
        stackx = widget.bbox()[2] + 10

    # Draw the remaining text.  The widgets are laid out starting at
    # x=0 here and moved to their final position further down.
    rtextwidth = 0
    for tok in self._parser.remaining_text():
        widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        self._rtextwidgets.append(widget)
        self._cframe.add_widget(widget, rtextwidth, y)
        rtextwidth = widget.bbox()[2] + 4

    # Allow enough room to shift the next token (for animations)
    if len(self._rtextwidgets) > 0:
        stackx += self._rtextwidgets[0].width()

    # Move the remaining text to the correct location (keep it
    # right-justified, when possible); and move the remaining text
    # label, if necessary.  If the stack would overlap the text, cx2
    # is pushed right to make room.
    stackx = max(stackx, self._stacklabel.width() + 25)
    rlabelwidth = self._rtextlabel.width() + 10
    if stackx >= cx2 - max(rtextwidth, rlabelwidth):
        cx2 = stackx + max(rtextwidth, rlabelwidth)
    for rtextwidget in self._rtextwidgets:
        rtextwidget.move(4 + cx2 - rtextwidth, 0)
    self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)

    # Vertical divider, halfway between the end of the stack area and
    # the start of the remaining-text area.
    midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2
    self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
    # NOTE(review): the values unpacked here are not used afterwards.
    (x1, y1, x2, y2) = self._stacklabel.bbox()

    # Set up binding to allow them to shift a token by dragging it.
    if len(self._rtextwidgets) > 0:

        # Dropping the first token left of the divider shifts it;
        # otherwise the canvas is redrawn from the parser state.
        def drag_shift(widget, midx=midx, self=self):
            if widget.bbox()[0] < midx:
                self.shift()
            else:
                self._redraw()

        self._rtextwidgets[0].bind_drag(drag_shift)
        self._rtextwidgets[0].bind_click(self.shift)

    # Select, in the production list, the reductions that are
    # currently available.
    self._highlight_productions()
|
536 |
+
|
537 |
+
def _draw_stack_top(self, widget):
|
538 |
+
# hack..
|
539 |
+
midx = widget.bbox()[2] + 50
|
540 |
+
self._canvas.coords(self._stacktop, midx, 0, midx, 5000)
|
541 |
+
|
542 |
+
def _highlight_productions(self):
|
543 |
+
# Highlight the productions that can be reduced.
|
544 |
+
self._prodlist.selection_clear(0, "end")
|
545 |
+
for prod in self._parser.reducible_productions():
|
546 |
+
index = self._productions.index(prod)
|
547 |
+
self._prodlist.selection_set(index)
|
548 |
+
|
549 |
+
#########################################
|
550 |
+
## Button Callbacks
|
551 |
+
#########################################
|
552 |
+
|
553 |
+
def destroy(self, *e):
    """
    Close the main window.  Calling this again afterwards does nothing.
    """
    window = self._top
    if window is not None:
        window.destroy()
        self._top = None
|
558 |
+
|
559 |
+
def reset(self, *e):
    """
    Re-initialize the parser on the current sentence and redraw.
    """
    self._parser.initialize(self._sent)
    for label, text in ((self._lastoper1, "Reset App"), (self._lastoper2, "")):
        label["text"] = text
    self._redraw()
|
564 |
+
|
565 |
+
def step(self, *e):
    """
    Perform the next parser operation: a reduction if one is
    available, otherwise a shift.

    When neither is possible, the feedback labels are set to
    "Finished:" plus "Success" or "Failure", depending on whether the
    parser has produced any parse.

    :return: True if an operation was performed, False otherwise.
    """
    # While an animation is running, reduce() and shift() both refuse
    # to act.  Without this guard that refusal was mistaken for "no
    # operations left" and "Finished: ..." was shown mid-animation.
    if self._animating_lock:
        return False

    if self.reduce() or self.shift():
        return True

    outcome = "Success" if list(self._parser.parses()) else "Failure"
    self._lastoper1["text"] = "Finished:"
    self._lastoper2["text"] = outcome
    return False
|
577 |
+
|
578 |
+
def shift(self, *e):
    """
    Ask the parser to shift one token onto the stack.

    :return: True if a token was shifted, False if the parser could
        not shift, and None if the request was ignored because an
        animation is running.
    """
    if self._animating_lock:
        return
    if not self._parser.shift():
        return False

    # Report the token that now sits on top of the stack.
    shifted = self._parser.stack()[-1]
    self._lastoper1["text"] = "Shift:"
    self._lastoper2["text"] = "%r" % shifted

    refresh = self._animate_shift if self._animate.get() else self._redraw
    refresh()
    return True
|
591 |
+
|
592 |
+
def reduce(self, *e):
    """
    Ask the parser to perform a reduction of its own choosing.

    :return: Whatever the parser's ``reduce()`` returned (the
        production that was applied, or a false value if none was),
        or None if the request was ignored because an animation is
        running.
    """
    if self._animating_lock:
        return

    applied = self._parser.reduce()
    if not applied:
        return applied

    self._lastoper1["text"] = "Reduce:"
    self._lastoper2["text"] = "%s" % applied
    refresh = self._animate_reduce if self._animate.get() else self._redraw
    refresh()
    return applied
|
604 |
+
|
605 |
+
def undo(self, *e):
    """
    Undo the parser's most recent operation and redraw, unless an
    animation is running or the parser reports nothing was undone.
    """
    if not self._animating_lock and self._parser.undo():
        self._redraw()
|
610 |
+
|
611 |
+
def postscript(self, *e):
    """
    Delegate to the canvas frame's ``print_to_file()``.
    """
    canvas_frame = self._cframe
    canvas_frame.print_to_file()
|
613 |
+
|
614 |
+
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Nothing is done when ``in_idle()`` reports true.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
|
624 |
+
|
625 |
+
#########################################
|
626 |
+
## Menubar callbacks
|
627 |
+
#########################################
|
628 |
+
|
629 |
+
def resize(self, size=None):
    """
    Apply a font size to the demo's fonts and redraw.

    :param size: New size.  When None, the value currently held by
        the size variable is used.
    """
    if size is not None:
        self._size.set(size)

    # The fonts are configured with the negated absolute size.
    pixels = -(abs(self._size.get()))
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=pixels)

    self._redraw()
|
645 |
+
|
646 |
+
def help(self, *e):
    """
    Show the module docstring in a ``ShowText`` window.

    The window is first requested with the "fixed" font; if that
    attempt raises, it is requested again without a font option.
    """
    title = "Help: Shift-Reduce Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Catch Exception rather than using a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        ShowText(self._top, title, text, width=75)
|
663 |
+
|
664 |
+
def about(self, *e):
    """
    Show the "About" text, in a Tk message box when possible and in a
    ``ShowText`` window otherwise.
    """
    ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
    TITLE = "About: Shift-Reduce Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Catch Exception rather than using a bare ``except:`` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        ShowText(self._top, TITLE, ABOUT)
|
673 |
+
|
674 |
+
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's current grammar.

    ``set_grammar`` is passed to the editor as its callback.
    """
    current = self._parser.grammar()
    CFGEditor(self._top, current, self.set_grammar)
|
676 |
+
|
677 |
+
def set_grammar(self, grammar):
    """
    Install ``grammar`` in the parser and rebuild the production list.

    :param grammar: The new grammar; its ``productions()`` are cached
        in ``self._productions`` and shown in the list box.
    """
    self._parser.set_grammar(grammar)
    self._productions = list(grammar.productions())

    # Replace the list box contents with the new productions.
    listbox = self._prodlist
    listbox.delete(0, "end")
    for rule in self._productions:
        listbox.insert("end", (" %s" % rule))
|
683 |
+
|
684 |
+
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence.

    ``set_sentence`` is passed to the dialog as its callback.
    """
    EntryDialog(
        self._top,
        " ".join(self._sent),
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
|
689 |
+
|
690 |
+
def set_sentence(self, sent):
    """
    Replace the text to parse and reset the demo.

    :param sent: A string; it is split on whitespace into tokens.
    """
    tokens = sent.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
|
693 |
+
|
694 |
+
#########################################
|
695 |
+
## Reduce Production Selection
|
696 |
+
#########################################
|
697 |
+
|
698 |
+
def _toggle_grammar(self, *e):
|
699 |
+
if self._show_grammar.get():
|
700 |
+
self._prodframe.pack(
|
701 |
+
fill="both", side="left", padx=2, after=self._feedbackframe
|
702 |
+
)
|
703 |
+
self._lastoper1["text"] = "Show Grammar"
|
704 |
+
else:
|
705 |
+
self._prodframe.pack_forget()
|
706 |
+
self._lastoper1["text"] = "Hide Grammar"
|
707 |
+
self._lastoper2["text"] = ""
|
708 |
+
|
709 |
+
def _prodlist_select(self, event):
    """
    Handle a selection in the production listbox.

    Nothing happens unless exactly one row is selected. Otherwise the
    selected production is passed to ``self._parser.reduce``. When that
    returns a true value, the labels are updated and the display is
    animated or redrawn; when it does not, the listbox selection is reset
    to the parser's currently reducible productions.

    :param event: unused.
    """
    chosen = self._prodlist.curselection()
    if len(chosen) != 1:
        return

    position = int(chosen[0])
    production = self._parser.reduce(self._productions[position])

    if not production:
        # Reset the production selections.
        self._prodlist.selection_clear(0, "end")
        for prod in self._parser.reducible_productions():
            self._prodlist.selection_set(self._productions.index(prod))
        return

    self._lastoper1["text"] = "Reduce:"
    self._lastoper2["text"] = "%s" % production
    if self._animate.get():
        self._animate_reduce()
    else:
        self._redraw()
|
728 |
+
|
729 |
+
def _popup_reduce(self, widget):
    """
    Post the reduce menu at the pointer position.

    The menu is rebuilt with one entry per currently reducible production;
    every entry invokes ``self.reduce``. Nothing is posted when no
    production is reducible.

    :param widget: unused.
    """
    candidates = self._parser.reducible_productions()
    if len(candidates) == 0:
        return

    # Remove old commands, then add the current ones.
    menu = self._reduce_menu
    menu.delete(0, "end")
    for candidate in candidates:
        menu.add_command(label=str(candidate), command=self.reduce)

    pointer_x = self._canvas.winfo_pointerx()
    pointer_y = self._canvas.winfo_pointery()
    menu.post(pointer_x, pointer_y)
|
741 |
+
|
742 |
+
#########################################
|
743 |
+
## Animations
|
744 |
+
#########################################
|
745 |
+
|
746 |
+
def _animate_shift(self):
    """
    Start animating the first remaining-text widget towards the stack.

    The horizontal distance is divided evenly over the number of frames
    returned by ``self._animate.get()``.
    """
    # What widget are we shifting?
    moving = self._rtextwidgets[0]

    # Where are we shifting from & to?
    start_x = moving.bbox()[0]
    if len(self._stackwidgets) == 0:
        target_x = 5
    else:
        target_x = self._stackwidgets[-1].bbox()[2] + 10

    # Start animating.
    frames = self._animate.get()
    step = (target_x - start_x) * 1.0 / frames
    self._animate_shift_frame(frames, moving, step)
|
761 |
+
|
762 |
+
def _animate_shift_frame(self, frame, widget, dx):
    """
    Run one frame of the shift animation.

    While ``frame`` is positive the widget is moved by ``dx`` and the next
    frame is scheduled 10 ms later with ``self._top.after``. Once it
    reaches zero, the widget is transferred from the remaining-text list to
    the stack list and the lock is released.

    :param frame: number of frames still to draw.
    :param widget: the widget being shifted.
    :param dx: horizontal distance to move per frame.
    """
    if frame > 0:
        self._animating_lock = 1
        widget.move(dx, 0)
        self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
        return

    # but: stacktop??

    # Shift the widget to the stack.
    del self._rtextwidgets[0]
    self._stackwidgets.append(widget)
    self._animating_lock = 0

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()
|
778 |
+
|
779 |
+
def _animate_reduce(self):
    """
    Start animating the widgets that belong to the latest reduction.

    The widgets moved are the last ``k`` stack widgets, where ``k`` is the
    length of the top element of the parser's stack.
    """
    # What widgets are we shifting?
    child_count = len(self._parser.stack()[-1])  # number of children
    moving = self._stackwidgets[-child_count:]

    # How far are we moving?
    first = moving[0]
    if isinstance(first, TreeSegmentWidget):
        first_height = first.label().height()
    else:
        first_height = first.height()
    ydist = 15 + first_height

    # Start animating: half as many frames, twice the step per frame.
    frames = self._animate.get()
    step = ydist * 2.0 / frames
    self._animate_reduce_frame(frames / 2, moving, step)
|
794 |
+
|
795 |
+
def _animate_reduce_frame(self, frame, widgets, dy):
    """
    Run one frame of the reduce animation.

    While ``frame`` is positive every widget is moved down by ``dy`` and
    the next frame is scheduled 10 ms later. Afterwards the moved widgets
    are taken off the stack and regrouped under a new
    ``TreeSegmentWidget``, labelled with the top of the parser's stack.

    :param frame: number of frames still to draw.
    :param widgets: the stack widgets being reduced.
    :param dy: vertical distance to move per frame.
    :raise ValueError: if the top of the parser's stack is not a ``Tree``.
    """
    if frame > 0:
        self._animating_lock = 1
        for moving in widgets:
            moving.move(0, dy)
        self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
        return

    # Detach the reduced widgets from the stack and the canvas frame.
    del self._stackwidgets[-len(widgets) :]
    for old in widgets:
        self._cframe.remove_widget(old)

    tok = self._parser.stack()[-1]
    if not isinstance(tok, Tree):
        raise ValueError()

    # Build the replacement subtree widget.
    label = TextWidget(
        self._canvas, str(tok.label()), color="#006060", font=self._boldfont
    )
    widget = TreeSegmentWidget(self._canvas, label, widgets, width=2)

    # Place it below the stack label, to the right of the current stack top.
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    x = self._stackwidgets[-1].bbox()[2] + 10 if self._stackwidgets else 5
    self._cframe.add_widget(widget, x, y)
    self._stackwidgets.append(widget)

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()

    self._animating_lock = 0
|
852 |
+
|
853 |
+
#########################################
|
854 |
+
## Hovering.
|
855 |
+
#########################################
|
856 |
+
|
857 |
+
def _highlight_hover(self, event):
    """
    Highlight the stack widgets matching the production under the pointer.

    The listbox row nearest to ``event.y`` is looked up; when it is one of
    the selected rows, the stack widgets covered by that production's
    right-hand side are coloured green.

    :param event: object with a ``y`` attribute, passed to the listbox's
        ``nearest``.
    """
    # What production are we hovering over?
    hovered = self._prodlist.nearest(event.y)
    if self._hover == hovered:
        return

    # Clear any previous hover highlighting.
    self._clear_hover()

    # If the production corresponds to an available reduction,
    # highlight the stack.
    selected_rows = [int(row) for row in self._prodlist.curselection()]
    if hovered in selected_rows:
        rhs_length = len(self._productions[hovered].rhs())
        for item in self._stackwidgets[-rhs_length:]:
            target = item.label() if isinstance(item, TreeSegmentWidget) else item
            target["color"] = "#00a000"

    # Remember what production we're hovering over.
    self._hover = hovered
|
879 |
+
|
880 |
+
def _clear_hover(self, *event):
    """
    Remove hover highlighting from every stack widget.

    Nothing is done when no hover is recorded (``self._hover == -1``).

    :param event: ignored; any positional arguments are accepted and
        discarded.
    """
    if self._hover != -1:
        self._hover = -1
        for item in self._stackwidgets:
            target = item.label() if isinstance(item, TreeSegmentWidget) else item
            target["color"] = "black"
|
890 |
+
|
891 |
+
|
892 |
+
def app():
    """
    Create a shift reduce parser app, using a simple grammar and
    text.
    """

    from nltk.grammar import CFG, Nonterminal, Production

    (S, VP, NP, PP, P, N, Name, V, Det) = map(
        Nonterminal, "S VP NP PP P N Name V Det".split()
    )

    syntactic_rules = [
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
    ]
    lexical_rules = [
        (NP, ["I"]),
        (Det, ["the"]),
        (Det, ["a"]),
        (N, ["man"]),
        (V, ["saw"]),
        (P, ["in"]),
        (P, ["with"]),
        (N, ["park"]),
        (N, ["dog"]),
        (N, ["statue"]),
        (Det, ["my"]),
    ]
    productions = tuple(
        Production(lhs, rhs) for lhs, rhs in syntactic_rules + lexical_rules
    )

    grammar = CFG(S, productions)

    # tokenize the sentence
    tokens = "my dog saw a man in the park with a statue".split()

    ShiftReduceApp(grammar, tokens).mainloop()
|
932 |
+
|
933 |
+
|
934 |
+
if __name__ == "__main__":
|
935 |
+
app()
|
936 |
+
|
937 |
+
__all__ = ["app"]
|
pipeline/nltk/app/wordfreq_app.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Wordfreq Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Sumukh Ghodke <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
from matplotlib import pylab
|
9 |
+
|
10 |
+
from nltk.corpus import gutenberg
|
11 |
+
from nltk.text import Text
|
12 |
+
|
13 |
+
|
14 |
+
def plot_word_freq_dist(text):
    """
    Plot the cumulative percentage covered by the 50 most common samples.

    :param text: object providing ``vocab()`` (a frequency distribution
        with ``most_common`` and ``N``) and a ``name`` used as the plot
        title.
    """
    from itertools import accumulate

    fd = text.vocab()

    # most_common() already yields (sample, count) pairs, so the counts do
    # not have to be looked up again.
    top = fd.most_common(50)
    samples = [sample for sample, _ in top]

    # Running totals in a single pass; the original re-summed a growing
    # slice and re-queried fd.N() for every point.
    total = fd.N()
    values = [
        running * 100.0 / total for running in accumulate(count for _, count in top)
    ]

    pylab.title(text.name)
    pylab.xlabel("Samples")
    pylab.ylabel("Cumulative Percentage")
    pylab.plot(values)
    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
    pylab.show()
|
26 |
+
|
27 |
+
|
28 |
+
def app():
    """
    Plot the word frequency distribution of the Gutenberg corpus file
    ``melville-moby_dick.txt``.
    """
    words = gutenberg.words("melville-moby_dick.txt")
    plot_word_freq_dist(Text(words))
|
31 |
+
|
32 |
+
|
33 |
+
if __name__ == "__main__":
|
34 |
+
app()
|
35 |
+
|
36 |
+
__all__ = ["app"]
|
pipeline/nltk/app/wordnet_app.py
ADDED
@@ -0,0 +1,1005 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: WordNet Browser Application
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Jussi Salmela <[email protected]>
|
5 |
+
# Paul Bone <[email protected]>
|
6 |
+
# URL: <https://www.nltk.org/>
|
7 |
+
# For license information, see LICENSE.TXT
|
8 |
+
|
9 |
+
"""
|
10 |
+
A WordNet Browser application which launches the default browser
|
11 |
+
(if it is not already running) and opens a new tab with a connection
|
12 |
+
to http://localhost:port/ . It also starts an HTTP server on the
|
13 |
+
specified port and begins serving browser requests. The default
|
14 |
+
port is 8000. (For command-line help, run "python wordnet -h")
|
15 |
+
This application requires that the user's web browser supports
|
16 |
+
Javascript.
|
17 |
+
|
18 |
+
BrowServer is a server for browsing the NLTK Wordnet database. It first
|
19 |
+
launches a browser client to be used for browsing and then starts
|
20 |
+
serving the requests of that and maybe other clients
|
21 |
+
|
22 |
+
Usage::
|
23 |
+
|
24 |
+
browserver.py -h
|
25 |
+
browserver.py [-s] [-p <port>]
|
26 |
+
|
27 |
+
Options::
|
28 |
+
|
29 |
+
-h or --help
|
30 |
+
Display this help message.
|
31 |
+
|
32 |
+
-l <file> or --log-file <file>
|
33 |
+
Logs messages to the given file. If this option is not specified
|
34 |
+
messages are silently dropped.
|
35 |
+
|
36 |
+
-p <port> or --port <port>
|
37 |
+
Run the web server on this TCP port, defaults to 8000.
|
38 |
+
|
39 |
+
-s or --server-mode
|
40 |
+
Do not start a web browser, and do not allow a user to
|
41 |
+
shutdown the server through the web interface.
|
42 |
+
"""
|
43 |
+
# TODO: throughout this package variable names and docstrings need
|
44 |
+
# modifying to be compliant with NLTK's coding standards. Tests also
|
45 |
+
# need to be developed to ensure this continues to work in the face of
|
46 |
+
# changes to other NLTK packages.
|
47 |
+
|
48 |
+
import base64
|
49 |
+
import copy
|
50 |
+
import getopt
|
51 |
+
import io
|
52 |
+
import os
|
53 |
+
import pickle
|
54 |
+
import sys
|
55 |
+
import threading
|
56 |
+
import time
|
57 |
+
import webbrowser
|
58 |
+
from collections import defaultdict
|
59 |
+
from http.server import BaseHTTPRequestHandler, HTTPServer
|
60 |
+
|
61 |
+
# Allow this program to run inside the NLTK source tree.
|
62 |
+
from sys import argv
|
63 |
+
from urllib.parse import unquote_plus
|
64 |
+
|
65 |
+
from nltk.corpus import wordnet as wn
|
66 |
+
from nltk.corpus.reader.wordnet import Lemma, Synset
|
67 |
+
|
68 |
+
firstClient = True
|
69 |
+
|
70 |
+
# True if we're not also running a web browser. The value of server_mode
|
71 |
+
# gets set by demo().
|
72 |
+
server_mode = None
|
73 |
+
|
74 |
+
# If set this is a file object for writing log messages.
|
75 |
+
logfile = None
|
76 |
+
|
77 |
+
|
78 |
+
class MyServerHandler(BaseHTTPRequestHandler):
    """
    Request handler for the WordNet browser server.

    GET requests are dispatched on the request path: the index page,
    static ``.html`` pages, ``search?...`` queries, ``lookup_...`` links,
    ``start_page`` and the shutdown command. The resulting page is written
    back encoded as UTF-8.
    """

    def do_HEAD(self):
        """Answer a HEAD request with a 200 status and the default headers."""
        self.send_head()

    def do_GET(self):
        """
        Build the page for the requested path and send it to the client.

        Requests that cannot be parsed are answered with a ``text/plain``
        message rather than raising inside the handler.
        """
        global firstClient
        sp = self.path[1:]
        if unquote_plus(sp) == "SHUTDOWN THE SERVER":
            if server_mode:
                page = "Server must be killed with SIGTERM."
                content_type = "text/plain"
            else:
                print("Server shutting down!")
                os._exit(0)

        elif sp == "":  # First request.
            content_type = "text/html"
            if not server_mode and firstClient:
                firstClient = False
                page = get_static_index_page(True)
            else:
                page = get_static_index_page(False)

        elif sp.endswith(".html"):  # Trying to fetch a HTML file TODO:
            content_type = "text/html"
            usp = unquote_plus(sp)
            if usp == "NLTK Wordnet Browser Database Info.html":
                word = "* Database Info *"
                if os.path.isfile(usp):
                    with open(usp) as infile:
                        page = infile.read()
                else:
                    page = (
                        (html_header % word) + "<p>The database info file:"
                        "<p><b>"
                        + usp
                        + "</b>"
                        + "<p>was not found. Run this:"
                        + "<p><b>python dbinfo_html.py</b>"
                        + "<p>to produce it."
                        + html_trailer
                    )
            else:
                # Handle files here.
                try:
                    page = get_static_page_by_path(usp)
                except FileNotFoundError:
                    page = "Internal error: Path for static page '%s' is unknown" % usp
                    # Set type to plain to prevent XSS by printing the path as HTML
                    content_type = "text/plain"
        elif sp.startswith("search"):
            # This doesn't seem to work with MWEs.
            word = self._search_word(sp)
            if word is None:
                # Malformed query string: answer instead of crashing.
                content_type = "text/plain"
                page = "Could not parse request: '%s'" % sp
            else:
                content_type = "text/html"
                page, _ = page_from_word(word)
        elif sp.startswith("lookup_"):
            # TODO add a variation of this that takes a non encoded word or MWE.
            content_type = "text/html"
            sp = sp[len("lookup_") :]
            page, _ = page_from_href(sp)
        elif sp == "start_page":
            # if this is the first request we should display help
            # information, and possibly set a default word.
            content_type = "text/html"
            page, _ = page_from_word("wordnet")
        else:
            content_type = "text/plain"
            page = "Could not parse request: '%s'" % sp

        # Send result.
        self.send_head(content_type)
        self.wfile.write(page.encode("utf8"))

    @staticmethod
    def _search_word(sp):
        """
        Extract the ``nextWord`` value from a ``search?...`` request path.

        :param sp: the request path without its leading slash.
        :type sp: str
        :return: the value of the first ``nextWord`` parameter with ``+``
            replaced by spaces, or None when the path has no query string
            or no usable ``nextWord`` parameter.
        :rtype: str or None
        """
        pieces = sp.split("?")
        if len(pieces) < 2:
            return None
        for param in pieces[1].split("&"):
            if param.startswith("nextWord"):
                fields = param.split("=")
                if len(fields) < 2:
                    return None
                return fields[1].replace("+", " ")
        return None

    def send_head(self, type=None):
        """
        Send a 200 status line and the ``Content-type`` header.

        :param type: the content type to announce; ``text/html`` is used
            when it is None, so that a literal ``None`` is never sent.
        :type type: str or None
        """
        self.send_response(200)
        self.send_header("Content-type", "text/html" if type is None else type)
        self.end_headers()

    def log_message(self, format, *args):
        """
        Write a log line to the module-level ``logfile``, if one is set.

        Messages are dropped when ``logfile`` is falsy.
        """
        if logfile:
            logfile.write(
                "%s - - [%s] %s\n"
                % (self.address_string(), self.log_date_time_string(), format % args)
            )
|
171 |
+
|
172 |
+
|
173 |
+
def get_unique_counter_from_url(sp):
    """
    Extract the unique counter from the URL if it has one.

    The counter is whatever follows the last ``%23`` (an encoded ``#``).

    :param sp: the URL text to inspect.
    :type sp: str
    :return: the counter, or None when the URL contains no ``%23``.
    :rtype: int or None
    """
    _, marker, tail = sp.rpartition("%23")
    return int(tail) if marker else None
|
183 |
+
|
184 |
+
|
185 |
+
def wnb(port=8000, runBrowser=True, logfilename=None):
    """
    Run NLTK Wordnet Browser Server.

    :param port: The port number for the server to listen on, defaults to
                 8000
    :type port: int

    :param runBrowser: True to start a web browser and point it at the web
                       server.
    :type runBrowser: bool

    :param logfilename: Path of a file to append log messages to; when it
                        is falsy no log file is opened.
    :type logfilename: str or None
    """
    # The webbrowser module is unpredictable, typically it blocks if it uses
    # a console web browser, and doesn't block if it uses a GUI webbrowser,
    # so we need to force it to have a clear correct behaviour.
    #
    # Normally the server should run for as long as the user wants. They
    # should ideally be able to control this from the UI by closing the
    # window or tab.  Second best would be clicking a button to say
    # 'Shutdown' that first shuts down the server and closes the window or
    # tab, or exits the text-mode browser.  Both of these are infeasible.
    #
    # The next best alternative is to start the server, have it close when
    # it receives SIGTERM (default), and run the browser as well.  The user
    # may have to shutdown both programs.
    #
    # Since webbrowser may block, and the webserver will block, we must run
    # them in separate threads.
    #
    global server_mode, logfile
    server_mode = not runBrowser

    # Setup logging.
    if logfilename:
        try:
            logfile = open(logfilename, "a", 1)  # 1 means 'line buffering'
        except OSError as e:
            # write() takes a single string, so format the message first.
            sys.stderr.write(
                "Couldn't open %s for writing: %s\n" % (logfilename, e)
            )
            sys.exit(1)
    else:
        logfile = None

    # Compute URL
    url = "http://localhost:" + str(port)

    try:
        # Bind the socket before starting the browser thread: if binding
        # fails, no thread is left waiting forever on the ready event.
        server = HTTPServer(("", port), MyServerHandler)
        try:
            if logfile:
                logfile.write(
                    "NLTK Wordnet browser server running serving: %s\n" % url
                )

            browser_thread = None
            if runBrowser:
                server_ready = threading.Event()
                browser_thread = startBrowser(url, server_ready)
                server_ready.set()

            try:
                server.serve_forever()
            except KeyboardInterrupt:
                pass

            if browser_thread is not None:
                browser_thread.join()
        finally:
            # Release the listening socket however serving ended.
            server.server_close()
    finally:
        if logfile:
            logfile.close()
|
254 |
+
|
255 |
+
|
256 |
+
def startBrowser(url, server_ready):
    """
    Start a thread that opens ``url`` in a web browser once the server is up.

    :param url: the address to open.
    :param server_ready: event the thread waits on before opening the
        browser.
    :type server_ready: threading.Event
    :return: the started thread.
    :rtype: threading.Thread
    """

    def open_when_ready():
        server_ready.wait()
        # Wait a little bit more, there's still the chance of
        # a race condition.
        time.sleep(1)
        webbrowser.open(url, new=2, autoraise=1)

    worker = threading.Thread(target=open_when_ready)
    worker.start()
    return worker
|
266 |
+
|
267 |
+
|
268 |
+
#####################################################################
|
269 |
+
# Utilities
|
270 |
+
#####################################################################
|
271 |
+
|
272 |
+
|
273 |
+
"""
|
274 |
+
WordNet Browser Utilities.
|
275 |
+
|
276 |
+
This provides a backend to both wxbrowse and browserver.py.
|
277 |
+
"""
|
278 |
+
|
279 |
+
################################################################################
|
280 |
+
#
|
281 |
+
# Main logic for wordnet browser.
|
282 |
+
#
|
283 |
+
|
284 |
+
# This is wrapped inside a function since wn is only available if the
|
285 |
+
# WordNet corpus is installed.
|
286 |
+
def _pos_tuples():
    """
    Return the table of supported parts of speech.

    Each entry is a ``(wordnet pos constant, one-letter code, short name)``
    tuple.
    """
    constants = (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV)
    letters = ("N", "V", "J", "R")
    names = ("noun", "verb", "adj", "adv")
    return list(zip(constants, letters, names))
|
293 |
+
|
294 |
+
|
295 |
+
def _pos_match(pos_tuple):
    """
    Return the complete pos tuple for the partial pos tuple given to it.

    The match is made on the first component of ``pos_tuple`` that is not
    None; a leading ``"s"`` is treated as ``"a"``.

    :param pos_tuple: a three-element tuple laid out like the entries of
        ``_pos_tuples()``, with unknown components set to None.
    :return: the matching entry of ``_pos_tuples()``, or None.
    """
    if pos_tuple[0] == "s":
        pos_tuple = ("a", pos_tuple[1], pos_tuple[2])

    # Index of the first known component (the last index if none is known).
    slot = next(
        (i for i, component in enumerate(pos_tuple) if component is not None),
        len(pos_tuple) - 1,
    )
    wanted = pos_tuple[slot]
    return next((entry for entry in _pos_tuples() if entry[slot] == wanted), None)
|
310 |
+
|
311 |
+
|
312 |
+
# Identifiers for the synset relations the browser can display.
#
# CLASS_REGIONAL, CLASS_USAGE and CLASS_CATEGORY used to be assigned twice
# in this list; only the last assignment of each ever took effect, so the
# dead first assignments (2, 22 and 21) have been dropped and the effective
# values kept.
HYPONYM = 0
HYPERNYM = 1
PART_HOLONYM = 3
PART_MERONYM = 4
ATTRIBUTE = 5
SUBSTANCE_HOLONYM = 6
SUBSTANCE_MERONYM = 7
MEMBER_HOLONYM = 8
MEMBER_MERONYM = 9
VERB_GROUP = 10
CLASS_CATEGORY = 11
INSTANCE_HYPONYM = 12
INSTANCE_HYPERNYM = 13
CAUSE = 14
ALSO_SEE = 15
SIMILAR = 16
ENTAILMENT = 17
ANTONYM = 18
FRAMES = 19
PERTAINYM = 20

CLASS_REGIONAL = 23
CLASS_USAGE = 24

DERIVATIONALLY_RELATED_FORM = 25

INDIRECT_HYPERNYMS = 26
|
342 |
+
|
343 |
+
|
344 |
+
def lemma_property(word, synset, func):
    """
    Collect ``func(lemma)`` for every lemma of ``synset`` named ``word``.

    :param word: the lemma name to match.
    :type word: str
    :param synset: object whose ``lemmas()`` are searched.
    :param func: callable returning a list for a lemma (for example its
        antonyms).
    :return: the concatenation of the lists returned by ``func``.
    :rtype: list
    """
    collected = []
    for lemma in synset.lemmas():
        # ``name`` is a method on NLTK 3 lemmas, so comparing the attribute
        # itself with ``word`` never matched. Plain attributes are still
        # accepted.
        name = lemma.name
        if callable(name):
            name = name()
        if name == word:
            collected.extend(func(lemma))
    return collected
|
352 |
+
|
353 |
+
|
354 |
+
def rebuild_tree(orig_tree):
    """
    Convert a nested ``[node, child, child, ...]`` tree into nested
    ``(node, [children])`` pairs.

    :param orig_tree: a sequence whose first item is the node and whose
        remaining items are subtrees of the same shape.
    :return: a ``(node, children)`` tuple, where ``children`` is a list of
        rebuilt subtrees.
    :rtype: tuple
    """
    root = orig_tree[0]
    rebuilt_children = []
    for subtree in orig_tree[1:]:
        rebuilt_children.append(rebuild_tree(subtree))
    return (root, rebuilt_children)
|
358 |
+
|
359 |
+
|
360 |
+
def get_relations_data(word, synset):
    """
    Get synset relations data for a synset.  Note that this doesn't
    yet support things such as full hyponym vs direct hyponym.

    :param word: the word being browsed; used to select the lemmas whose
        lemma-level relations (e.g. antonyms) are reported.
    :type word: str
    :param synset: the synset whose relations are collected.
    :return: a tuple of ``(relation id, label, related items)`` triples;
        which relations are included depends on the part of speech.
    :rtype: tuple
    :raise TypeError: if the synset's part of speech is not handled.
    """
    pos = synset.pos()
    if pos == wn.NOUN:
        return (
            (HYPONYM, "Hyponyms", synset.hyponyms()),
            (INSTANCE_HYPONYM, "Instance hyponyms", synset.instance_hyponyms()),
            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
            (
                INDIRECT_HYPERNYMS,
                "Indirect hypernyms",
                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
            ),
            #  hypernyms', 'Sister terms',
            (INSTANCE_HYPERNYM, "Instance hypernyms", synset.instance_hypernyms()),
            #            (CLASS_REGIONAL, ['domain term region'], ),
            (PART_HOLONYM, "Part holonyms", synset.part_holonyms()),
            (PART_MERONYM, "Part meronyms", synset.part_meronyms()),
            (SUBSTANCE_HOLONYM, "Substance holonyms", synset.substance_holonyms()),
            (SUBSTANCE_MERONYM, "Substance meronyms", synset.substance_meronyms()),
            (MEMBER_HOLONYM, "Member holonyms", synset.member_holonyms()),
            (MEMBER_MERONYM, "Member meronyms", synset.member_meronyms()),
            (ATTRIBUTE, "Attributes", synset.attributes()),
            (ANTONYM, "Antonyms", lemma_property(word, synset, lambda l: l.antonyms())),
            (
                DERIVATIONALLY_RELATED_FORM,
                "Derivationally related form",
                lemma_property(
                    word, synset, lambda l: l.derivationally_related_forms()
                ),
            ),
        )
    elif pos == wn.VERB:
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
            (HYPONYM, "Hyponym", synset.hyponyms()),
            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
            (
                INDIRECT_HYPERNYMS,
                "Indirect hypernyms",
                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
            ),
            (ENTAILMENT, "Entailments", synset.entailments()),
            (CAUSE, "Causes", synset.causes()),
            (ALSO_SEE, "Also see", synset.also_sees()),
            (VERB_GROUP, "Verb Groups", synset.verb_groups()),
            (
                DERIVATIONALLY_RELATED_FORM,
                "Derivationally related form",
                lemma_property(
                    word, synset, lambda l: l.derivationally_related_forms()
                ),
            ),
        )
    elif pos in (wn.ADJ, wn.ADJ_SAT):
        # The original compared the bound method ``synset.pos`` (no call)
        # with wn.ADJ_SAT, so satellite adjectives were never matched here
        # and fell through to the TypeError below.
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
            (SIMILAR, "Similar to", synset.similar_tos()),
            # Participle of verb - not supported by corpus
            (
                PERTAINYM,
                "Pertainyms",
                lemma_property(word, synset, lambda l: l.pertainyms()),
            ),
            (ATTRIBUTE, "Attributes", synset.attributes()),
            (ALSO_SEE, "Also see", synset.also_sees()),
        )
    elif pos == wn.ADV:
        # This is weird. Adverbs such as 'quick' and 'fast' don't seem
        # to have antonyms returned by the corpus.
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
        )
        # Derived from adjective - not supported by corpus
    else:
        raise TypeError("Unhandles synset POS type: " + str(pos))
|
438 |
+
|
439 |
+
|
440 |
+
html_header = """
|
441 |
+
<!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
|
442 |
+
'http://www.w3.org/TR/html4/strict.dtd'>
|
443 |
+
<html>
|
444 |
+
<head>
|
445 |
+
<meta name='generator' content=
|
446 |
+
'HTML Tidy for Windows (vers 14 February 2006), see www.w3.org'>
|
447 |
+
<meta http-equiv='Content-Type' content=
|
448 |
+
'text/html; charset=us-ascii'>
|
449 |
+
<title>NLTK Wordnet Browser display of: %s</title></head>
|
450 |
+
<body bgcolor='#F5F5F5' text='#000000'>
|
451 |
+
"""
|
452 |
+
html_trailer = """
|
453 |
+
</body>
|
454 |
+
</html>
|
455 |
+
"""
|
456 |
+
|
457 |
+
explanation = """
|
458 |
+
<h3>Search Help</h3>
|
459 |
+
<ul><li>The display below the line is an example of the output the browser
|
460 |
+
shows you when you enter a search word. The search word was <b>green</b>.</li>
|
461 |
+
<li>The search result shows for different parts of speech the <b>synsets</b>
|
462 |
+
i.e. different meanings for the word.</li>
|
463 |
+
<li>All underlined texts are hypertext links. There are two types of links:
|
464 |
+
word links and others. Clicking a word link carries out a search for the word
|
465 |
+
in the Wordnet database.</li>
|
466 |
+
<li>Clicking a link of the other type opens a display section of data attached
|
467 |
+
to that link. Clicking that link a second time closes the section again.</li>
|
468 |
+
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.
|
469 |
+
</li>
|
470 |
+
<li>Clicking on a relation name opens a section that displays the associated
|
471 |
+
synsets.</li>
|
472 |
+
<li>Type a search word in the <b>Word</b> field and start the search by the
|
473 |
+
<b>Enter/Return</b> key or click the <b>Search</b> button.</li>
|
474 |
+
</ul>
|
475 |
+
<hr width='100%'>
|
476 |
+
"""
|
477 |
+
|
478 |
+
# HTML oriented functions
|
479 |
+
|
480 |
+
|
481 |
+
def _bold(txt):
    """Wrap ``txt`` (a string) in an HTML ``<b>`` element."""
    return "<b>{}</b>".format(txt)
|
483 |
+
|
484 |
+
|
485 |
+
def _center(txt):
    """Wrap ``txt`` (a string) in an HTML ``<center>`` element."""
    return "<center>{}</center>".format(txt)
|
487 |
+
|
488 |
+
|
489 |
+
def _hlev(n, txt):
    """
    Wrap ``txt`` in an HTML heading element of level ``n``.

    :param n: the heading level, formatted with ``%d``.
    :param txt: the heading text.
    """
    tag = "h%d" % n
    return "<{0}>{1}</{0}>".format(tag, txt)
|
491 |
+
|
492 |
+
|
493 |
+
def _italic(txt):
    """Wrap ``txt`` (a string) in an HTML ``<i>`` element."""
    return "<i>{}</i>".format(txt)
|
495 |
+
|
496 |
+
|
497 |
+
def _li(txt):
    """Wrap ``txt`` (a string) in an HTML ``<li>`` element."""
    return "<li>{}</li>".format(txt)
|
499 |
+
|
500 |
+
|
501 |
+
def pg(word, body):
    """
    Return a HTML page of NLTK Browser format constructed from the
    word and body

    :param word: The word that the body corresponds to
    :type word: str
    :param body: The HTML body corresponding to the word
    :type body: str
    :return: a HTML page for the word-body combination
    :rtype: str
    """
    header = html_header % word
    return "".join((header, body, html_trailer))
|
514 |
+
|
515 |
+
|
516 |
+
def _ul(txt):
|
517 |
+
return "<ul>" + txt + "</ul>"
|
518 |
+
|
519 |
+
|
520 |
+
def _abbc(txt):
    """
    abbc = asterisks, breaks, bold, center

    Return *txt* preceded by ten ``<br>`` tags and framed by ten asterisks
    on each side, rendered bold and centered.
    """
    stars = "*" * 10
    breaks = "<br>" * 10
    banner = breaks + stars + " " + txt + " " + stars
    return _center(_bold(banner))
|
525 |
+
|
526 |
+
|
527 |
+
# One-item HTML list holding the italic note "(has full hyponym continuation)".
full_hyponym_cont_text = "%s\n" % _ul(_li(_italic("(has full hyponym continuation)")))
|
528 |
+
|
529 |
+
|
530 |
+
def _get_synset(synset_key):
    """
    Look up a synset by its key.

    :param synset_key: the unique name of the synset, as returned by
        ``synset.name()``
    :return: the result of ``wn.synset(synset_key)``
    """
    found = wn.synset(synset_key)
    return found
|
536 |
+
|
537 |
+
|
538 |
+
def _collect_one_synset(word, synset, synset_relations):
    """
    Returns the HTML list item for one synset

    :param word: the current word (may contain underscores for collocations)
    :type word: str
    :param synset: a synset
    :type synset: Synset
    :param synset_relations: information about which synset relations
        to display.
    :type synset_relations: dict(synset_key, set(relation_id))
    :return: The HTML string built for this synset
    :rtype: str
    :raises NotImplementedError: if *synset* is a tuple (i.e. a word)
    """
    if isinstance(synset, tuple):  # It's a word
        raise NotImplementedError("word not supported by _collect_one_synset")

    typ = "S"
    pos_tuple = _pos_match((synset.pos(), None, None))
    assert pos_tuple is not None, "pos_tuple is null: synset.pos(): %s" % synset.pos()
    descr = pos_tuple[2]
    # Deep-copy before toggling so the caller's synset_relations dict is not
    # mutated while building the link that toggles this synset.
    ref = copy.deepcopy(Reference(word, synset_relations))
    ref.toggle_synset(synset)
    # The help pages describe this link as "S:", so the label uses a colon.
    synset_label = typ + ":"
    if synset.name() in synset_relations:
        synset_label = _bold(synset_label)
    s = f"<li>{make_lookup_link(ref, synset_label)} ({descr}) "

    # Lemma names are displayed with spaces, so the current word has to be
    # normalised the same way before comparing; otherwise a collocation such
    # as "cheer_up" would never match its own lemma "cheer up".
    current_word = word.replace("_", " ")

    def format_lemma(w):
        """Return the lemma in bold if it is the current word, else as a link."""
        w = w.replace("_", " ")
        if w.lower() == current_word:
            return _bold(w)
        return make_lookup_link(Reference(w), w)

    s += ", ".join(format_lemma(lemma.name()) for lemma in synset.lemmas())

    gl = " ({}) <i>{}</i> ".format(
        synset.definition(),
        "; ".join('"%s"' % e for e in synset.examples()),
    )
    return s + gl + _synset_relations(word, synset, synset_relations) + "</li>\n"
|
581 |
+
|
582 |
+
|
583 |
+
def _collect_all_synsets(word, pos, synset_relations=None):
    """
    Return a HTML unordered list of synsets for the given word and
    part of speech.

    :param word: the word to look up with ``wn.synsets``
    :param pos: the part of speech to restrict the lookup to
    :param synset_relations: synset keys mapped to the relation ids to
        display; defaults to an empty mapping
    :return: the ``<ul>`` element containing one item per synset
    :rtype: str
    """
    # Use None as the default instead of a dict created once at definition
    # time, so no state can ever be shared between calls.
    if synset_relations is None:
        synset_relations = {}
    items = "".join(
        _collect_one_synset(word, synset, synset_relations)
        for synset in wn.synsets(word, pos)
    )
    return "<ul>%s\n</ul>\n" % items
|
592 |
+
|
593 |
+
|
594 |
+
def _synset_relations(word, synset, synset_relations):
    """
    Build the HTML for the relations of one synset.

    :param word: The current word
    :type word: str
    :param synset: The synset for which we're building the relations.
    :type synset: Synset
    :param synset_relations: synset keys and relation types for which to display relations.
    :type synset_relations: dict(synset_key, set(relation_type))
    :return: The HTML for a synset's relations; the empty string when the
        synset is not a key of *synset_relations*
    :rtype: str
    """
    synset_key = synset.name()
    if synset_key not in synset_relations:
        return ""
    ref = Reference(word, synset_relations)

    def relation_html(r):
        """Render a synset, a lemma, or a (synset, children) tuple as HTML."""
        if isinstance(r, Synset):
            first_lemma = r.lemma_names()[0]
            return make_lookup_link(Reference(first_lemma), first_lemma)
        if isinstance(r, Lemma):
            return relation_html(r.synset())
        if isinstance(r, tuple):
            # It's probably a tuple containing a Synset and a list of
            # similar tuples.  This forms a tree of synsets.
            head_html = relation_html(r[0])
            children_html = "".join(
                "<li>%s</li>\n" % relation_html(child) for child in r[1]
            )
            return "{}\n<ul>{}</ul>\n".format(head_html, children_html)
        raise TypeError(
            "r must be a synset, lemma or list, it was: type(r) = %s, r = %s"
            % (type(r), r)
        )

    def make_synset_html(db_name, disp_name, rels):
        """Render one relation name as a toggle link, expanded if selected."""
        # The link points at a copy of the reference with this relation toggled.
        toggled = copy.deepcopy(ref).toggle_synset_relation(synset, db_name)
        pieces = ["<i>%s</i>\n" % make_lookup_link(toggled, disp_name)]

        if db_name in ref.synset_relations[synset_key]:
            entries = "".join("<li>%s</li>\n" % relation_html(r) for r in rels)
            pieces.append("<ul>%s</ul>\n" % entries)

        return "".join(pieces)

    # Relations whose data list is empty are left out.
    items = [
        "<li>%s</li>" % make_synset_html(*rel_data)
        for rel_data in get_relations_data(word, synset)
        if rel_data[2] != []
    ]
    return "<ul>" + "\n".join(items) + "</ul>"
|
654 |
+
|
655 |
+
|
656 |
+
class RestrictedUnpickler(pickle.Unpickler):
    """
    Unpickler that rejects every global lookup, so that no class or
    function can be referenced while loading.
    """

    def find_class(self, module, name):
        """Always raise ``pickle.UnpicklingError`` naming the refused global."""
        # Forbid every function
        message = "global '%s.%s' is forbidden" % (module, name)
        raise pickle.UnpicklingError(message)
|
664 |
+
|
665 |
+
|
666 |
+
class Reference:
    """
    A reference to a page that may be generated by page_word
    """

    def __init__(self, word, synset_relations=None):
        """
        Build a reference to a new page.

        word is the word or words (separated by commas) for which to
        search for synsets of

        synset_relations is a dictionary of synset keys to sets of
        synset relation identifiers to unfold a list of synset
        relations for.  When omitted, a new empty dictionary is used.
        A dictionary that is passed in is stored as-is (not copied).
        """
        self.word = word
        # A fresh dict per instance: the toggle methods mutate this mapping in
        # place, so a shared default dict would leak state between references.
        self.synset_relations = {} if synset_relations is None else synset_relations

    def encode(self):
        """
        Encode this reference into a string to be used in a URL.
        """
        # This uses a tuple rather than an object since the python
        # pickle representation is much smaller and there is no need
        # to represent the complete object.
        string = pickle.dumps((self.word, self.synset_relations), -1)
        return base64.urlsafe_b64encode(string).decode()

    @staticmethod
    def decode(string):
        """
        Decode a reference encoded with Reference.encode
        """
        string = base64.urlsafe_b64decode(string.encode())
        # NOTE(review): the encoded string is meant to travel in a URL, so it
        # should be treated as untrusted; RestrictedUnpickler refuses every
        # global instead of using plain pickle.loads.
        word, synset_relations = RestrictedUnpickler(io.BytesIO(string)).load()
        return Reference(word, synset_relations)

    def toggle_synset_relation(self, synset, relation):
        """
        Toggle the display of the relations for the given synset and
        relation type.

        This function will throw a KeyError if the synset is currently
        not being displayed.

        :return: this reference, to allow chaining
        """
        shown = self.synset_relations[synset.name()]
        if relation in shown:
            shown.remove(relation)
        else:
            shown.add(relation)

        return self

    def toggle_synset(self, synset):
        """
        Toggle displaying of the relation types for the given synset

        :return: this reference, to allow chaining
        """
        key = synset.name()
        if key in self.synset_relations:
            del self.synset_relations[key]
        else:
            self.synset_relations[key] = set()

        return self
|
729 |
+
|
730 |
+
|
731 |
+
def make_lookup_link(ref, label):
    """
    Return an HTML anchor showing *label* whose href is ``lookup_``
    followed by ``ref.encode()``.
    """
    return '<a href="lookup_{}">{}</a>'.format(ref.encode(), label)
|
733 |
+
|
734 |
+
|
735 |
+
def page_from_word(word):
    """
    Build the HTML page for a word by wrapping it in a new ``Reference``
    and delegating to ``page_from_reference``.

    :type word: str
    :param word: The currently active word
    :return: A tuple (page,word), where page is the new current HTML page
        to be sent to the browser and
        word is the new current word
    :rtype: A tuple (str,str)
    """
    reference = Reference(word)
    return page_from_reference(reference)
|
747 |
+
|
748 |
+
|
749 |
+
def page_from_href(href):
    """
    Decode an encoded reference with ``Reference.decode`` and build its
    page with ``page_from_reference``.

    :param href: The hypertext reference to be solved
    :type href: str
    :return: A tuple (page,word), where page is the new current HTML page
        to be sent to the browser and
        word is the new current word
    :rtype: A tuple (str,str)
    """
    reference = Reference.decode(href)
    return page_from_reference(reference)
|
761 |
+
|
762 |
+
|
763 |
+
def page_from_reference(href):
    """
    Returns a tuple of the HTML page built and the new current word

    :param href: The reference to be resolved; its ``word`` attribute may
        hold several comma-separated words
    :type href: Reference
    :return: A tuple (page,word), where page is the new current HTML page
             to be sent to the browser and
             word is the new current word.  When no usable word is given,
             page is a short message and word is the empty string.
    :rtype: A tuple (str,str)
    """
    word = href.word
    # Normalise each comma-separated entry and drop the empty ones.
    words = [w.strip().lower().replace(" ", "_") for w in word.split(",")]
    words = [w for w in words if w]
    if not words:
        # No words were found.  The message belongs in the page slot of the
        # (page, word) tuple; there is no current word to report.
        return "Please specify a word to search for.", ""

    # This looks up multiple words at once.  This is probably not
    # necessary and may lead to problems.
    pos_forms = defaultdict(list)
    for w in words:
        for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]:
            form = wn.morphy(w, pos)
            if form and form not in pos_forms[pos]:
                pos_forms[pos].append(form)

    parts = []
    for pos, _pos_str, name in _pos_tuples():
        if pos in pos_forms:
            parts.append(_hlev(3, name) + "\n")
            for w in pos_forms[pos]:
                # Not all words of exc files are in the database, skip
                # to the next word if a KeyError is raised.
                try:
                    parts.append(_collect_all_synsets(w, pos, href.synset_relations))
                except KeyError:
                    continue
    body = "".join(parts)
    if not body:
        body = "The word or words '%s' were not found in the dictionary." % word
    return body, word
|
803 |
+
|
804 |
+
|
805 |
+
#####################################################################
|
806 |
+
# Static pages
|
807 |
+
#####################################################################
|
808 |
+
|
809 |
+
|
810 |
+
def get_static_page_by_path(path):
    """
    Return the static HTML page registered under *path*.

    :param path: name of the requested static page
    :return: the page content
    :raises FileNotFoundError: if *path* is not one of the known pages
    """
    # Checked in order; each builder is only called for the matching path.
    pages = (
        ("index_2.html", lambda: get_static_index_page(False)),
        ("index.html", lambda: get_static_index_page(True)),
        (
            "NLTK Wordnet Browser Database Info.html",
            lambda: "Display of Wordnet Database Statistics is not supported",
        ),
        ("upper_2.html", lambda: get_static_upper_page(False)),
        ("upper.html", lambda: get_static_upper_page(True)),
        ("web_help.html", lambda: get_static_web_help_page()),
        ("wx_help.html", lambda: get_static_wx_help_page()),
    )
    for known_path, build in pages:
        if path == known_path:
            return build()
    raise FileNotFoundError()
|
829 |
+
|
830 |
+
|
831 |
+
def get_static_web_help_page():
    """
    Return the HTML of the static help page for the web client.
    """
    help_page = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2001-2023 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <https://www.nltk.org/>
For license information, see LICENSE.TXT -->
<head>
<meta http-equiv='Content-Type' content='text/html; charset=us-ascii'>
<title>NLTK Wordnet Browser display of: * Help *</title>
</head>
<body bgcolor='#F5F5F5' text='#000000'>
<h2>NLTK Wordnet Browser Help</h2>
<p>The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database.
<p><b>You are using the Javascript client part of the NLTK Wordnet BrowseServer.</b> We assume your browser is in tab sheets enabled mode.</p>
<p>For background information on Wordnet, see the Wordnet project home page: <a href="https://wordnet.princeton.edu/"><b> https://wordnet.princeton.edu/</b></a>. For more information on the NLTK project, see the project home:
<a href="https://www.nltk.org/"><b>https://www.nltk.org/</b></a>. To get an idea of what the Wordnet version used by this browser includes choose <b>Show Database Info</b> from the <b>View</b> submenu.</p>
<h3>Word search</h3>
<p>The word to be searched is typed into the <b>New Word</b> field and the search started with Enter or by clicking the <b>Search</b> button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.</p>
<p>In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing <b>fLIeS</b> as an obscure example gives one <a href="MfLIeS">this</a>. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination.</p>
<p>The result of a search is a display of one or more
<b>synsets</b> for every part of speech in which a form of the
search word was found to occur. A synset is a set of words
having the same sense or meaning. Each word in a synset that is
underlined is a hyperlink which can be clicked to trigger an
automatic search for that word.</p>
<p>Every synset has a hyperlink <b>S:</b> at the start of its
display line. Clicking that symbol shows you the name of every
<b>relation</b> that this synset is part of. Every relation name is a hyperlink that opens up a display for that relation. Clicking it another time closes the display again. Clicking another relation name on a line that has an opened relation closes the open relation and opens the clicked relation.</p>
<p>It is also possible to give two or more words or collocations to be searched at the same time separating them with a comma like this <a href="Mcheer up,clear up">cheer up,clear up</a>, for example. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination. As you could see the search result includes the synsets found in the same order than the forms were given in the search field.</p>
<p>
There are also word level (lexical) relations recorded in the Wordnet database. Opening this kind of relation displays lines with a hyperlink <b>W:</b> at their beginning. Clicking this link shows more info on the word in question.</p>
<h3>The Buttons</h3>
<p>The <b>Search</b> and <b>Help</b> buttons need no more explanation. </p>
<p>The <b>Show Database Info</b> button shows a collection of Wordnet database statistics.</p>
<p>The <b>Shutdown the Server</b> button is shown for the first client of the BrowServer program i.e. for the client that is automatically launched when the BrowServer is started but not for the succeeding clients in order to protect the server from accidental shutdowns.
</p></body>
</html>
"""
    return help_page
|
875 |
+
|
876 |
+
|
877 |
+
def get_static_welcome_message():
    """
    Return the HTML fragment with the "Search Help" welcome text.
    """
    welcome = """
<h3>Search Help</h3>
<ul><li>The display below the line is an example of the output the browser
shows you when you enter a search word. The search word was <b>green</b>.</li>
<li>The search result shows for different parts of speech the <b>synsets</b>
i.e. different meanings for the word.</li>
<li>All underlined texts are hypertext links. There are two types of links:
word links and others. Clicking a word link carries out a search for the word
in the Wordnet database.</li>
<li>Clicking a link of the other type opens a display section of data attached
to that link. Clicking that link a second time closes the section again.</li>
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
<li>Clicking on a relation name opens a section that displays the associated
synsets.</li>
<li>Type a search word in the <b>Next Word</b> field and start the search by the
<b>Enter/Return</b> key or click the <b>Search</b> button.</li>
</ul>
"""
    return welcome
|
899 |
+
|
900 |
+
|
901 |
+
def get_static_index_page(with_shutdown):
    """
    Return the static frameset index page.

    The header frame points at ``upper.html`` when *with_shutdown* is
    true and at ``upper_2.html`` otherwise.
    """
    upper_link = "upper.html" if with_shutdown else "upper_2.html"
    template = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
<HTML>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2001-2023 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <https://www.nltk.org/>
For license information, see LICENSE.TXT -->
<HEAD>
<TITLE>NLTK Wordnet Browser</TITLE>
</HEAD>

<frameset rows="7%%,93%%">
<frame src="%s" name="header">
<frame src="start_page" name="body">
</frameset>
</HTML>
"""
    return template % upper_link
|
929 |
+
|
930 |
+
|
931 |
+
def get_static_upper_page(with_shutdown):
    """
    Return the upper frame page (search form and help link).

    If with_shutdown is True then a 'Shutdown' link is also included
    to shutdown the server; otherwise nothing is inserted in its place.
    """
    shutdown_link = '<a href="SHUTDOWN THE SERVER">Shutdown</a>' if with_shutdown else ""
    template = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2001-2023 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <https://www.nltk.org/>
For license information, see LICENSE.TXT -->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>Untitled Document</title>
</head>
<body>
<form method="GET" action="search" target="body">
Current Word: <input type="text" id="currentWord" size="10" disabled>
Next Word: <input type="text" id="nextWord" name="nextWord" size="10">
<input name="searchButton" type="submit" value="Search">
</form>
<a target="body" href="web_help.html">Help</a>
%s

</body>
</html>
"""
    return template % shutdown_link
|
968 |
+
|
969 |
+
|
970 |
+
def usage():
    """
    Print the module docstring, which serves as the command line help.
    """
    help_text = __doc__
    print(help_text)
|
975 |
+
|
976 |
+
|
977 |
+
def app():
    """
    Command line entry point.

    Recognised options: ``-l``/``--logfile`` (log file name),
    ``-p``/``--port`` (port number, default 8000), ``-s``/``--server-mode``
    and ``-h``/``--help``.  With the help option the usage text is printed;
    otherwise ``wnb`` is called with the port, the negated server-mode flag
    and the log file name.
    """
    # Parse and interpret options.
    parsed, _ = getopt.getopt(
        argv[1:], "l:p:sh", ["logfile=", "port=", "server-mode", "help"]
    )
    settings = {
        "port": 8000,
        "server_mode": False,
        "help_mode": False,
        "logfilename": None,
    }
    # Later occurrences of an option override earlier ones.
    for flag, argument in parsed:
        if flag in ("-l", "--logfile"):
            settings["logfilename"] = str(argument)
        elif flag in ("-p", "--port"):
            settings["port"] = int(argument)
        elif flag in ("-s", "--server-mode"):
            settings["server_mode"] = True
        elif flag in ("-h", "--help"):
            settings["help_mode"] = True

    if settings["help_mode"]:
        usage()
        return
    wnb(settings["port"], not settings["server_mode"], settings["logfilename"])
|
1000 |
+
|
1001 |
+
|
1002 |
+
if __name__ == "__main__":
|
1003 |
+
app()
|
1004 |
+
|
1005 |
+
__all__ = ["app"]
|
pipeline/nltk/book.py
ADDED
@@ -0,0 +1,213 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Some texts for exploration in chapter 1 of the book
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Steven Bird <[email protected]>
|
5 |
+
#
|
6 |
+
# URL: <https://www.nltk.org/>
|
7 |
+
# For license information, see LICENSE.TXT
|
8 |
+
|
9 |
+
from nltk.corpus import (
|
10 |
+
genesis,
|
11 |
+
gutenberg,
|
12 |
+
inaugural,
|
13 |
+
nps_chat,
|
14 |
+
treebank,
|
15 |
+
webtext,
|
16 |
+
wordnet,
|
17 |
+
)
|
18 |
+
from nltk.probability import FreqDist
|
19 |
+
from nltk.text import Text
|
20 |
+
from nltk.util import bigrams
|
21 |
+
|
22 |
+
print("*** Introductory Examples for the NLTK Book ***")
|
23 |
+
print("Loading text1, ..., text9 and sent1, ..., sent9")
|
24 |
+
print("Type the name of the text or sentence to view it.")
|
25 |
+
print("Type: 'texts()' or 'sents()' to list the materials.")
|
26 |
+
|
27 |
+
text1 = Text(gutenberg.words("melville-moby_dick.txt"))
|
28 |
+
print("text1:", text1.name)
|
29 |
+
|
30 |
+
text2 = Text(gutenberg.words("austen-sense.txt"))
|
31 |
+
print("text2:", text2.name)
|
32 |
+
|
33 |
+
text3 = Text(genesis.words("english-kjv.txt"), name="The Book of Genesis")
|
34 |
+
print("text3:", text3.name)
|
35 |
+
|
36 |
+
text4 = Text(inaugural.words(), name="Inaugural Address Corpus")
|
37 |
+
print("text4:", text4.name)
|
38 |
+
|
39 |
+
text5 = Text(nps_chat.words(), name="Chat Corpus")
|
40 |
+
print("text5:", text5.name)
|
41 |
+
|
42 |
+
text6 = Text(webtext.words("grail.txt"), name="Monty Python and the Holy Grail")
|
43 |
+
print("text6:", text6.name)
|
44 |
+
|
45 |
+
text7 = Text(treebank.words(), name="Wall Street Journal")
|
46 |
+
print("text7:", text7.name)
|
47 |
+
|
48 |
+
text8 = Text(webtext.words("singles.txt"), name="Personals Corpus")
|
49 |
+
print("text8:", text8.name)
|
50 |
+
|
51 |
+
text9 = Text(gutenberg.words("chesterton-thursday.txt"))
|
52 |
+
print("text9:", text9.name)
|
53 |
+
|
54 |
+
|
55 |
+
def texts():
    """Print the label and name of each of the texts text1 ... text9."""
    loaded = (text1, text2, text3, text4, text5, text6, text7, text8, text9)
    for number, text in enumerate(loaded, start=1):
        print(f"text{number}:", text.name)
|
65 |
+
|
66 |
+
|
67 |
+
sent1 = ["Call", "me", "Ishmael", "."]
|
68 |
+
sent2 = [
|
69 |
+
"The",
|
70 |
+
"family",
|
71 |
+
"of",
|
72 |
+
"Dashwood",
|
73 |
+
"had",
|
74 |
+
"long",
|
75 |
+
"been",
|
76 |
+
"settled",
|
77 |
+
"in",
|
78 |
+
"Sussex",
|
79 |
+
".",
|
80 |
+
]
|
81 |
+
sent3 = [
|
82 |
+
"In",
|
83 |
+
"the",
|
84 |
+
"beginning",
|
85 |
+
"God",
|
86 |
+
"created",
|
87 |
+
"the",
|
88 |
+
"heaven",
|
89 |
+
"and",
|
90 |
+
"the",
|
91 |
+
"earth",
|
92 |
+
".",
|
93 |
+
]
|
94 |
+
sent4 = [
|
95 |
+
"Fellow",
|
96 |
+
"-",
|
97 |
+
"Citizens",
|
98 |
+
"of",
|
99 |
+
"the",
|
100 |
+
"Senate",
|
101 |
+
"and",
|
102 |
+
"of",
|
103 |
+
"the",
|
104 |
+
"House",
|
105 |
+
"of",
|
106 |
+
"Representatives",
|
107 |
+
":",
|
108 |
+
]
|
109 |
+
sent5 = [
|
110 |
+
"I",
|
111 |
+
"have",
|
112 |
+
"a",
|
113 |
+
"problem",
|
114 |
+
"with",
|
115 |
+
"people",
|
116 |
+
"PMing",
|
117 |
+
"me",
|
118 |
+
"to",
|
119 |
+
"lol",
|
120 |
+
"JOIN",
|
121 |
+
]
|
122 |
+
sent6 = [
|
123 |
+
"SCENE",
|
124 |
+
"1",
|
125 |
+
":",
|
126 |
+
"[",
|
127 |
+
"wind",
|
128 |
+
"]",
|
129 |
+
"[",
|
130 |
+
"clop",
|
131 |
+
"clop",
|
132 |
+
"clop",
|
133 |
+
"]",
|
134 |
+
"KING",
|
135 |
+
"ARTHUR",
|
136 |
+
":",
|
137 |
+
"Whoa",
|
138 |
+
"there",
|
139 |
+
"!",
|
140 |
+
]
|
141 |
+
sent7 = [
|
142 |
+
"Pierre",
|
143 |
+
"Vinken",
|
144 |
+
",",
|
145 |
+
"61",
|
146 |
+
"years",
|
147 |
+
"old",
|
148 |
+
",",
|
149 |
+
"will",
|
150 |
+
"join",
|
151 |
+
"the",
|
152 |
+
"board",
|
153 |
+
"as",
|
154 |
+
"a",
|
155 |
+
"nonexecutive",
|
156 |
+
"director",
|
157 |
+
"Nov.",
|
158 |
+
"29",
|
159 |
+
".",
|
160 |
+
]
|
161 |
+
sent8 = [
|
162 |
+
"25",
|
163 |
+
"SEXY",
|
164 |
+
"MALE",
|
165 |
+
",",
|
166 |
+
"seeks",
|
167 |
+
"attrac",
|
168 |
+
"older",
|
169 |
+
"single",
|
170 |
+
"lady",
|
171 |
+
",",
|
172 |
+
"for",
|
173 |
+
"discreet",
|
174 |
+
"encounters",
|
175 |
+
".",
|
176 |
+
]
|
177 |
+
sent9 = [
|
178 |
+
"THE",
|
179 |
+
"suburb",
|
180 |
+
"of",
|
181 |
+
"Saffron",
|
182 |
+
"Park",
|
183 |
+
"lay",
|
184 |
+
"on",
|
185 |
+
"the",
|
186 |
+
"sunset",
|
187 |
+
"side",
|
188 |
+
"of",
|
189 |
+
"London",
|
190 |
+
",",
|
191 |
+
"as",
|
192 |
+
"red",
|
193 |
+
"and",
|
194 |
+
"ragged",
|
195 |
+
"as",
|
196 |
+
"a",
|
197 |
+
"cloud",
|
198 |
+
"of",
|
199 |
+
"sunset",
|
200 |
+
".",
|
201 |
+
]
|
202 |
+
|
203 |
+
|
204 |
+
def sents():
    """Print each of the sentences sent1 ... sent9 as space-joined tokens."""
    examples = (sent1, sent2, sent3, sent4, sent5, sent6, sent7, sent8, sent9)
    for number, tokens in enumerate(examples, start=1):
        print(f"sent{number}:", " ".join(tokens))
|
pipeline/nltk/ccg/__init__.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Natural Language Toolkit: Combinatory Categorial Grammar
|
2 |
+
#
|
3 |
+
# Copyright (C) 2001-2023 NLTK Project
|
4 |
+
# Author: Graeme Gange <[email protected]>
|
5 |
+
# URL: <https://www.nltk.org/>
|
6 |
+
# For license information, see LICENSE.TXT
|
7 |
+
|
8 |
+
"""
|
9 |
+
Combinatory Categorial Grammar.
|
10 |
+
|
11 |
+
For more information see nltk/doc/contrib/ccg/ccg.pdf
|
12 |
+
"""
|
13 |
+
|
14 |
+
from nltk.ccg.chart import CCGChart, CCGChartParser, CCGEdge, CCGLeafEdge
|
15 |
+
from nltk.ccg.combinator import (
|
16 |
+
BackwardApplication,
|
17 |
+
BackwardBx,
|
18 |
+
BackwardCombinator,
|
19 |
+
BackwardComposition,
|
20 |
+
BackwardSx,
|
21 |
+
BackwardT,
|
22 |
+
DirectedBinaryCombinator,
|
23 |
+
ForwardApplication,
|
24 |
+
ForwardCombinator,
|
25 |
+
ForwardComposition,
|
26 |
+
ForwardSubstitution,
|
27 |
+
ForwardT,
|
28 |
+
UndirectedBinaryCombinator,
|
29 |
+
UndirectedComposition,
|
30 |
+
UndirectedFunctionApplication,
|
31 |
+
UndirectedSubstitution,
|
32 |
+
UndirectedTypeRaise,
|
33 |
+
)
|
34 |
+
from nltk.ccg.lexicon import CCGLexicon
|
pipeline/nltk/ccg/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (963 Bytes). View file
|
|
pipeline/nltk/ccg/__pycache__/api.cpython-39.pyc
ADDED
Binary file (11.9 kB). View file
|
|