sunnychenxiwang committed on
Commit
d916065
1 Parent(s): 24c4def

update nltk

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. pipeline/nltk/VERSION +1 -0
  2. pipeline/nltk/__init__.py +209 -0
  3. pipeline/nltk/__pycache__/__init__.cpython-39.pyc +0 -0
  4. pipeline/nltk/__pycache__/book.cpython-39.pyc +0 -0
  5. pipeline/nltk/__pycache__/cli.cpython-39.pyc +0 -0
  6. pipeline/nltk/__pycache__/collections.cpython-39.pyc +0 -0
  7. pipeline/nltk/__pycache__/collocations.cpython-39.pyc +0 -0
  8. pipeline/nltk/__pycache__/compat.cpython-39.pyc +0 -0
  9. pipeline/nltk/__pycache__/data.cpython-39.pyc +0 -0
  10. pipeline/nltk/__pycache__/decorators.cpython-39.pyc +0 -0
  11. pipeline/nltk/__pycache__/downloader.cpython-39.pyc +0 -0
  12. pipeline/nltk/__pycache__/featstruct.cpython-39.pyc +0 -0
  13. pipeline/nltk/__pycache__/grammar.cpython-39.pyc +0 -0
  14. pipeline/nltk/__pycache__/help.cpython-39.pyc +0 -0
  15. pipeline/nltk/__pycache__/internals.cpython-39.pyc +0 -0
  16. pipeline/nltk/__pycache__/jsontags.cpython-39.pyc +0 -0
  17. pipeline/nltk/__pycache__/langnames.cpython-39.pyc +0 -0
  18. pipeline/nltk/__pycache__/lazyimport.cpython-39.pyc +0 -0
  19. pipeline/nltk/__pycache__/probability.cpython-39.pyc +0 -0
  20. pipeline/nltk/__pycache__/text.cpython-39.pyc +0 -0
  21. pipeline/nltk/__pycache__/tgrep.cpython-39.pyc +0 -0
  22. pipeline/nltk/__pycache__/toolbox.cpython-39.pyc +0 -0
  23. pipeline/nltk/__pycache__/treeprettyprinter.cpython-39.pyc +0 -0
  24. pipeline/nltk/__pycache__/treetransforms.cpython-39.pyc +0 -0
  25. pipeline/nltk/__pycache__/util.cpython-39.pyc +0 -0
  26. pipeline/nltk/__pycache__/wsd.cpython-39.pyc +0 -0
  27. pipeline/nltk/app/__init__.py +47 -0
  28. pipeline/nltk/app/__pycache__/__init__.cpython-39.pyc +0 -0
  29. pipeline/nltk/app/__pycache__/chartparser_app.cpython-39.pyc +0 -0
  30. pipeline/nltk/app/__pycache__/chunkparser_app.cpython-39.pyc +0 -0
  31. pipeline/nltk/app/__pycache__/collocations_app.cpython-39.pyc +0 -0
  32. pipeline/nltk/app/__pycache__/concordance_app.cpython-39.pyc +0 -0
  33. pipeline/nltk/app/__pycache__/nemo_app.cpython-39.pyc +0 -0
  34. pipeline/nltk/app/__pycache__/rdparser_app.cpython-39.pyc +0 -0
  35. pipeline/nltk/app/__pycache__/srparser_app.cpython-39.pyc +0 -0
  36. pipeline/nltk/app/__pycache__/wordfreq_app.cpython-39.pyc +0 -0
  37. pipeline/nltk/app/__pycache__/wordnet_app.cpython-39.pyc +0 -0
  38. pipeline/nltk/app/chartparser_app.py +2569 -0
  39. pipeline/nltk/app/chunkparser_app.py +1500 -0
  40. pipeline/nltk/app/collocations_app.py +438 -0
  41. pipeline/nltk/app/concordance_app.py +709 -0
  42. pipeline/nltk/app/nemo_app.py +163 -0
  43. pipeline/nltk/app/rdparser_app.py +1052 -0
  44. pipeline/nltk/app/srparser_app.py +937 -0
  45. pipeline/nltk/app/wordfreq_app.py +36 -0
  46. pipeline/nltk/app/wordnet_app.py +1005 -0
  47. pipeline/nltk/book.py +213 -0
  48. pipeline/nltk/ccg/__init__.py +34 -0
  49. pipeline/nltk/ccg/__pycache__/__init__.cpython-39.pyc +0 -0
  50. pipeline/nltk/ccg/__pycache__/api.cpython-39.pyc +0 -0
pipeline/nltk/VERSION ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.8.1
pipeline/nltk/__init__.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit (NLTK)
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Authors: Steven Bird <[email protected]>
5
+ # Edward Loper <[email protected]>
6
+ # URL: <https://www.nltk.org/>
7
+ # For license information, see LICENSE.TXT
8
+
9
+ """
10
+ The Natural Language Toolkit (NLTK) is an open source Python library
11
+ for Natural Language Processing. A free online book is available.
12
+ (If you use the library for academic research, please cite the book.)
13
+
14
+ Steven Bird, Ewan Klein, and Edward Loper (2009).
15
+ Natural Language Processing with Python. O'Reilly Media Inc.
16
+ https://www.nltk.org/book/
17
+
18
+ isort:skip_file
19
+ """
20
+
21
+ import os
22
+
23
+ # //////////////////////////////////////////////////////
24
+ # Metadata
25
+ # //////////////////////////////////////////////////////
26
+
27
+ # Version. For each new release, the version number should be updated
28
+ # in the file VERSION.
29
# Work out the package version from the ``VERSION`` file that sits next to
# this module, falling back to a descriptive placeholder when it cannot be read.
try:
    version_file = os.path.join(os.path.dirname(__file__), "VERSION")
    with open(version_file) as infile:
        __version__ = infile.read().strip()
except NameError:
    # NOTE(review): presumably hit when ``__file__`` is undefined (code pasted
    # into an interactive session) -- the message below suggests as much.
    __version__ = "unknown (running code interactively?)"
except OSError as ex:
    # The VERSION file is missing or unreadable; keep the reason in the string.
    __version__ = f"unknown ({ex})"

# ``__doc__`` is None when docstrings are stripped (``python -OO``), so only
# append the version line when there is a docstring to append to.
if __doc__ is not None:
    __doc__ = __doc__ + "\n@version: " + __version__
41
+
42
+
43
+ # Copyright notice
44
+ __copyright__ = """\
45
+ Copyright (C) 2001-2023 NLTK Project.
46
+
47
+ Distributed and Licensed under the Apache License, Version 2.0,
48
+ which is included by reference.
49
+ """
50
+
51
+ __license__ = "Apache License, Version 2.0"
52
+ # Description of the toolkit, keywords, and the project's primary URL.
53
+ __longdescr__ = """\
54
+ The Natural Language Toolkit (NLTK) is a Python package for
55
+ natural language processing. NLTK requires Python 3.7, 3.8, 3.9, 3.10 or 3.11."""
56
+ __keywords__ = [
57
+ "NLP",
58
+ "CL",
59
+ "natural language processing",
60
+ "computational linguistics",
61
+ "parsing",
62
+ "tagging",
63
+ "tokenizing",
64
+ "syntax",
65
+ "linguistics",
66
+ "language",
67
+ "natural language",
68
+ "text analytics",
69
+ ]
70
+ __url__ = "https://www.nltk.org/"
71
+
72
+ # Maintainer, contributors, etc.
73
+ __maintainer__ = "NLTK Team"
74
+ __maintainer_email__ = "[email protected]"
75
+ __author__ = __maintainer__
76
+ __author_email__ = __maintainer_email__
77
+
78
+ # "Trove" classifiers for Python Package Index.
79
+ __classifiers__ = [
80
+ "Development Status :: 5 - Production/Stable",
81
+ "Intended Audience :: Developers",
82
+ "Intended Audience :: Education",
83
+ "Intended Audience :: Information Technology",
84
+ "Intended Audience :: Science/Research",
85
+ "License :: OSI Approved :: Apache Software License",
86
+ "Operating System :: OS Independent",
87
+ "Programming Language :: Python :: 3.7",
88
+ "Programming Language :: Python :: 3.8",
89
+ "Programming Language :: Python :: 3.9",
90
+ "Programming Language :: Python :: 3.10",
91
+ "Programming Language :: Python :: 3.11",
92
+ "Topic :: Scientific/Engineering",
93
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
94
+ "Topic :: Scientific/Engineering :: Human Machine Interfaces",
95
+ "Topic :: Scientific/Engineering :: Information Analysis",
96
+ "Topic :: Text Processing",
97
+ "Topic :: Text Processing :: Filters",
98
+ "Topic :: Text Processing :: General",
99
+ "Topic :: Text Processing :: Indexing",
100
+ "Topic :: Text Processing :: Linguistic",
101
+ ]
102
+
103
+ from nltk.internals import config_java
104
+
105
+ # support numpy from pypy
106
+ try:
107
+ import numpypy
108
+ except ImportError:
109
+ pass
110
+
111
+ # Override missing methods on environments where it cannot be used like GAE.
112
+ import subprocess
113
+
114
# Restricted runtimes may ship a ``subprocess`` module without ``PIPE`` or
# ``Popen``.  Install stand-ins that fail loudly on use, so that merely
# importing the package still succeeds there.
if not hasattr(subprocess, "PIPE"):

    def _fake_PIPE(*_args, **_kwargs):
        """Stand-in for a missing ``subprocess.PIPE``; always raises."""
        raise NotImplementedError("subprocess.PIPE is not supported.")

    setattr(subprocess, "PIPE", _fake_PIPE)

if not hasattr(subprocess, "Popen"):

    def _fake_Popen(*_args, **_kwargs):
        """Stand-in for a missing ``subprocess.Popen``; always raises."""
        raise NotImplementedError("subprocess.Popen is not supported.")

    setattr(subprocess, "Popen", _fake_Popen)
126
+
127
+ ###########################################################
128
+ # TOP-LEVEL MODULES
129
+ ###########################################################
130
+
131
+ # Import top-level functionality into top-level namespace
132
+
133
+ from nltk.collocations import *
134
+ from nltk.decorators import decorator, memoize
135
+ from nltk.featstruct import *
136
+ from nltk.grammar import *
137
+ from nltk.probability import *
138
+ from nltk.text import *
139
+ from nltk.util import *
140
+ from nltk.jsontags import *
141
+
142
+ ###########################################################
143
+ # PACKAGES
144
+ ###########################################################
145
+
146
+ from nltk.chunk import *
147
+ from nltk.classify import *
148
+ from nltk.inference import *
149
+ from nltk.metrics import *
150
+ from nltk.parse import *
151
+ from nltk.tag import *
152
+ from nltk.tokenize import *
153
+ from nltk.translate import *
154
+ from nltk.tree import *
155
+ from nltk.sem import *
156
+ from nltk.stem import *
157
+
158
+ # Packages which can be lazily imported
159
+ # (a) we don't import *
160
+ # (b) they're slow to import or have run-time dependencies
161
+ # that can safely fail at run time
162
+
163
+ from nltk import lazyimport
164
+
165
+ app = lazyimport.LazyModule("app", locals(), globals())
166
+ chat = lazyimport.LazyModule("chat", locals(), globals())
167
+ corpus = lazyimport.LazyModule("corpus", locals(), globals())
168
+ draw = lazyimport.LazyModule("draw", locals(), globals())
169
+ toolbox = lazyimport.LazyModule("toolbox", locals(), globals())
170
+
171
+ # Optional loading
172
+
173
+ try:
174
+ import numpy
175
+ except ImportError:
176
+ pass
177
+ else:
178
+ from nltk import cluster
179
+
180
+ from nltk.downloader import download, download_shell
181
+
182
+ try:
183
+ import tkinter
184
+ except ImportError:
185
+ pass
186
+ else:
187
+ try:
188
+ from nltk.downloader import download_gui
189
+ except RuntimeError as e:
190
+ import warnings
191
+
192
+ warnings.warn(
193
+ "Corpus downloader GUI not loaded "
194
+ "(RuntimeError during import: %s)" % str(e)
195
+ )
196
+
197
+ # explicitly import all top-level modules (ensuring
198
+ # they override the same names inadvertently imported
199
+ # from a subpackage)
200
+
201
+ from nltk import ccg, chunk, classify, collocations
202
+ from nltk import data, featstruct, grammar, help, inference, metrics
203
+ from nltk import misc, parse, probability, sem, stem, wsd
204
+ from nltk import tag, tbl, text, tokenize, translate, tree, util
205
+
206
+
207
+ # FIXME: override any accidentally imported demo, see https://github.com/nltk/nltk/issues/2116
208
def demo():
    """Print a one-line hint on how to run the demo of a specific module.

    This definition comes after the package's star-imports so that it
    replaces any ``demo`` name they pulled in (nltk issue #2116).
    """
    hint = "To run the demo code for a module, type nltk.module.demo()"
    print(hint)
pipeline/nltk/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (4.84 kB). View file
 
pipeline/nltk/__pycache__/book.cpython-39.pyc ADDED
Binary file (2.99 kB). View file
 
pipeline/nltk/__pycache__/cli.cpython-39.pyc ADDED
Binary file (1.63 kB). View file
 
pipeline/nltk/__pycache__/collections.cpython-39.pyc ADDED
Binary file (23.4 kB). View file
 
pipeline/nltk/__pycache__/collocations.cpython-39.pyc ADDED
Binary file (14.9 kB). View file
 
pipeline/nltk/__pycache__/compat.cpython-39.pyc ADDED
Binary file (1.13 kB). View file
 
pipeline/nltk/__pycache__/data.cpython-39.pyc ADDED
Binary file (38.6 kB). View file
 
pipeline/nltk/__pycache__/decorators.cpython-39.pyc ADDED
Binary file (6.43 kB). View file
 
pipeline/nltk/__pycache__/downloader.cpython-39.pyc ADDED
Binary file (61.9 kB). View file
 
pipeline/nltk/__pycache__/featstruct.cpython-39.pyc ADDED
Binary file (74.1 kB). View file
 
pipeline/nltk/__pycache__/grammar.cpython-39.pyc ADDED
Binary file (53.7 kB). View file
 
pipeline/nltk/__pycache__/help.cpython-39.pyc ADDED
Binary file (1.63 kB). View file
 
pipeline/nltk/__pycache__/internals.cpython-39.pyc ADDED
Binary file (29 kB). View file
 
pipeline/nltk/__pycache__/jsontags.cpython-39.pyc ADDED
Binary file (2.31 kB). View file
 
pipeline/nltk/__pycache__/langnames.cpython-39.pyc ADDED
Binary file (15.3 kB). View file
 
pipeline/nltk/__pycache__/lazyimport.cpython-39.pyc ADDED
Binary file (3.73 kB). View file
 
pipeline/nltk/__pycache__/probability.cpython-39.pyc ADDED
Binary file (87.2 kB). View file
 
pipeline/nltk/__pycache__/text.cpython-39.pyc ADDED
Binary file (28.4 kB). View file
 
pipeline/nltk/__pycache__/tgrep.cpython-39.pyc ADDED
Binary file (35.3 kB). View file
 
pipeline/nltk/__pycache__/toolbox.cpython-39.pyc ADDED
Binary file (15.8 kB). View file
 
pipeline/nltk/__pycache__/treeprettyprinter.cpython-39.pyc ADDED
Binary file (952 Bytes). View file
 
pipeline/nltk/__pycache__/treetransforms.cpython-39.pyc ADDED
Binary file (4.99 kB). View file
 
pipeline/nltk/__pycache__/util.cpython-39.pyc ADDED
Binary file (32.5 kB). View file
 
pipeline/nltk/__pycache__/wsd.cpython-39.pyc ADDED
Binary file (1.82 kB). View file
 
pipeline/nltk/app/__init__.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Applications package
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Edward Loper <[email protected]>
5
+ # Steven Bird <[email protected]>
6
+ # URL: <https://www.nltk.org/>
7
+ # For license information, see LICENSE.TXT
8
+
9
+ """
10
+ Interactive NLTK Applications:
11
+
12
+ chartparser: Chart Parser
13
+ chunkparser: Regular-Expression Chunk Parser
14
+ collocations: Find collocations in text
15
+ concordance: Part-of-speech concordancer
16
+ nemo: Finding (and Replacing) Nemo regular expression tool
17
+ rdparser: Recursive Descent Parser
18
+ srparser: Shift-Reduce Parser
19
+ wordnet: WordNet Browser
20
+ """
21
+
22
+
23
+ # Import Tkinter-based modules if Tkinter is installed
24
+ try:
25
+ import tkinter
26
+ except ImportError:
27
+ import warnings
28
+
29
+ warnings.warn("nltk.app package not loaded (please install Tkinter library).")
30
+ else:
31
+ from nltk.app.chartparser_app import app as chartparser
32
+ from nltk.app.chunkparser_app import app as chunkparser
33
+ from nltk.app.collocations_app import app as collocations
34
+ from nltk.app.concordance_app import app as concordance
35
+ from nltk.app.nemo_app import app as nemo
36
+ from nltk.app.rdparser_app import app as rdparser
37
+ from nltk.app.srparser_app import app as srparser
38
+ from nltk.app.wordnet_app import app as wordnet
39
+
40
+ try:
41
+ from matplotlib import pylab
42
+ except ImportError:
43
+ import warnings
44
+
45
+ warnings.warn("nltk.app.wordfreq not loaded (requires the matplotlib library).")
46
+ else:
47
+ from nltk.app.wordfreq_app import app as wordfreq
pipeline/nltk/app/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (1.31 kB). View file
 
pipeline/nltk/app/__pycache__/chartparser_app.cpython-39.pyc ADDED
Binary file (63.2 kB). View file
 
pipeline/nltk/app/__pycache__/chunkparser_app.cpython-39.pyc ADDED
Binary file (33.4 kB). View file
 
pipeline/nltk/app/__pycache__/collocations_app.cpython-39.pyc ADDED
Binary file (14.8 kB). View file
 
pipeline/nltk/app/__pycache__/concordance_app.cpython-39.pyc ADDED
Binary file (22.7 kB). View file
 
pipeline/nltk/app/__pycache__/nemo_app.cpython-39.pyc ADDED
Binary file (12.3 kB). View file
 
pipeline/nltk/app/__pycache__/rdparser_app.cpython-39.pyc ADDED
Binary file (26 kB). View file
 
pipeline/nltk/app/__pycache__/srparser_app.cpython-39.pyc ADDED
Binary file (22 kB). View file
 
pipeline/nltk/app/__pycache__/wordfreq_app.cpython-39.pyc ADDED
Binary file (1.46 kB). View file
 
pipeline/nltk/app/__pycache__/wordnet_app.cpython-39.pyc ADDED
Binary file (31.1 kB). View file
 
pipeline/nltk/app/chartparser_app.py ADDED
@@ -0,0 +1,2569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Chart Parser Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Edward Loper <[email protected]>
5
+ # Jean Mark Gawron <[email protected]>
6
+ # Steven Bird <[email protected]>
7
+ # URL: <https://www.nltk.org/>
8
+ # For license information, see LICENSE.TXT
9
+
10
+ """
11
+ A graphical tool for exploring chart parsing.
12
+
13
+ Chart parsing is a flexible parsing algorithm that uses a data
14
+ structure called a "chart" to record hypotheses about syntactic
15
+ constituents. Each hypothesis is represented by a single "edge" on
16
+ the chart. A set of "chart rules" determines when new edges can be
17
+ added to the chart. This set of rules controls the overall behavior
18
+ of the parser (e.g. whether it parses top-down or bottom-up).
19
+
20
+ The chart parsing tool demonstrates the process of parsing a single
21
+ sentence, with a given grammar and lexicon. Its display is divided
22
+ into three sections: the bottom section displays the chart; the middle
23
+ section displays the sentence; and the top section displays the
24
+ partial syntax tree corresponding to the selected edge. Buttons along
25
+ the bottom of the window are used to control the execution of the
26
+ algorithm.
27
+
28
+ The chart parsing tool allows for flexible control of the parsing
29
+ algorithm. At each step of the algorithm, you can select which rule
30
+ or strategy you wish to apply. This allows you to experiment with
31
+ mixing different strategies (e.g. top-down and bottom-up). You can
32
+ exercise fine-grained control over the algorithm by selecting which
33
+ edge you wish to apply a rule to.
34
+ """
35
+
36
+ # At some point, we should rewrite this tool to use the new canvas
37
+ # widget system.
38
+
39
+
40
+ import os.path
41
+ import pickle
42
+ from tkinter import (
43
+ Button,
44
+ Canvas,
45
+ Checkbutton,
46
+ Frame,
47
+ IntVar,
48
+ Label,
49
+ Menu,
50
+ Scrollbar,
51
+ Tk,
52
+ Toplevel,
53
+ )
54
+ from tkinter.filedialog import askopenfilename, asksaveasfilename
55
+ from tkinter.font import Font
56
+ from tkinter.messagebox import showerror, showinfo
57
+
58
+ from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
59
+ from nltk.draw.util import (
60
+ CanvasFrame,
61
+ ColorizedList,
62
+ EntryDialog,
63
+ MutableOptionMenu,
64
+ ShowText,
65
+ SymbolWidget,
66
+ )
67
+ from nltk.grammar import CFG, Nonterminal
68
+ from nltk.parse.chart import (
69
+ BottomUpPredictCombineRule,
70
+ BottomUpPredictRule,
71
+ Chart,
72
+ LeafEdge,
73
+ LeafInitRule,
74
+ SingleEdgeFundamentalRule,
75
+ SteppingChartParser,
76
+ TopDownInitRule,
77
+ TopDownPredictRule,
78
+ TreeEdge,
79
+ )
80
+ from nltk.tree import Tree
81
+ from nltk.util import in_idle
82
+
83
+ # Known bug: ChartView doesn't handle edges generated by epsilon
84
+ # productions (e.g., [Production: PP -> ]) very well.
85
+
86
+ #######################################################################
87
+ # Edge List
88
+ #######################################################################
89
+
90
+
91
class EdgeList(ColorizedList):
    """
    A colorized list of chart edges, each shown as ``LHS -> rhs...`` with a
    ``*`` marking the position of the edge's dot.
    """

    # Glyph placed between an edge's left-hand side and its right-hand side.
    ARROW = SymbolWidget.SYMBOLS["rightarrow"]

    def _init_colortags(self, textwidget, options):
        """
        Register on ``textwidget`` the text tags used by ``_item_repr``.

        ``options`` is accepted but not used here.
        """
        tag_styles = (
            ("terminal", {"foreground": "#006000"}),
            ("arrow", {"font": "symbol", "underline": "0"}),
            ("dot", {"foreground": "#000000"}),
            (
                "nonterminal",
                {"foreground": "blue", "font": ("helvetica", -12, "bold")},
            ),
        )
        for tag_name, style in tag_styles:
            textwidget.tag_config(tag_name, **style)

    def _item_repr(self, item):
        """
        Return the ``(text, tag)`` pairs that render the edge ``item``.

        The result starts with the left-hand side and the arrow, followed by
        one entry per right-hand-side element; a ``" *"`` entry is inserted
        before the element at the dot position, and appended at the end when
        the edge is complete.
        """
        pieces = [
            ("%s\t" % item.lhs(), "nonterminal"),
            (self.ARROW, "arrow"),
        ]
        for position, symbol in enumerate(item.rhs()):
            if position == item.dot():
                pieces.append((" *", "dot"))
            # Nonterminals are shown by symbol name, anything else by repr().
            if isinstance(symbol, Nonterminal):
                piece = (" %s" % symbol.symbol(), "nonterminal")
            else:
                piece = (" %r" % symbol, "terminal")
            pieces.append(piece)
        if item.is_complete():
            pieces.append((" *", "dot"))
        return pieces
116
+
117
+
118
+ #######################################################################
119
+ # Chart Matrix View
120
+ #######################################################################
121
+
122
+
123
+ class ChartMatrixView:
124
+ """
125
+ A view of a chart that displays the contents of the corresponding matrix.
126
+ """
127
+
128
+ def __init__(
129
+ self, parent, chart, toplevel=True, title="Chart Matrix", show_numedges=False
130
+ ):
131
+ self._chart = chart
132
+ self._cells = []
133
+ self._marks = []
134
+
135
+ self._selected_cell = None
136
+
137
+ if toplevel:
138
+ self._root = Toplevel(parent)
139
+ self._root.title(title)
140
+ self._root.bind("<Control-q>", self.destroy)
141
+ self._init_quit(self._root)
142
+ else:
143
+ self._root = Frame(parent)
144
+
145
+ self._init_matrix(self._root)
146
+ self._init_list(self._root)
147
+ if show_numedges:
148
+ self._init_numedges(self._root)
149
+ else:
150
+ self._numedges_label = None
151
+
152
+ self._callbacks = {}
153
+
154
+ self._num_edges = 0
155
+
156
+ self.draw()
157
+
158
+ def _init_quit(self, root):
159
+ quit = Button(root, text="Quit", command=self.destroy)
160
+ quit.pack(side="bottom", expand=0, fill="none")
161
+
162
+ def _init_matrix(self, root):
163
+ cframe = Frame(root, border=2, relief="sunken")
164
+ cframe.pack(expand=0, fill="none", padx=1, pady=3, side="top")
165
+ self._canvas = Canvas(cframe, width=200, height=200, background="white")
166
+ self._canvas.pack(expand=0, fill="none")
167
+
168
+ def _init_numedges(self, root):
169
+ self._numedges_label = Label(root, text="0 edges")
170
+ self._numedges_label.pack(expand=0, fill="none", side="top")
171
+
172
+ def _init_list(self, root):
173
+ self._list = EdgeList(root, [], width=20, height=5)
174
+ self._list.pack(side="top", expand=1, fill="both", pady=3)
175
+
176
+ def cb(edge, self=self):
177
+ self._fire_callbacks("select", edge)
178
+
179
+ self._list.add_callback("select", cb)
180
+ self._list.focus()
181
+
182
+ def destroy(self, *e):
183
+ if self._root is None:
184
+ return
185
+ try:
186
+ self._root.destroy()
187
+ except:
188
+ pass
189
+ self._root = None
190
+
191
+ def set_chart(self, chart):
192
+ if chart is not self._chart:
193
+ self._chart = chart
194
+ self._num_edges = 0
195
+ self.draw()
196
+
197
+ def update(self):
198
+ if self._root is None:
199
+ return
200
+
201
+ # Count the edges in each cell
202
+ N = len(self._cells)
203
+ cell_edges = [[0 for i in range(N)] for j in range(N)]
204
+ for edge in self._chart:
205
+ cell_edges[edge.start()][edge.end()] += 1
206
+
207
+ # Color the cells correspondingly.
208
+ for i in range(N):
209
+ for j in range(i, N):
210
+ if cell_edges[i][j] == 0:
211
+ color = "gray20"
212
+ else:
213
+ color = "#00{:02x}{:02x}".format(
214
+ min(255, 50 + 128 * cell_edges[i][j] / 10),
215
+ max(0, 128 - 128 * cell_edges[i][j] / 10),
216
+ )
217
+ cell_tag = self._cells[i][j]
218
+ self._canvas.itemconfig(cell_tag, fill=color)
219
+ if (i, j) == self._selected_cell:
220
+ self._canvas.itemconfig(cell_tag, outline="#00ffff", width=3)
221
+ self._canvas.tag_raise(cell_tag)
222
+ else:
223
+ self._canvas.itemconfig(cell_tag, outline="black", width=1)
224
+
225
+ # Update the edge list.
226
+ edges = list(self._chart.select(span=self._selected_cell))
227
+ self._list.set(edges)
228
+
229
+ # Update our edge count.
230
+ self._num_edges = self._chart.num_edges()
231
+ if self._numedges_label is not None:
232
+ self._numedges_label["text"] = "%d edges" % self._num_edges
233
+
234
+ def activate(self):
235
+ self._canvas.itemconfig("inactivebox", state="hidden")
236
+ self.update()
237
+
238
+ def inactivate(self):
239
+ self._canvas.itemconfig("inactivebox", state="normal")
240
+ self.update()
241
+
242
+ def add_callback(self, event, func):
243
+ self._callbacks.setdefault(event, {})[func] = 1
244
+
245
+ def remove_callback(self, event, func=None):
246
+ if func is None:
247
+ del self._callbacks[event]
248
+ else:
249
+ try:
250
+ del self._callbacks[event][func]
251
+ except:
252
+ pass
253
+
254
+ def _fire_callbacks(self, event, *args):
255
+ if event not in self._callbacks:
256
+ return
257
+ for cb_func in list(self._callbacks[event].keys()):
258
+ cb_func(*args)
259
+
260
+ def select_cell(self, i, j):
261
+ if self._root is None:
262
+ return
263
+
264
+ # If the cell is already selected (and the chart contents
265
+ # haven't changed), then do nothing.
266
+ if (i, j) == self._selected_cell and self._chart.num_edges() == self._num_edges:
267
+ return
268
+
269
+ self._selected_cell = (i, j)
270
+ self.update()
271
+
272
+ # Fire the callback.
273
+ self._fire_callbacks("select_cell", i, j)
274
+
275
+ def deselect_cell(self):
276
+ if self._root is None:
277
+ return
278
+ self._selected_cell = None
279
+ self._list.set([])
280
+ self.update()
281
+
282
+ def _click_cell(self, i, j):
283
+ if self._selected_cell == (i, j):
284
+ self.deselect_cell()
285
+ else:
286
+ self.select_cell(i, j)
287
+
288
+ def view_edge(self, edge):
289
+ self.select_cell(*edge.span())
290
+ self._list.view(edge)
291
+
292
+ def mark_edge(self, edge):
293
+ if self._root is None:
294
+ return
295
+ self.select_cell(*edge.span())
296
+ self._list.mark(edge)
297
+
298
+ def unmark_edge(self, edge=None):
299
+ if self._root is None:
300
+ return
301
+ self._list.unmark(edge)
302
+
303
+ def markonly_edge(self, edge):
304
+ if self._root is None:
305
+ return
306
+ self.select_cell(*edge.span())
307
+ self._list.markonly(edge)
308
+
309
def draw(self):
    """
    Redraw the whole matrix on the canvas: row/column labels, dotted
    grid lines, the outer box, one clickable rectangle per cell on or
    above the diagonal, and the hidden "inactive" overlay.  Finishes by
    calling ``update``.  Does nothing if the view has no root widget.
    """
    if self._root is None:
        return

    LEFT_MARGIN = BOT_MARGIN = 15
    TOP_MARGIN = 5
    canvas = self._canvas
    canvas.delete("all")
    size = self._chart.num_leaves() + 1
    cell_w = (int(canvas["width"]) - LEFT_MARGIN) / size
    cell_h = (int(canvas["height"]) - TOP_MARGIN - BOT_MARGIN) / size

    canvas.delete("all")

    # Labels and dotted lines
    for idx in range(size):
        label = repr(idx)
        canvas.create_text(
            LEFT_MARGIN - 2,
            idx * cell_h + cell_h / 2 + TOP_MARGIN,
            text=label,
            anchor="e",
        )
        canvas.create_text(
            idx * cell_w + cell_w / 2 + LEFT_MARGIN,
            size * cell_h + TOP_MARGIN + 1,
            text=label,
            anchor="n",
        )
        row_y = cell_h * (idx + 1) + TOP_MARGIN
        canvas.create_line(
            LEFT_MARGIN, row_y, cell_w * size + LEFT_MARGIN, row_y, dash="."
        )
        col_x = cell_w * idx + LEFT_MARGIN
        canvas.create_line(
            col_x, TOP_MARGIN, col_x, cell_h * size + TOP_MARGIN, dash="."
        )

    # A box around the whole thing
    canvas.create_rectangle(
        LEFT_MARGIN,
        TOP_MARGIN,
        LEFT_MARGIN + cell_w * size,
        cell_h * size + TOP_MARGIN,
        width=2,
    )

    # Cells: only the upper triangle (row <= col) gets a rectangle; the
    # rest of the table stays None.
    self._cells = [[None] * size for _ in range(size)]
    for row in range(size):
        for col in range(row, size):
            cell = canvas.create_rectangle(
                col * cell_w + LEFT_MARGIN,
                row * cell_h + TOP_MARGIN,
                (col + 1) * cell_w + LEFT_MARGIN,
                (row + 1) * cell_h + TOP_MARGIN,
                fill="gray20",
            )
            self._cells[row][col] = cell

            # Bind row/col as defaults so each cell keeps its own indices.
            canvas.tag_bind(
                cell,
                "<Button-1>",
                lambda event, row=row, col=col: self._click_cell(row, col),
            )

    # Inactive box: a hidden rectangle larger than the canvas, kept at
    # the bottom of the stacking order.
    xmax, ymax = int(canvas["width"]), int(canvas["height"])
    overlay = canvas.create_rectangle(
        -100,
        -100,
        xmax + 100,
        ymax + 100,
        fill="gray50",
        state="hidden",
        tag="inactivebox",
    )
    canvas.tag_lower(overlay)

    # Update the cells.
    self.update()
386
+
387
def pack(self, *args, **kwargs):
    """Pack the view's root widget, forwarding all arguments to it."""
    root = self._root
    root.pack(*args, **kwargs)
389
+
390
+
391
+ #######################################################################
392
+ # Chart Results View
393
+ #######################################################################
394
+
395
+
396
class ChartResultsView:
    """
    A window (or embeddable frame) that displays every complete parse
    found in a chart as a tree, and lets the user click a tree to
    select it and print the trees to a file.
    """

    def __init__(self, parent, chart, grammar, toplevel=True):
        """
        :param parent: The Tk widget that owns this view.
        :param chart: The chart whose parses are displayed.
        :param grammar: The grammar; its start symbol determines which
            edges count as parses.
        :param toplevel: If true, build the view in its own ``Toplevel``
            window with Quit/Print buttons; otherwise build it in a
            plain ``Frame`` that the caller packs.
        """
        self._chart = chart
        self._grammar = grammar
        self._trees = []
        self._y = 10  # y coordinate at which the next tree is placed
        self._treewidgets = []
        self._selection = None  # currently selected tree widget
        self._selectbox = None  # canvas tag of the selection rectangle

        if toplevel:
            self._root = Toplevel(parent)
            self._root.title("Chart Parser Application: Results")
            self._root.bind("<Control-q>", self.destroy)
        else:
            self._root = Frame(parent)

        # Buttons
        if toplevel:
            buttons = Frame(self._root)
            buttons.pack(side="bottom", expand=0, fill="x")
            Button(buttons, text="Quit", command=self.destroy).pack(side="right")
            Button(buttons, text="Print All", command=self.print_all).pack(side="left")
            Button(buttons, text="Print Selection", command=self.print_selection).pack(
                side="left"
            )

        # Canvas frame.
        self._cframe = CanvasFrame(self._root, closeenough=20)
        self._cframe.pack(side="top", expand=1, fill="both")

        # Initial update
        self.update()

    def update(self, edge=None):
        """
        Add a tree widget for every parse in the chart that is not yet
        displayed.

        :param edge: If given, the edge that was just added to the
            chart.  The chart is only rescanned when this edge is
            labelled with the grammar's start symbol and spans the
            whole sentence.
        """
        if self._root is None:
            return
        # If the edge isn't a parse edge, do nothing.
        if edge is not None:
            if edge.lhs() != self._grammar.start():
                return
            if edge.span() != (0, self._chart.num_leaves()):
                return

        for parse in self._chart.parses(self._grammar.start()):
            if parse not in self._trees:
                self._add(parse)

    def _add(self, parse):
        """Create a tree widget for ``parse`` and stack it below the others."""
        # Add it to self._trees.
        self._trees.append(parse)

        # Create a widget for it.
        c = self._cframe.canvas()
        treewidget = tree_to_treesegment(c, parse)

        # Add it to the canvas frame.
        self._treewidgets.append(treewidget)
        self._cframe.add_widget(treewidget, 10, self._y)

        # Register callbacks.
        treewidget.bind_click(self._click)

        # Update y.
        self._y = treewidget.bbox()[3] + 10

    def _click(self, widget):
        """Make ``widget`` the selection and draw a box around it."""
        c = self._cframe.canvas()
        if self._selection is not None:
            c.delete(self._selectbox)
        self._selection = widget
        (x1, y1, x2, y2) = widget.bbox()
        self._selectbox = c.create_rectangle(x1, y1, x2, y2, width=2, outline="#088")

    def _color(self, treewidget, color):
        """Recursively set the colour of a tree widget and its subtrees."""
        treewidget.label()["color"] = color
        for child in treewidget.subtrees():
            if isinstance(child, TreeSegmentWidget):
                self._color(child, color)
            else:
                child["color"] = color

    def print_all(self, *e):
        """Print the whole canvas frame to a file."""
        if self._root is None:
            return
        self._cframe.print_to_file()

    def print_selection(self, *e):
        """
        Print only the selected tree to a file, or show an error dialog
        if no tree is selected.  The other trees are destroyed for the
        duration of the print and rebuilt from the chart afterwards.
        """
        if self._root is None:
            return
        if self._selection is None:
            showerror("Print Error", "No tree selected")
        else:
            c = self._cframe.canvas()
            for widget in self._treewidgets:
                if widget is not self._selection:
                    self._cframe.destroy_widget(widget)
            c.delete(self._selectbox)
            (x1, y1, x2, y2) = self._selection.bbox()
            self._selection.move(10 - x1, 10 - y1)
            c["scrollregion"] = f"0 0 {x2 - x1 + 20} {y2 - y1 + 20}"
            self._cframe.print_to_file()

            # Restore our state.
            self._treewidgets = [self._selection]
            self.clear()
            self.update()

    def clear(self):
        """Destroy every tree widget and reset the selection and layout."""
        if self._root is None:
            return
        for treewidget in self._treewidgets:
            self._cframe.destroy_widget(treewidget)
        self._trees = []
        self._treewidgets = []
        if self._selection is not None:
            self._cframe.canvas().delete(self._selectbox)
        self._selection = None
        self._y = 10

    def set_chart(self, chart):
        """Replace the displayed chart and rebuild the tree list."""
        self.clear()
        self._chart = chart
        self.update()

    def set_grammar(self, grammar):
        """Replace the grammar and rebuild the tree list."""
        self.clear()
        self._grammar = grammar
        self.update()

    def destroy(self, *e):
        """
        Destroy the root widget (best effort) and mark the view as dead.
        Safe to call more than once.
        """
        if self._root is None:
            return
        try:
            self._root.destroy()
        except Exception:
            # Best effort: the widget may already have been torn down.
            # Unlike a bare ``except:``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        self._root = None

    def pack(self, *args, **kwargs):
        """
        Pack the root widget, forwarding all arguments.  Does nothing
        once the view has been destroyed, like the other public methods.
        """
        if self._root is None:
            return
        self._root.pack(*args, **kwargs)
537
+
538
+
539
+ #######################################################################
540
+ # Chart Comparer
541
+ #######################################################################
542
+
543
+
544
class ChartComparer:
    """
    A window that shows two charts side by side together with the
    result of a set operation (difference, intersection or union) on
    their edges.

    :ivar _root: The root window

    :ivar _charts: A dictionary mapping names to charts.  When
        charts are loaded, they are added to this dictionary.

    :ivar _left_chart: The left ``Chart``.
    :ivar _left_name: The name of ``_left_chart`` (derived from filename)
    :ivar _left_matrix: The ``ChartMatrixView`` for ``_left_chart``
    :ivar _left_selector: The drop-down ``MutableOptionMenu`` used
          to select ``_left_chart``.

    :ivar _right_chart: The right ``Chart``.
    :ivar _right_name: The name of ``_right_chart`` (derived from filename)
    :ivar _right_matrix: The ``ChartMatrixView`` for ``_right_chart``
    :ivar _right_selector: The drop-down ``MutableOptionMenu`` used
          to select ``_right_chart``.

    :ivar _out_chart: The out ``Chart``.
    :ivar _out_matrix: The ``ChartMatrixView`` for ``_out_chart``
    :ivar _out_label: The label for ``_out_chart``.

    :ivar _op_label: A Label containing the most recent operation.
    :ivar _operator: The most recent operation (``"-"``, ``"and"``,
          ``"or"``), or None if no operation has been applied yet.
    """

    # Maps an operator name to the text shown in the operator label.
    _OPSYMBOL = {
        "-": "-",
        "and": SymbolWidget.SYMBOLS["intersection"],
        "or": SymbolWidget.SYMBOLS["union"],
    }

    def __init__(self, *chart_filenames):
        """
        :param chart_filenames: Paths of pickled charts to load at
            start-up (see ``load_chart``).
        """
        # This chart is displayed when we don't have a value (eg
        # before any chart is loaded).
        faketok = [""] * 8
        self._emptychart = Chart(faketok)

        # The left & right charts start out empty.
        self._left_name = "None"
        self._right_name = "None"
        self._left_chart = self._emptychart
        self._right_chart = self._emptychart

        # The charts that have been loaded.
        self._charts = {"None": self._emptychart}

        # The output chart.
        self._out_chart = self._emptychart

        # The most recent operation
        self._operator = None

        # Set up the root window.
        self._root = Tk()
        self._root.title("Chart Comparison")
        self._root.bind("<Control-q>", self.destroy)
        self._root.bind("<Control-x>", self.destroy)

        # Initialize all widgets, etc.
        self._init_menubar(self._root)
        self._init_chartviews(self._root)
        self._init_divider(self._root)
        self._init_buttons(self._root)
        self._init_bindings(self._root)

        # Load any specified charts.
        for filename in chart_filenames:
            self.load_chart(filename)

    def destroy(self, *e):
        """
        Destroy the root window (best effort) and mark the comparer as
        dead.  Safe to call more than once.
        """
        if self._root is None:
            return
        try:
            self._root.destroy()
        except Exception:
            # Best effort: the window may already be gone.  Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit
            # propagate.
            pass
        self._root = None

    def mainloop(self, *args, **kwargs):
        """
        Enter the Tk event loop for the comparer window, forwarding all
        arguments.  Does nothing if the window has been destroyed.

        The previous version returned unconditionally before reaching
        the ``mainloop`` call, so the event loop could never start.
        """
        if self._root is None:
            return
        self._root.mainloop(*args, **kwargs)

    # ////////////////////////////////////////////////////////////
    # Initialization
    # ////////////////////////////////////////////////////////////

    def _init_menubar(self, root):
        """Build the File and Compare menus and attach them to the window."""
        menubar = Menu(root)

        # File menu
        filemenu = Menu(menubar, tearoff=0)
        filemenu.add_command(
            label="Load Chart",
            accelerator="Ctrl-o",
            underline=0,
            command=self.load_chart_dialog,
        )
        filemenu.add_command(
            label="Save Output",
            accelerator="Ctrl-s",
            underline=0,
            command=self.save_chart_dialog,
        )
        filemenu.add_separator()
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        # Compare menu
        opmenu = Menu(menubar, tearoff=0)
        opmenu.add_command(
            label="Intersection", command=self._intersection, accelerator="+"
        )
        opmenu.add_command(label="Union", command=self._union, accelerator="*")
        opmenu.add_command(
            label="Difference", command=self._difference, accelerator="-"
        )
        opmenu.add_separator()
        opmenu.add_command(label="Swap Charts", command=self._swapcharts)
        menubar.add_cascade(label="Compare", underline=0, menu=opmenu)

        # Add the menu
        self._root.config(menu=menubar)

    def _init_divider(self, root):
        """Add a thin sunken separator below the chart views."""
        divider = Frame(root, border=2, relief="sunken")
        divider.pack(side="top", fill="x", ipady=2)

    def _init_chartviews(self, root):
        """
        Build the row "left matrix, operator, right matrix, =, output
        matrix".  All three matrices start out showing the empty chart
        and are inactivated.
        """
        opfont = ("symbol", -36)  # Font for operator.
        eqfont = ("helvetica", -36)  # Font for equals sign.

        frame = Frame(root, background="#c0c0c0")
        frame.pack(side="top", expand=1, fill="both")

        # The left matrix.
        cv1_frame = Frame(frame, border=3, relief="groove")
        cv1_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._left_selector = MutableOptionMenu(
            cv1_frame, list(self._charts.keys()), command=self._select_left
        )
        self._left_selector.pack(side="top", pady=5, fill="x")
        self._left_matrix = ChartMatrixView(
            cv1_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._left_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._left_matrix.add_callback("select", self.select_edge)
        self._left_matrix.add_callback("select_cell", self.select_cell)
        self._left_matrix.inactivate()

        # The operator.
        self._op_label = Label(
            frame, text=" ", width=3, background="#c0c0c0", font=opfont
        )
        self._op_label.pack(side="left", padx=5, pady=5)

        # The right matrix.
        cv2_frame = Frame(frame, border=3, relief="groove")
        cv2_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._right_selector = MutableOptionMenu(
            cv2_frame, list(self._charts.keys()), command=self._select_right
        )
        self._right_selector.pack(side="top", pady=5, fill="x")
        self._right_matrix = ChartMatrixView(
            cv2_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._right_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._right_matrix.add_callback("select", self.select_edge)
        self._right_matrix.add_callback("select_cell", self.select_cell)
        self._right_matrix.inactivate()

        # The equals sign
        Label(frame, text="=", width=3, background="#c0c0c0", font=eqfont).pack(
            side="left", padx=5, pady=5
        )

        # The output matrix.
        out_frame = Frame(frame, border=3, relief="groove")
        out_frame.pack(side="left", padx=8, pady=7, expand=1, fill="both")
        self._out_label = Label(out_frame, text="Output")
        self._out_label.pack(side="top", pady=9)
        self._out_matrix = ChartMatrixView(
            out_frame, self._emptychart, toplevel=False, show_numedges=True
        )
        self._out_matrix.pack(side="bottom", padx=5, pady=5, expand=1, fill="both")
        self._out_matrix.add_callback("select", self.select_edge)
        self._out_matrix.add_callback("select_cell", self.select_cell)
        self._out_matrix.inactivate()

    def _init_buttons(self, root):
        """Build the button bar along the bottom of the window."""
        buttons = Frame(root)
        buttons.pack(side="bottom", pady=5, fill="x", expand=0)
        Button(buttons, text="Intersection", command=self._intersection).pack(
            side="left"
        )
        Button(buttons, text="Union", command=self._union).pack(side="left")
        Button(buttons, text="Difference", command=self._difference).pack(side="left")
        Frame(buttons, width=20).pack(side="left")
        Button(buttons, text="Swap Charts", command=self._swapcharts).pack(side="left")

        Button(buttons, text="Detach Output", command=self._detach_out).pack(
            side="right"
        )

    def _init_bindings(self, root):
        """Register keyboard shortcuts on the root window."""
        # root.bind('<Control-s>', self.save_chart)
        root.bind("<Control-o>", self.load_chart_dialog)
        # root.bind('<Control-r>', self.reset)

    # ////////////////////////////////////////////////////////////
    # Input Handling
    # ////////////////////////////////////////////////////////////

    def _select_left(self, name):
        """Show the chart called ``name`` on the left and redo the operation."""
        self._left_name = name
        self._left_chart = self._charts[name]
        self._left_matrix.set_chart(self._left_chart)
        if name == "None":
            self._left_matrix.inactivate()
        self._apply_op()

    def _select_right(self, name):
        """Show the chart called ``name`` on the right and redo the operation."""
        self._right_name = name
        self._right_chart = self._charts[name]
        self._right_matrix.set_chart(self._right_chart)
        if name == "None":
            self._right_matrix.inactivate()
        self._apply_op()

    def _apply_op(self):
        """Re-run the most recent operation, if there is one."""
        if self._operator == "-":
            self._difference()
        elif self._operator == "or":
            self._union()
        elif self._operator == "and":
            self._intersection()

    # ////////////////////////////////////////////////////////////
    # File
    # ////////////////////////////////////////////////////////////
    CHART_FILE_TYPES = [("Pickle file", ".pickle"), ("All files", "*")]

    def save_chart_dialog(self, *args):
        """Ask for a filename and pickle the output chart to it."""
        filename = asksaveasfilename(
            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
        )
        if not filename:
            return
        try:
            with open(filename, "wb") as outfile:
                pickle.dump(self._out_chart, outfile)
        except Exception as e:
            showerror("Error Saving Chart", f"Unable to open file: {filename!r}\n{e}")

    def load_chart_dialog(self, *args):
        """Ask for a filename and load the pickled chart stored in it."""
        filename = askopenfilename(
            filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
        )
        if not filename:
            return
        try:
            self.load_chart(filename)
        except Exception as e:
            showerror("Error Loading Chart", f"Unable to open file: {filename!r}\n{e}")

    def load_chart(self, filename):
        """
        Load a pickled chart from ``filename`` and register it under the
        file's base name (minus any ``.pickle`` and ``.chart`` suffix).
        If the left or right slot is still empty, the new chart is
        shown there.

        :raise Exception: Whatever ``open`` or ``pickle.load`` raise.
        """
        # SECURITY NOTE: unpickling can execute arbitrary code.  Only
        # load chart files that come from a trusted source.
        with open(filename, "rb") as infile:
            chart = pickle.load(infile)
        name = os.path.basename(filename)
        if name.endswith(".pickle"):
            name = name[:-7]
        if name.endswith(".chart"):
            name = name[:-6]
        self._charts[name] = chart
        self._left_selector.add(name)
        self._right_selector.add(name)

        # If either left_matrix or right_matrix is empty, then
        # display the new chart.
        if self._left_chart is self._emptychart:
            self._left_selector.set(name)
        elif self._right_chart is self._emptychart:
            self._right_selector.set(name)

    def _update_chartviews(self):
        """Refresh all three matrix views."""
        self._left_matrix.update()
        self._right_matrix.update()
        self._out_matrix.update()

    # ////////////////////////////////////////////////////////////
    # Selection
    # ////////////////////////////////////////////////////////////

    def select_edge(self, edge):
        """
        Mark ``edge`` in every matrix whose chart contains it, and clear
        the marks in the matrices whose chart does not.
        """
        if edge in self._left_chart:
            self._left_matrix.markonly_edge(edge)
        else:
            self._left_matrix.unmark_edge()
        if edge in self._right_chart:
            self._right_matrix.markonly_edge(edge)
        else:
            self._right_matrix.unmark_edge()
        if edge in self._out_chart:
            self._out_matrix.markonly_edge(edge)
        else:
            self._out_matrix.unmark_edge()

    def select_cell(self, i, j):
        """Select cell ``(i, j)`` in all three matrices."""
        self._left_matrix.select_cell(i, j)
        self._right_matrix.select_cell(i, j)
        self._out_matrix.select_cell(i, j)

    # ////////////////////////////////////////////////////////////
    # Operations
    # ////////////////////////////////////////////////////////////

    def _difference(self):
        """Output the edges of the left chart that are not in the right chart."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            if edge not in self._right_chart:
                out_chart.insert(edge, [])

        self._update("-", out_chart)

    def _intersection(self):
        """Output the edges of the left chart that are also in the right chart."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            if edge in self._right_chart:
                out_chart.insert(edge, [])

        self._update("and", out_chart)

    def _union(self):
        """Output the edges of the left chart followed by those of the right chart."""
        if not self._checkcompat():
            return

        out_chart = Chart(self._left_chart.tokens())
        for edge in self._left_chart:
            out_chart.insert(edge, [])
        for edge in self._right_chart:
            out_chart.insert(edge, [])

        self._update("or", out_chart)

    def _swapcharts(self):
        """Exchange the left and right charts."""
        left, right = self._left_name, self._right_name
        self._left_selector.set(right)
        self._right_selector.set(left)

    def _checkcompat(self):
        """
        Return True if the left and right charts can be combined: both
        are loaded and they agree on tokens and property names.
        Otherwise reset the output view to the empty chart and return
        False.
        """
        if (
            self._left_chart.tokens() != self._right_chart.tokens()
            or self._left_chart.property_names() != self._right_chart.property_names()
            or self._left_chart == self._emptychart
            or self._right_chart == self._emptychart
        ):
            # Clear & inactivate the output chart.
            self._out_chart = self._emptychart
            self._out_matrix.set_chart(self._out_chart)
            self._out_matrix.inactivate()
            self._out_label["text"] = "Output"
            # Issue some other warning?
            return False
        else:
            return True

    def _update(self, operator, out_chart):
        """Record ``operator`` as the latest operation and display ``out_chart``."""
        self._operator = operator
        self._op_label["text"] = self._OPSYMBOL[operator]
        self._out_chart = out_chart
        self._out_matrix.set_chart(out_chart)
        self._out_label["text"] = (
            f"{self._left_name} {self._operator} {self._right_name}"
        )

    def _clear_out_chart(self):
        """Reset the output view to the empty chart and blank the operator."""
        self._out_chart = self._emptychart
        self._out_matrix.set_chart(self._out_chart)
        self._op_label["text"] = " "
        self._out_matrix.inactivate()

    def _detach_out(self):
        """Open the output chart in a separate matrix view titled with the output label."""
        ChartMatrixView(self._root, self._out_chart, title=self._out_label["text"])
939
+
940
+
941
+ #######################################################################
942
+ # Chart View
943
+ #######################################################################
944
+
945
+
946
+ class ChartView:
947
+ """
948
+ A component for viewing charts. This is used by ``ChartParserApp`` to
949
+ allow students to interactively experiment with various chart
950
+ parsing techniques. It is also used by ``Chart.draw()``.
951
+
952
+ :ivar _chart: The chart that we are giving a view of. This chart
953
+ may be modified; after it is modified, you should call
954
+ ``update``.
955
+ :ivar _sentence: The list of tokens that the chart spans.
956
+
957
+ :ivar _root: The root window.
958
+ :ivar _chart_canvas: The canvas we're using to display the chart
959
+ itself.
960
+ :ivar _tree_canvas: The canvas we're using to display the tree
961
+ that each edge spans. May be None, if we're not displaying
962
+ trees.
963
+ :ivar _sentence_canvas: The canvas we're using to display the sentence
964
+ text. May be None, if we're not displaying the sentence text.
965
+ :ivar _edgetags: A dictionary mapping from edges to the tags of
966
+ the canvas elements (lines, etc) used to display that edge.
967
+ The values of this dictionary have the form
968
+ ``(linetag, rhstag1, dottag, rhstag2, lhstag)``.
969
+ :ivar _tree_tags: A list of all the tags that make up the tree;
970
+ used to erase the tree (without erasing the loclines).
971
+ :ivar _chart_height: The height of the chart canvas.
972
+ :ivar _sentence_height: The height of the sentence canvas.
973
+ :ivar _tree_height: The height of the tree
974
+
975
+ :ivar _text_height: The height of a text string (in the normal
976
+ font).
977
+
978
+ :ivar _edgelevels: A list of edges at each level of the chart (the
979
+ top level is the 0th element). This list is used to remember
980
+ where edges should be drawn; and to make sure that no edges
981
+ are overlapping on the chart view.
982
+
983
+ :ivar _unitsize: Pixel size of one unit (from the location). This
984
+ is determined by the span of the chart's location, and the
985
+ width of the chart display canvas.
986
+
987
+ :ivar _fontsize: The current font size
988
+
989
+ :ivar _marks: A dictionary from edges to marks. Marks are
990
+ strings, specifying colors (e.g. 'green').
991
+ """
992
+
993
+ _LEAF_SPACING = 10
994
+ _MARGIN = 10
995
+ _TREE_LEVEL_SIZE = 12
996
+ _CHART_LEVEL_SIZE = 40
997
+
998
def __init__(self, chart, root=None, **kw):
    """
    Construct a new ``Chart`` display.

    :param chart: The chart to display.
    :param root: The Tk widget to build the view in.  If None, a new
        ``Tk`` window is created, with a "Done" button and a ``q`` key
        binding that both destroy it.
    :param kw: Optional settings: ``draw_tree`` (default 0),
        ``draw_sentence`` (default 1) and ``fontsize`` (default -12).
    """
    # Process keyword args.
    draw_tree = kw.get("draw_tree", 0)
    draw_sentence = kw.get("draw_sentence", 1)
    self._fontsize = kw.get("fontsize", -12)

    # The chart!
    self._chart = chart

    # Callback functions
    self._callbacks = {}

    # Keep track of drawn edges
    self._edgelevels = []
    self._edgetags = {}

    # Keep track of which edges are marked.
    self._marks = {}

    # These are used to keep track of the set of tree tokens
    # currently displayed in the tree canvas.
    self._treetoks = []
    self._treetoks_edge = None
    self._treetoks_index = 0

    # Keep track of the tags used to draw the tree
    self._tree_tags = []

    # Put multiple edges on each level?
    self._compact = 0

    # If they didn't provide a main window, then set one up.
    if root is None:
        top = Tk()
        top.title("Chart View")

        def destroy1(e, top=top):
            top.destroy()

        def destroy2(top=top):
            top.destroy()

        top.bind("q", destroy1)
        b = Button(top, text="Done", command=destroy2)
        b.pack(side="bottom")
        self._root = top
    else:
        self._root = root

    # Create some fonts.  Pass self._root rather than the ``root``
    # argument: the argument is None when we created the window
    # ourselves, and _init_fonts calls a method on the widget it is
    # given.
    self._init_fonts(self._root)

    # Create the chart canvas.
    (self._chart_sb, self._chart_canvas) = self._sb_canvas(self._root)
    self._chart_canvas["height"] = 300
    self._chart_canvas["closeenough"] = 15

    # Create the sentence canvas.
    if draw_sentence:
        cframe = Frame(self._root, relief="sunk", border=2)
        cframe.pack(fill="both", side="bottom")
        self._sentence_canvas = Canvas(cframe, height=50)
        self._sentence_canvas["background"] = "#e0e0e0"
        self._sentence_canvas.pack(fill="both")
        # self._sentence_canvas['height'] = self._sentence_height
    else:
        self._sentence_canvas = None

    # Create the tree canvas.
    if draw_tree:
        (sb, canvas) = self._sb_canvas(self._root, "n", "x")
        (self._tree_sb, self._tree_canvas) = (sb, canvas)
        self._tree_canvas["height"] = 200
    else:
        self._tree_canvas = None

    # Do some analysis to figure out how big the window should be
    self._analyze()
    self.draw()
    self._resize()
    self._grow()

    # Set up the configure callback, which will be called whenever
    # the window is resized.
    self._chart_canvas.bind("<Configure>", self._configure)
1086
+
1087
def _init_fonts(self, root):
    """
    Create the regular and bold helvetica fonts at ``self._fontsize``,
    plus a font built from a ``Button``'s "font" option, which is
    registered on ``root`` as the ``*Font`` option.
    """
    size = self._fontsize
    self._boldfont = Font(family="helvetica", size=size, weight="bold")
    self._font = Font(family="helvetica", size=size)
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    button_font = Button()["font"]
    self._sysfont = Font(font=button_font)
    root.option_add("*Font", self._sysfont)
1093
+
1094
def _sb_canvas(self, root, expand="y", fill="both", side="bottom"):
    """
    Helper for __init__: build a sunken frame inside ``root`` holding a
    canvas and a vertical scrollbar wired to each other.

    :return: The pair ``(scrollbar, canvas)``.
    """
    holder = Frame(root, relief="sunk", border=2)
    holder.pack(fill=fill, expand=expand, side=side)
    canvas = Canvas(holder, background="#e0e0e0")

    # Give the canvas a scrollbar.
    scrollbar = Scrollbar(holder, orient="vertical")
    scrollbar.pack(side="right", fill="y")
    canvas.pack(side="left", fill=fill, expand="yes")

    # Connect the scrollbar to the canvas.
    scrollbar.configure(command=canvas.yview)
    canvas.configure(yscrollcommand=scrollbar.set)

    return (scrollbar, canvas)
1112
+
1113
def scroll_up(self, *e):
    """Scroll the chart canvas up by one unit; extra arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", -1, "units")
1115
+
1116
def scroll_down(self, *e):
    """Scroll the chart canvas down by one unit; extra arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", 1, "units")
1118
+
1119
def page_up(self, *e):
    """Scroll the chart canvas up by one page; extra arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", -1, "pages")
1121
+
1122
def page_down(self, *e):
    """Scroll the chart canvas down by one page; extra arguments are ignored."""
    canvas = self._chart_canvas
    canvas.yview("scroll", 1, "pages")
1124
+
1125
def _grow(self):
    """
    Widen the chart canvas if the chart needs more room than the canvas
    currently has, then recompute ``_unitsize`` from the resulting
    width and reset the sentence canvas height.
    """
    canvas = self._chart_canvas
    num_leaves = self._chart.num_leaves()

    # Grow, if need-be
    current = int(canvas["width"])
    needed = num_leaves * self._unitsize + ChartView._MARGIN * 2
    width = max(current, needed)

    # It won't resize without the second (height) line, but I
    # don't understand why not.
    canvas.configure(width=width)
    canvas.configure(height=canvas["height"])

    self._unitsize = (width - 2 * ChartView._MARGIN) / num_leaves

    # Reset the height for the sentence window.
    sentence_canvas = self._sentence_canvas
    if sentence_canvas is not None:
        sentence_canvas["height"] = self._sentence_height
1145
+
1146
def set_font_size(self, size):
    """
    Resize all of the view's fonts and redraw the chart.

    :param size: The new font size; its sign is ignored and the fonts
        are configured with ``-abs(size)``.
    """
    new_size = -abs(size)
    # Remember the size so that get_font_size() reports the current
    # value rather than the one given at construction time.
    self._fontsize = new_size
    self._font.configure(size=new_size)
    self._boldfont.configure(size=new_size)
    self._sysfont.configure(size=new_size)
    self._analyze()
    self._grow()
    self.draw()
1153
+
1154
def get_font_size(self):
    """Return the absolute value of the stored font size."""
    stored = self._fontsize
    return abs(stored)
1156
+
1157
def _configure(self, e):
    """
    The configure callback, bound to the chart canvas's
    ``<Configure>`` event.  It recomputes the unit size from the
    event's width and redraws the view.

    :param e: The event; only its ``width`` attribute is read.
    """
    num_leaves = self._chart.num_leaves()
    usable_width = e.width - 2 * ChartView._MARGIN
    self._unitsize = usable_width / num_leaves
    self.draw()
1167
+
1168
def update(self, chart=None):
    """
    Draw any edges that have not been drawn yet.  This is typically
    called after the chart that this view is displaying has been
    modified; it causes any edges that were added to the chart to be
    drawn.

    If ``chart`` is given, it replaces the current chart: the edge
    levels and marks are reset and the whole view is re-analyzed and
    redrawn.
    """
    if chart is None:
        # Incremental update: only add the edges we have no tags for.
        for edge in self._chart:
            if edge in self._edgetags:
                continue
            self._add_edge(edge)
        self._resize()
        return

    # Full replacement of the displayed chart.
    self._chart = chart
    self._edgelevels = []
    self._marks = {}
    self._analyze()
    self._grow()
    self.draw()
    self.erase_tree()
    self._resize()
1192
+
1193
def _edge_conflict(self, edge, lvl):
    """
    Return True if ``edge`` overlaps any edge already placed on level
    ``lvl``, and False otherwise.  Two identical zero-width spans also
    count as overlapping.  Used by _add_edge to pick a level for a new
    edge.
    """
    start, end = edge.span()
    for placed in self._edgelevels[lvl]:
        other_start, other_end = placed.span()
        starts_inside_new = start <= other_start < end
        starts_inside_other = other_start <= start < other_end
        same_empty_span = start == other_start == end == other_end
        if starts_inside_new or starts_inside_other or same_empty_span:
            return True
    return False
1205
+
1206
def _analyze_edge(self, edge):
    """
    Given a new edge, recalculate:

      - _text_height
      - _unitsize (if the edge text is too big for the current
        _unitsize, then increase _unitsize)

    The text is measured by drawing it on the chart canvas in the bold
    font, reading its bounding box, and deleting it again.
    """
    canvas = self._chart_canvas

    lhs = edge.lhs()
    if isinstance(edge, TreeEdge):
        rhs = " ".join(
            str(elt.symbol()) if isinstance(elt, Nonterminal) else repr(elt)
            for elt in edge.rhs()
        )
    else:
        rhs = ""

    for text in (lhs, rhs):
        probe = canvas.create_text(
            0, 0, text=text, font=self._boldfont, anchor="nw", justify="left"
        )
        box = canvas.bbox(probe)
        canvas.delete(probe)
        text_width = box[2]  # + ChartView._LEAF_SPACING
        # Zero-length edges are measured as if they spanned one unit.
        span_units = max(edge.length(), 1)
        self._unitsize = max(self._unitsize, text_width / span_units)
        self._text_height = max(self._text_height, box[3] - box[1])
1239
+
1240
def _add_edge(self, edge, minlvl=0):
    """
    Add a single edge to the ChartView:

      - Call _analyze_edge to recalculate display parameters
      - Find an available level
      - Call _draw_edge

    Leaf edges and edges that already have canvas tags are ignored.

    :param minlvl: In compact mode, the lowest level index the edge
        may be placed on.
    """
    # Do NOT show leaf edges in the chart; skip edges already drawn.
    if isinstance(edge, LeafEdge) or edge in self._edgetags:
        return

    self._analyze_edge(edge)
    self._grow()

    levels = self._edgelevels

    if not self._compact:
        # Non-compact mode: every edge gets a fresh level of its own.
        levels.append([edge])
        self._draw_edge(edge, len(levels) - 1)
        self._resize()
        return

    # Compact mode: take the first level at or beyond minlvl on which
    # the edge does not conflict with an existing one.
    level = 0
    placed = False
    while not placed:
        # If this level doesn't exist yet, create it.
        while len(levels) <= level:
            levels.append([])
            self._resize()

        if level < minlvl or self._edge_conflict(edge, level):
            # Try the next level.
            level += 1
        else:
            levels[level].append(edge)
            placed = True

    self._draw_edge(edge, level)
1282
+
1283
def view_edge(self, edge):
    """
    Scroll the chart canvas so that ``edge`` is in view.  Does nothing
    if the edge is not on any level.
    """
    level = next(
        (idx for idx, edges in enumerate(self._edgelevels) if edge in edges),
        None,
    )
    if level is None:
        return

    # Try to view the new edge..
    canvas = self._chart_canvas
    edge_y = (level + 1) * self._chart_level_size
    headroom = self._text_height + 10
    canvas.yview("moveto", 1.0)
    if self._chart_height != 0:
        canvas.yview("moveto", (edge_y - headroom) / self._chart_height)
1297
+
1298
def _draw_edge(self, edge, lvl):
    """
    Draw a single edge on the ChartView at level ``lvl``: an arrow, the
    right-hand side split around a dot marker, and the left-hand side
    label.  The canvas tags are recorded in ``_edgetags`` and a click
    on any of them fires the "select" callbacks with the edge.
    """
    canvas = self._chart_canvas
    unit = self._unitsize
    margin = ChartView._MARGIN

    # Draw the arrow.  A zero-width edge still gets a short arrow.
    x1 = edge.start() * unit + margin
    x2 = edge.end() * unit + margin
    if x2 == x1:
        x2 += max(4, unit / 5)
    y = (lvl + 1) * self._chart_level_size
    linetag = canvas.create_line(x1, y, x2, y, arrow="last", width=3)

    # Work out the label for the edge.
    if isinstance(edge, TreeEdge):
        rhs = [
            str(elt.symbol()) if isinstance(elt, Nonterminal) else repr(elt)
            for elt in edge.rhs()
        ]
        pos = edge.dot()
    else:
        rhs, pos = [], 0

    before_dot = " ".join(rhs[:pos])
    after_dot = " ".join(rhs[pos:])
    rhstag1 = canvas.create_text(
        x1 + 3, y, text=before_dot, font=self._font, anchor="nw"
    )
    # The dot sits just right of the first text, vertically centred on it.
    box = canvas.bbox(rhstag1)
    dotx = box[2] + 6
    doty = (box[1] + box[3]) / 2
    dottag = canvas.create_oval(dotx - 2, doty - 2, dotx + 2, doty + 2)
    rhstag2 = canvas.create_text(
        dotx + 6, y, text=after_dot, font=self._font, anchor="nw"
    )
    lhstag = canvas.create_text(
        (x1 + x2) / 2, y, text=str(edge.lhs()), anchor="s", font=self._boldfont
    )

    # Keep track of the edge's tags.
    tags = (linetag, rhstag1, dottag, rhstag2, lhstag)
    self._edgetags[edge] = tags

    # Register a callback for clicking on the edge.
    def on_click(event, self=self, edge=edge):
        self._fire_callbacks("select", edge)

    for tag in (rhstag1, rhstag2, linetag, dottag, lhstag):
        canvas.tag_bind(tag, "<Button-1>", on_click)

    self._color_edge(edge)
1350
+
1351
def _color_edge(self, edge, linecolor=None, textcolor=None):
    """
    Color in an edge with the given colors.
    If no colors are specified, use intelligent defaults
    (dependent on selection, etc.)

    Edges that have no entry in ``self._edgetags`` are ignored.  If the
    edge has an entry in ``self._marks``, that mark overrides the line
    color (the text color is left as given).

    :param edge: The edge to color.
    :param linecolor: Fill color for the arrow, or None for the default.
    :param textcolor: Fill color for the labels and dot, or None for
        the default.  Defaults are used unless *both* colors are given.
    """
    if edge not in self._edgetags:
        return

    if linecolor is None or textcolor is None:
        # Pick default colors from the kind of edge, then recurse with
        # explicit colors.  (The original also called
        # ``self._color_edge(self._marks[edge])`` here, passing a color
        # where an edge is expected; the mark is applied below instead.)
        num_leaves = self._chart.num_leaves()
        if edge.is_complete() and edge.span() == (0, num_leaves):
            self._color_edge(edge, "#084", "#042")
        elif isinstance(edge, LeafEdge):
            self._color_edge(edge, "#48c", "#246")
        else:
            self._color_edge(edge, "#00f", "#008")
        return

    if edge in self._marks:
        linecolor = self._marks[edge]
    linetag, rhstag1, dottag, rhstag2, lhstag = self._edgetags[edge]
    canvas = self._chart_canvas
    canvas.itemconfig(linetag, fill=linecolor)
    canvas.itemconfig(rhstag1, fill=textcolor)
    canvas.itemconfig(dottag, fill=textcolor, outline=textcolor)
    canvas.itemconfig(rhstag2, fill=textcolor)
    canvas.itemconfig(lhstag, fill=textcolor)
1381
+
1382
def mark_edge(self, edge, mark="#0df"):
    """
    Mark an edge: record ``mark`` for it in ``self._marks`` and
    recolor the edge.

    :param edge: The edge to mark.
    :param mark: The color stored as the edge's mark.
    """
    self._marks.update({edge: mark})
    self._color_edge(edge)
1388
+
1389
def unmark_edge(self, edge=None):
    """
    Unmark an edge (or all edges), and recolor whatever was unmarked.

    Unmarking an edge that is not currently marked is a no-op apart
    from the recoloring (previously it raised ``KeyError``).

    :param edge: The edge to unmark, or None to unmark every edge.
    """
    if edge is None:
        previously_marked = list(self._marks)
        # Drop all marks first so recoloring uses the default colors.
        self._marks = {}
        for marked in previously_marked:
            self._color_edge(marked)
    else:
        self._marks.pop(edge, None)
        self._color_edge(edge)
1401
+
1402
def markonly_edge(self, edge, mark="#0df"):
    """
    Clear every existing mark, then mark ``edge`` with ``mark``.

    :param edge: The only edge that should remain marked.
    :param mark: The color passed on to ``mark_edge``.
    """
    self.unmark_edge()
    self.mark_edge(edge, mark)
1405
+
1406
def _analyze(self):
    """
    Analyze the sentence string, to figure out how big a unit needs
    to be, how big the tree should be, etc.

    Sets ``_unitsize``, ``_text_height``, ``_sentence_height``,
    ``_chart_level_size`` and ``_tree_height``, analyzes every edge
    of the chart, and finally refreshes the scroll regions.
    """
    canvas = self._chart_canvas

    # Figure out the text height and the unit size by measuring a
    # temporary text item for each token.
    unitsize = 70  # min unitsize
    text_height = 0
    for leaf in self._chart.leaves():
        probe = canvas.create_text(
            0, 0, text=repr(leaf), font=self._font, anchor="nw", justify="left"
        )
        _, top, right, bottom = canvas.bbox(probe)
        canvas.delete(probe)
        unitsize = max(right + ChartView._LEAF_SPACING, unitsize)
        text_height = max(text_height, bottom - top)

    self._unitsize = unitsize
    self._text_height = text_height
    self._sentence_height = text_height + 2 * ChartView._MARGIN

    # Check against edges.
    for edge in self._chart.edges():
        self._analyze_edge(edge)

    # Size of chart levels
    self._chart_level_size = self._text_height * 2

    # Default tree size..
    self._tree_height = 3 * (ChartView._TREE_LEVEL_SIZE + self._text_height)

    # Resize the scrollregions.
    self._resize()
1443
+
1444
def _resize(self):
    """
    Update the scroll-regions for each canvas.  This ensures that
    everything is within a scroll-region, so the user can use the
    scrollbars to view the entire display.  This does *not*
    resize the window.
    """
    width = self._chart.num_leaves() * self._unitsize + ChartView._MARGIN * 2

    # Reset the chart scroll region: two levels more than are in use.
    self._chart_height = (len(self._edgelevels) + 2) * self._chart_level_size
    self._chart_canvas["scrollregion"] = (0, 0, width, self._chart_height)

    # Reset the tree scroll region
    tree_canvas = self._tree_canvas
    if tree_canvas:
        tree_canvas["scrollregion"] = (0, 0, width, self._tree_height)
1463
+
1464
def _draw_loclines(self):
    """
    Draw location lines.  These are vertical gridlines used to
    show where each location unit is.

    A line is drawn on the chart canvas and, when present, on the
    tree and sentence canvases; the chart canvas also gets a numeric
    label per location.  Line colors alternate between two grays.
    """
    BOTTOM = 50000
    tree_canvas = self._tree_canvas
    sent_canvas = self._sentence_canvas
    chart_canvas = self._chart_canvas
    margin = ChartView._MARGIN
    self._loclines = []
    for i in range(0, self._chart.num_leaves() + 1):
        x = i * self._unitsize + margin

        if tree_canvas:
            tree_line = tree_canvas.create_line(x, 0, x, BOTTOM)
            tree_canvas.tag_lower(tree_line)
        if sent_canvas:
            sent_line = sent_canvas.create_line(x, 0, x, self._sentence_height)
            sent_canvas.tag_lower(sent_line)
        chart_line = chart_canvas.create_line(x, 0, x, BOTTOM)
        chart_canvas.tag_lower(chart_line)
        label = chart_canvas.create_text(
            x + 2, 0, text=repr(i), anchor="nw", font=self._font
        )
        chart_canvas.tag_lower(label)

        # Even locations get the darker gray.
        shade = "gray60" if i % 2 == 0 else "gray80"
        if tree_canvas:
            tree_canvas.itemconfig(tree_line, fill=shade)
        if sent_canvas:
            sent_canvas.itemconfig(sent_line, fill=shade)
        chart_canvas.itemconfig(chart_line, fill=shade)
1504
+
1505
def _draw_sentence(self):
    """
    Draw the sentence string: one text item per leaf, centred in its
    unit, with a light rectangle placed behind it.
    """
    if self._chart.num_leaves() == 0:
        return

    canvas = self._sentence_canvas
    margin = ChartView._MARGIN
    pad = ChartView._LEAF_SPACING / 2

    for i, leaf in enumerate(self._chart.leaves()):
        left = i * self._unitsize + margin
        right = left + self._unitsize
        text_tag = canvas.create_text(
            (left + right) / 2,
            margin,
            text=repr(leaf),
            font=self._font,
            anchor="n",
            justify="left",
        )
        _, top, _, bottom = canvas.bbox(text_tag)
        box_tag = canvas.create_rectangle(
            left + 2,
            top - pad,
            right - 2,
            bottom + pad,
            fill="#f0f0f0",
            outline="#f0f0f0",
        )
        # Keep the background box underneath the text.
        canvas.tag_lower(box_tag)
1530
+
1531
def erase_tree(self):
    """
    Remove the currently drawn tree from the tree canvas and forget
    the tree tokens associated with it.
    """
    for tag in self._tree_tags:
        self._tree_canvas.delete(tag)
    # The ids just deleted are no longer valid; forget them so the list
    # does not grow (and get re-deleted) on every redraw.
    self._tree_tags = []
    self._treetoks = []
    self._treetoks_edge = None
    self._treetoks_index = 0
1537
+
1538
def draw_tree(self, edge=None):
    """
    Draw one of the trees licensed by ``edge`` on the tree canvas.

    :param edge: The edge whose tree should be shown.  If None, the
        edge used by the previous call is redrawn; if there is no such
        edge, nothing happens.

    When ``edge`` differs from the previous edge, its trees are fetched
    from the chart (keeping only ``Tree`` instances) and the first one
    is shown.  If the edge has no trees, the canvas is left untouched.
    """
    if edge is None and self._treetoks_edge is None:
        return
    if edge is None:
        edge = self._treetoks_edge

    # If it's a new edge, then get a new list of treetoks.
    if self._treetoks_edge != edge:
        self._treetoks = [t for t in self._chart.trees(edge) if isinstance(t, Tree)]
        self._treetoks_edge = edge
        self._treetoks_index = 0

    # Make sure there's something to draw.
    if len(self._treetoks) == 0:
        return

    # Erase the old tree.
    for tag in self._tree_tags:
        self._tree_canvas.delete(tag)
    # Forget the deleted ids; otherwise the list keeps growing with
    # stale entries that are re-deleted on every redraw.
    self._tree_tags = []

    # Draw the new tree.
    tree = self._treetoks[self._treetoks_index]
    self._draw_treetok(tree, edge.start())

    # Show how many trees are available for the edge.
    self._draw_treecycle()

    # Update the scroll region.
    w = self._chart.num_leaves() * self._unitsize + 2 * ChartView._MARGIN
    h = tree.height() * (ChartView._TREE_LEVEL_SIZE + self._text_height)
    self._tree_canvas["scrollregion"] = (0, 0, w, h)
1569
+
1570
def cycle_tree(self):
    """
    Advance to the next tree of the current edge (wrapping around)
    and redraw it.

    Does nothing when there are no trees to cycle through; previously
    this raised ``ZeroDivisionError``.
    """
    if not self._treetoks:
        return
    self._treetoks_index = (self._treetoks_index + 1) % len(self._treetoks)
    self.draw_tree(self._treetoks_edge)
1573
+
1574
def _draw_treecycle(self):
    """
    Draw the "N Trees" label and one clickable triangle per available
    tree in the top-right corner of the tree canvas.  The triangle of
    the tree currently shown is filled.  Nothing is drawn when there
    is at most one tree.
    """
    count = len(self._treetoks)
    if count <= 1:
        return

    # Draw the label.
    canvas = self._tree_canvas
    margin = ChartView._MARGIN
    right = self._chart.num_leaves() * self._unitsize + margin - 2
    label_tag = canvas.create_text(
        right, 2, anchor="ne", text="%d Trees" % count, font=self._boldfont
    )
    self._tree_tags.append(label_tag)
    _, _, _, y = canvas.bbox(label_tag)

    # Draw the triangles.
    for i in range(count):
        x = right - 20 * (count - i - 1)
        fill = "#084" if i == self._treetoks_index else "#fff"
        triangle = canvas.create_polygon(
            x, y + 10, x - 5, y, x - 10, y + 10, fill=fill, outline="black"
        )
        self._tree_tags.append(triangle)

        # Set up a callback: show the tree if they click on its
        # triangle.  (``i`` is bound as a default to capture its
        # current value.)
        def show_tree(event, self=self, i=i):
            self._treetoks_index = i
            self.draw_tree()

        canvas.tag_bind(triangle, "<Button-1>", show_tree)
1606
+
1607
def _draw_treetok(self, treetok, index, depth=0):
    """
    Recursively draw ``treetok`` on the tree canvas.

    :param treetok: The tree to draw.
    :param index: The index of the first leaf in the tree.
    :param depth: Depth of ``treetok`` below the root being drawn;
        determines the y coordinate of its label.
    :return: A pair ``(nodex, index)``: the x coordinate at which the
        node was drawn and the index of the first leaf after the tree.
    """
    canvas = self._tree_canvas
    margin = ChartView._MARGIN

    # Draw the children
    child_xs = []
    for child in treetok:
        if isinstance(child, Tree):
            child_x, index = self._draw_treetok(child, index, depth + 1)
        else:
            # A leaf sits in the middle of its unit.
            child_x = (2 * index + 1) * self._unitsize / 2 + margin
            index += 1
        child_xs.append(child_x)

    # If we have children, then get the node's x by averaging their
    # node x's.  Otherwise, make room for ourselves.
    if child_xs:
        nodex = sum(child_xs) / len(child_xs)
    else:
        # [XX] breaks for null productions.
        nodex = (2 * index + 1) * self._unitsize / 2 + margin
        index += 1

    # Draw the node
    nodey = depth * (ChartView._TREE_LEVEL_SIZE + self._text_height)
    label_tag = canvas.create_text(
        nodex,
        nodey,
        anchor="n",
        justify="center",
        text=str(treetok.label()),
        fill="#042",
        font=self._boldfont,
    )
    self._tree_tags.append(label_tag)

    # Draw lines to the children.
    childy = nodey + ChartView._TREE_LEVEL_SIZE + self._text_height
    line_top = nodey + self._text_height
    for childx, child in zip(child_xs, treetok):
        if isinstance(child, Tree):
            if child:
                # A "real" tree token:
                line_opts = {"fill": "#084"}
            else:
                # An unexpanded tree token:
                line_opts = {"fill": "#048", "dash": "2 3"}
            line_bottom = childy
        else:
            # A leaf:
            line_opts = {"fill": "#084"}
            line_bottom = 10000
        line_tag = canvas.create_line(
            nodex, line_top, childx, line_bottom, width=2, **line_opts
        )
        self._tree_tags.append(line_tag)

    return nodex, index
1686
+
1687
def draw(self):
    """
    Draw everything (from scratch).

    Clears and redraws the tree and sentence canvases (when present),
    then clears the chart canvas, redraws every edge already assigned
    to a level, passes every chart edge to ``_add_edge`` and finally
    draws the location lines.
    """
    tree_canvas = self._tree_canvas
    if tree_canvas:
        tree_canvas.delete("all")
        self.draw_tree()

    sentence_canvas = self._sentence_canvas
    if sentence_canvas:
        sentence_canvas.delete("all")
        self._draw_sentence()

    self._chart_canvas.delete("all")
    self._edgetags = {}

    # Redraw any edges we erased.
    for lvl, edges_on_level in enumerate(self._edgelevels):
        for edge in edges_on_level:
            self._draw_edge(edge, lvl)

    for edge in self._chart:
        self._add_edge(edge)

    self._draw_loclines()
1711
+
1712
def add_callback(self, event, func):
    """
    Register ``func`` to be called when ``event`` is fired.

    :param event: The event name used as the key in ``self._callbacks``.
    :param func: The callable to register; registering it twice for
        the same event has no additional effect.
    """
    listeners = self._callbacks.setdefault(event, {})
    listeners[func] = 1
1714
+
1715
def remove_callback(self, event, func=None):
    """
    Unregister callbacks for ``event``.

    :param event: The event name.
    :param func: The callable to remove.  If None, every callback
        registered for ``event`` is removed.
    :raise KeyError: If ``func`` is None and nothing is registered
        for ``event``.

    Removing a specific ``func`` that is not registered is silently
    ignored.
    """
    if func is None:
        del self._callbacks[event]
    else:
        try:
            del self._callbacks[event][func]
        except KeyError:
            # Unknown event or unknown callback: nothing to remove.
            pass
1723
+
1724
def _fire_callbacks(self, event, *args):
    """
    Call every callback registered for ``event`` with ``args``.

    The set of callbacks is snapshotted before calling, so a callback
    may add or remove callbacks while it runs.
    """
    listeners = self._callbacks.get(event) if event in self._callbacks else None
    if listeners is None:
        return
    for cb_func in list(listeners):
        cb_func(*args)
1729
+
1730
+
1731
+ #######################################################################
1732
+ # Edge Rules
1733
+ #######################################################################
1734
+ # These versions of the chart rules apply only to a specific edge.
1735
+ # This lets the user select an edge, and then apply a rule.
1736
+
1737
+
1738
class EdgeRule:
    """
    To create an edge rule, make an empty base class that uses
    EdgeRule as the first base class, and the basic rule as the
    second base class.  (Order matters!)

    The basic rule is looked up as ``self.__class__.__bases__[1]``.
    The edge rule holds one fixed edge and supplies it as the last
    edge whenever the basic rule is applied.
    """

    def __init__(self, edge):
        """
        :param edge: The edge that this rule always applies to.
        """
        base_rule = self.__class__.__bases__[1]
        self._edge = edge
        # One edge is supplied by this rule itself.
        self.NUM_EDGES = base_rule.NUM_EDGES - 1

    def apply(self, chart, grammar, *edges):
        """
        Apply the basic rule to ``edges`` followed by the fixed edge,
        yielding whatever the basic rule yields.
        """
        base_rule = self.__class__.__bases__[1]
        all_edges = edges + (self._edge,)
        yield from base_rule.apply(self, chart, grammar, *all_edges)

    def __str__(self):
        return self.__class__.__bases__[1].__str__(self)
1758
+
1759
+
1760
class TopDownPredictEdgeRule(EdgeRule, TopDownPredictRule):
    """Edge-specific version of ``TopDownPredictRule``."""
1762
+
1763
+
1764
class BottomUpEdgeRule(EdgeRule, BottomUpPredictRule):
    """Edge-specific version of ``BottomUpPredictRule``."""
1766
+
1767
+
1768
class BottomUpLeftCornerEdgeRule(EdgeRule, BottomUpPredictCombineRule):
    """Edge-specific version of ``BottomUpPredictCombineRule``."""
1770
+
1771
+
1772
class FundamentalEdgeRule(EdgeRule, SingleEdgeFundamentalRule):
    """Edge-specific version of ``SingleEdgeFundamentalRule``."""
1774
+
1775
+
1776
+ #######################################################################
1777
+ # Chart Parser Application
1778
+ #######################################################################
1779
+
1780
+
1781
+ class ChartParserApp:
1782
def __init__(self, grammar, tokens, title="Chart Parser Application"):
    """
    Create the parser state and build the application window.

    :param grammar: The grammar handed to the parser.
    :param tokens: The tokens of the text to parse.
    :param title: The title of the root window.

    If anything fails while the window is being built, the window is
    destroyed and the exception is re-raised.
    """
    # Initialize the parser
    self._init_parser(grammar, tokens)

    self._root = None
    try:
        # Create the root window.
        root = self._root = Tk()
        root.title(title)
        root.bind("<Control-q>", self.destroy)

        # Set up some frames.  They are packed from the bottom up:
        # buttons, then the rule label, then the chart view.
        button_frame = Frame(root)
        rule_frame = Frame(root)
        chart_frame = Frame(root)
        button_frame.pack(side="bottom", fill="none")
        rule_frame.pack(side="bottom", fill="x")
        chart_frame.pack(side="bottom", fill="both", expand=1)

        self._init_fonts(root)
        self._init_animation()
        self._init_chartview(chart_frame)
        self._init_rulelabel(rule_frame)
        self._init_buttons(button_frame)
        self._init_menubar()

        # Optional secondary views; created on demand.
        self._matrix = None
        self._results = None

        # Set up keyboard bindings.
        self._init_bindings()

    except BaseException:
        # Clean up the half-built window, then let the error propagate.
        print("Error creating Tree View")
        self.destroy()
        raise
1818
+
1819
def destroy(self, *args):
    """
    Destroy the root window, if there is one.  Extra positional
    arguments (such as an event object) are ignored.
    """
    root = self._root
    if root is not None:
        root.destroy()
        self._root = None
1824
+
1825
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Returns immediately, without entering the mainloop, when
    ``in_idle()`` is true.
    """
    if not in_idle():
        self._root.mainloop(*args, **kwargs)
1835
+
1836
+ # ////////////////////////////////////////////////////////////
1837
+ # Initialization Helpers
1838
+ # ////////////////////////////////////////////////////////////
1839
+
1840
def _init_parser(self, grammar, tokens):
    """
    Remember the grammar and tokens, then build a fresh parser.
    """
    self._grammar, self._tokens = grammar, tokens
    self._reset_parser()
1844
+
1845
def _reset_parser(self):
    """
    Create a new stepping chart parser for the current grammar and
    tokens, seed its chart with leaf edges and clear the selection.
    """
    parser = SteppingChartParser(self._grammar)
    self._cp = parser
    parser.initialize(self._tokens)
    self._chart = parser.chart()

    # Insert LeafEdges before the parsing starts.  The rule's generator
    # is run to completion; the edges it yields are not needed here.
    for _ in LeafInitRule().apply(self._chart, self._grammar):
        pass

    # The step iterator -- use this to generate new edges
    self._cpstep = parser.step()

    # The currently selected edge
    self._selection = None
1859
+
1860
def _init_fonts(self, root):
    """
    Set up the fonts used by the application.

    :param root: The root window; the system font is installed as its
        default ``*Font`` option.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    sysfont = Font(font=Button()["font"])
    self._sysfont = sysfont
    root.option_add("*Font", sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(sysfont.cget("size"))

    size = self._size.get()
    self._boldfont = Font(family="helvetica", weight="bold", size=size)
    self._font = Font(family="helvetica", size=size)
1871
+
1872
def _init_animation(self):
    """
    Create the Tk variables that control stepping and animation speed.
    """
    root = self._root

    # Are we stepping? (default=yes)
    self._step = IntVar(root)
    self._step.set(1)

    # What's our animation speed (default=fast)
    self._animate = IntVar(root)
    self._animate.set(3)  # Default speed = fast

    # Are we currently animating?
    self._animating = 0
1883
+
1884
def _init_chartview(self, parent):
    """
    Create the chart view (with tree and sentence display) inside
    ``parent`` and route its "select" events to ``_click_cv_edge``.
    """
    chart_view = ChartView(self._chart, parent, draw_tree=1, draw_sentence=1)
    self._cv = chart_view
    chart_view.add_callback("select", self._click_cv_edge)
1887
+
1888
def _init_rulelabel(self, parent):
    """
    Build the "Last edge generated by:" label pair and the "Step"
    checkbutton inside ``parent``.
    """
    caption = Label(parent, text="Last edge generated by:", font=self._boldfont)
    value = Label(parent, width=40, relief="groove", anchor="w", font=self._boldfont)
    self._rulelabel1 = caption
    self._rulelabel2 = value
    caption.pack(side="left")
    value.pack(side="left")

    Checkbutton(parent, variable=self._step, text="Step").pack(side="right")
1899
+
1900
def _init_buttons(self, parent):
    """
    Build the two button rows inside ``parent``: the strategy row
    (packed at the bottom) and the single-rule row (packed at the top).
    """
    strategy_row = Frame(parent)
    rule_row = Frame(parent)
    strategy_row.pack(side="bottom", fill="x")
    rule_row.pack(side="top", fill="none")

    def add_button(row, text, background, command, side="left"):
        # All buttons share the same look apart from their background.
        Button(
            row,
            text=text,
            background=background,
            foreground="black",
            command=command,
        ).pack(side=side)

    add_button(strategy_row, "Reset\nParser", "#90c0d0", self.reset, side="right")
    # (A 'Pause' button bound to self.pause was commented out here.)
    for text, command in (
        ("Top Down\nStrategy", self.top_down_strategy),
        ("Bottom Up\nStrategy", self.bottom_up_strategy),
        ("Bottom Up\nLeft-Corner Strategy", self.bottom_up_leftcorner_strategy),
    ):
        add_button(strategy_row, text, "#90c0d0", command)

    # ``None`` entries stand for 20-pixel spacers between button groups.
    rule_buttons = (
        ("Top Down Init\nRule", self.top_down_init),
        ("Top Down Predict\nRule", self.top_down_predict),
        None,
        ("Bottom Up Predict\nRule", self.bottom_up),
        None,
        ("Bottom Up Left-Corner\nPredict Rule", self.bottom_up_leftcorner),
        None,
        ("Fundamental\nRule", self.fundamental),
    )
    for entry in rule_buttons:
        if entry is None:
            Frame(rule_row, width=20).pack(side="left")
        else:
            text, command = entry
            add_button(rule_row, text, "#90f090", command)
1980
+
1981
def _init_bindings(self):
    """
    Install the application's keyboard shortcuts on the root window.
    """
    bind = self._root.bind
    chart_view = self._cv

    shortcuts = (
        # Scrolling, quitting and help
        ("<Up>", chart_view.scroll_up),
        ("<Down>", chart_view.scroll_down),
        ("<Prior>", chart_view.page_up),
        ("<Next>", chart_view.page_down),
        ("<Control-q>", self.destroy),
        ("<Control-x>", self.destroy),
        ("<F1>", self.help),
        # Chart files
        ("<Control-s>", self.save_chart),
        ("<Control-o>", self.load_chart),
        ("<Control-r>", self.reset),
        # Strategies
        ("t", self.top_down_strategy),
        ("b", self.bottom_up_strategy),
        ("c", self.bottom_up_leftcorner_strategy),
        ("<space>", self._stop_animation),
        # Editing
        ("<Control-g>", self.edit_grammar),
        ("<Control-t>", self.edit_sentence),
    )
    for sequence, handler in shortcuts:
        bind(sequence, handler)

    # Animation speed control
    animate = self._animate
    bind("-", lambda e, a=animate: a.set(1))
    bind("=", lambda e, a=animate: a.set(2))
    bind("+", lambda e, a=animate: a.set(3))

    # Step control
    bind("s", lambda e, s=self._step: s.set(not s.get()))
2009
+
2010
def _init_menubar(self):
    """
    Build the menu bar (File, Edit, View, Apply, Animate, Zoom, Help)
    and attach it to the root window.
    """
    menubar = Menu(self._root)

    # ----- File -----
    filemenu = Menu(menubar, tearoff=0)
    for label, command, accelerator in (
        ("Save Chart", self.save_chart, "Ctrl-s"),
        ("Load Chart", self.load_chart, "Ctrl-o"),
        ("Reset Chart", self.reset, "Ctrl-r"),
    ):
        filemenu.add_command(
            label=label, underline=0, command=command, accelerator=accelerator
        )
    filemenu.add_separator()
    filemenu.add_command(label="Save Grammar", command=self.save_grammar)
    filemenu.add_command(label="Load Grammar", command=self.load_grammar)
    filemenu.add_separator()
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-x"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # ----- Edit -----
    editmenu = Menu(menubar, tearoff=0)
    for label, command, accelerator in (
        ("Edit Grammar", self.edit_grammar, "Ctrl-g"),
        ("Edit Text", self.edit_sentence, "Ctrl-t"),
    ):
        editmenu.add_command(
            label=label, underline=5, command=command, accelerator=accelerator
        )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    # ----- View -----
    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_command(label="Chart Matrix", underline=6, command=self.view_matrix)
    viewmenu.add_command(label="Results", underline=0, command=self.view_results)
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # ----- Apply -----
    rulemenu = Menu(menubar, tearoff=0)
    for label, command, accelerator in (
        ("Top Down Strategy", self.top_down_strategy, "t"),
        ("Bottom Up Strategy", self.bottom_up_strategy, "b"),
        ("Bottom Up Left-Corner Strategy", self.bottom_up_leftcorner_strategy, "c"),
    ):
        rulemenu.add_command(
            label=label, underline=0, command=command, accelerator=accelerator
        )
    rulemenu.add_separator()
    for label, command in (
        ("Bottom Up Rule", self.bottom_up),
        ("Bottom Up Left-Corner Rule", self.bottom_up_leftcorner),
        ("Top Down Init Rule", self.top_down_init),
        ("Top Down Predict Rule", self.top_down_predict),
        ("Fundamental Rule", self.fundamental),
    ):
        rulemenu.add_command(label=label, command=command)
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    # ----- Animate -----
    animatemenu = Menu(menubar, tearoff=0)
    animatemenu.add_checkbutton(
        label="Step", underline=0, variable=self._step, accelerator="s"
    )
    animatemenu.add_separator()
    animatemenu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    for label, value, accelerator in (
        ("Slow Animation", 1, "-"),
        ("Normal Animation", 2, "="),
        ("Fast Animation", 3, "+"),
    ):
        animatemenu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=value,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    # ----- Zoom -----
    zoommenu = Menu(menubar, tearoff=0)
    for label, size in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        zoommenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=size,
            command=self.resize,
        )
    menubar.add_cascade(label="Zoom", underline=0, menu=zoommenu)

    # ----- Help -----
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    helpmenu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    self._root.config(menu=menubar)
2168
+
2169
+ # ////////////////////////////////////////////////////////////
2170
+ # Selection Handling
2171
+ # ////////////////////////////////////////////////////////////
2172
+
2173
def _click_cv_edge(self, edge):
    """
    Handle a click on ``edge`` in the chart view.
    """
    if edge == self._selection:
        # Repeated clicks on one edge cycle its trees.
        self._cv.cycle_tree()
        # [XX] this can get confused if animation is running
        # faster than the callbacks...
    else:
        # Clicking on a new edge selects it.
        self._select_edge(edge)
2182
+
2183
def _select_matrix_edge(self, edge):
    """
    Handle an edge selected in the matrix view: select it and scroll
    the chart view to it.
    """
    self._select_edge(edge)
    self._cv.view_edge(edge)
2186
+
2187
def _select_edge(self, edge):
    """
    Make ``edge`` the current selection and reflect that in the chart
    view and, if it is open, the matrix view.
    """
    self._selection = edge

    # Update the chart view.
    chart_view = self._cv
    chart_view.markonly_edge(edge, "#f00")
    chart_view.draw_tree(edge)

    # Update the matrix view.
    matrix = self._matrix
    if matrix:
        matrix.markonly_edge(edge)
        matrix.view_edge(edge)
2197
+
2198
def _deselect_edge(self):
    """
    Clear the current selection in the chart view and, if it is open,
    the matrix view.
    """
    self._selection = None

    # Update the chart view.
    chart_view = self._cv
    chart_view.unmark_edge()
    chart_view.erase_tree()

    # Update the matrix view
    matrix = self._matrix
    if matrix:
        matrix.unmark_edge()
2206
+
2207
def _show_new_edge(self, edge):
    """
    Display a newly generated ``edge``: show the rule that produced
    it and refresh the chart view and any open matrix/results views.
    """
    self._display_rule(self._cp.current_chartrule())

    # Update the chart view.
    chart_view = self._cv
    chart_view.update()
    chart_view.draw_tree(edge)
    chart_view.markonly_edge(edge, "#0df")
    chart_view.view_edge(edge)

    # Update the matrix view.
    matrix = self._matrix
    if matrix:
        matrix.update()
        matrix.markonly_edge(edge)
        matrix.view_edge(edge)

    # Update the results view.
    results = self._results
    if results:
        results.update(edge)
2224
+
2225
+ # ////////////////////////////////////////////////////////////
2226
+ # Help/usage
2227
+ # ////////////////////////////////////////////////////////////
2228
+
2229
def help(self, *e):
    """
    Stop any running animation and show the module docstring in a
    help window.  Extra positional arguments (such as an event
    object) are ignored.
    """
    self._animating = 0
    title = "Help: Chart Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._root, title, text, width=75, font="fixed")
    except Exception:
        # Retry with the default font.  (Narrowed from a bare
        # ``except`` so KeyboardInterrupt/SystemExit are not caught.)
        ShowText(self._root, title, text, width=75)
2247
+
2248
def about(self, *e):
    """
    Show the "About" dialog.  Extra positional arguments (such as an
    event object) are ignored.
    """
    about_text = "\n".join(
        ("NLTK Chart Parser Application", "Written by Edward Loper")
    )
    showinfo("About: Chart Parser Application", about_text)
2251
+
2252
+ # ////////////////////////////////////////////////////////////
2253
+ # File Menu
2254
+ # ////////////////////////////////////////////////////////////
2255
+
2256
# (description, pattern) pairs passed as ``filetypes`` to the file dialogs.
CHART_FILE_TYPES = [
    ("Pickle file", ".pickle"),
    ("All files", "*"),
]
GRAMMAR_FILE_TYPES = [("Plaintext grammar file", ".cfg")] + CHART_FILE_TYPES
2262
+
2263
def load_chart(self, *args):
    """
    Load a chart from a pickle file chosen by the user and show it in
    every open view.  Extra positional arguments (such as an event
    object) are ignored.

    If loading fails, an error dialog is shown.  (A stray ``raise``
    used to make that dialog unreachable.)
    """
    filename = askopenfilename(
        filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
    )
    if not filename:
        return
    try:
        # NOTE(security): unpickling can execute arbitrary code; only
        # load chart files that come from a trusted source.
        with open(filename, "rb") as infile:
            chart = pickle.load(infile)
        self._chart = chart
        self._cv.update(chart)
        if self._matrix:
            self._matrix.set_chart(chart)
            self._matrix.deselect_cell()
        if self._results:
            self._results.set_chart(chart)
        self._cp.set_chart(chart)
    except Exception:
        showerror("Error Loading Chart", "Unable to open file: %r" % filename)
2285
+
2286
def save_chart(self, *args):
    """
    Save the current chart to a pickle file chosen by the user.
    Extra positional arguments (such as an event object) are ignored.

    If saving fails, an error dialog is shown.  (A stray ``raise``
    used to make that dialog unreachable.)
    """
    filename = asksaveasfilename(
        filetypes=self.CHART_FILE_TYPES, defaultextension=".pickle"
    )
    if not filename:
        return
    try:
        with open(filename, "wb") as outfile:
            pickle.dump(self._chart, outfile)
    except Exception:
        showerror("Error Saving Chart", "Unable to open file: %r" % filename)
2299
+
2300
def load_grammar(self, *args):
    """
    Load a grammar from a file chosen by the user and install it with
    ``set_grammar``.  Files ending in ``.pickle`` are unpickled; any
    other file is read as text and parsed with ``CFG.fromstring``.
    Extra positional arguments (such as an event object) are ignored.

    If loading fails, an error dialog is shown.
    """
    filename = askopenfilename(
        filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
    )
    if not filename:
        return
    try:
        if not filename.endswith(".pickle"):
            with open(filename) as infile:
                grammar = CFG.fromstring(infile.read())
        else:
            # NOTE(security): unpickling can execute arbitrary code;
            # only load grammar pickles from a trusted source.
            with open(filename, "rb") as infile:
                grammar = pickle.load(infile)
        self.set_grammar(grammar)
    except Exception:
        showerror("Error Loading Grammar", "Unable to open file: %r" % filename)
2317
+
2318
def save_grammar(self, *args):
    """
    Save the current grammar to a file chosen by the user.  Extra
    positional arguments (such as an event object) are ignored.

    Files ending in ``.pickle`` receive the pickled grammar object, so
    that ``load_grammar`` can read them back.  (Previously the chart
    and tokens were pickled instead of the grammar.)  Any other file
    receives one production per line, with the productions of the
    start symbol written first.

    If saving fails, an error dialog is shown.
    """
    filename = asksaveasfilename(
        filetypes=self.GRAMMAR_FILE_TYPES, defaultextension=".cfg"
    )
    if not filename:
        return
    try:
        if filename.endswith(".pickle"):
            with open(filename, "wb") as outfile:
                pickle.dump(self._grammar, outfile)
        else:
            with open(filename, "w") as outfile:
                prods = self._grammar.productions()
                start_symbol = self._grammar.start()
                start = [p for p in prods if p.lhs() == start_symbol]
                rest = [p for p in prods if p.lhs() != start_symbol]
                for prod in start + rest:
                    outfile.write("%s\n" % prod)
    except Exception:
        showerror("Error Saving Grammar", "Unable to open file: %r" % filename)
2339
+
2340
def reset(self, *args):
    """
    Stop animating, rebuild the parser and show its fresh chart in
    every open view.  Extra positional arguments (such as an event
    object) are ignored.
    """
    self._animating = 0
    self._reset_parser()

    chart = self._chart
    self._cv.update(chart)
    matrix = self._matrix
    if matrix:
        matrix.set_chart(chart)
        matrix.deselect_cell()
    results = self._results
    if results:
        results.set_chart(chart)
2350
+
2351
+ # ////////////////////////////////////////////////////////////
2352
+ # Edit
2353
+ # ////////////////////////////////////////////////////////////
2354
+
2355
def edit_grammar(self, *e):
    # Open a CFGEditor on the current grammar.  ``self.set_grammar`` is
    # passed as the third argument -- presumably the callback the editor
    # invokes with the edited grammar (confirm against CFGEditor).
    CFGEditor(self._root, self._grammar, self.set_grammar)
2357
+
2358
def set_grammar(self, grammar):
    """
    Make ``grammar`` the application's grammar.

    The grammar is stored on the application and forwarded to the
    parser, and also to the results view when one is open.
    """
    self._grammar = grammar
    self._cp.set_grammar(grammar)

    results_view = self._results
    if not results_view:
        return
    results_view.set_grammar(grammar)
2363
+
2364
def edit_sentence(self, *e):
    """
    Open an entry dialog pre-filled with the current tokens joined by
    single spaces; ``self.set_sentence`` is handed to the dialog as its
    callback.
    """
    current_text = " ".join(self._tokens)
    EntryDialog(
        self._root,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
2369
+
2370
def set_sentence(self, sentence):
    """
    Replace the token list with the whitespace-separated words of
    ``sentence`` and reset the application.
    """
    words = sentence.split()
    self._tokens = words
    self.reset()
2373
+
2374
+ # ////////////////////////////////////////////////////////////
2375
+ # View Menu
2376
+ # ////////////////////////////////////////////////////////////
2377
+
2378
def view_matrix(self, *e):
    """
    Open a fresh chart matrix view, destroying the previous one if
    there is one, and register ``_select_matrix_edge`` for its
    ``"select"`` callback.
    """
    previous = self._matrix
    if previous is not None:
        previous.destroy()

    window = ChartMatrixView(self._root, self._chart)
    self._matrix = window
    window.add_callback("select", self._select_matrix_edge)
2383
+
2384
def view_results(self, *e):
    """
    Open a fresh results view for the current chart and grammar,
    destroying the previous one if there is one.
    """
    previous = self._results
    if previous is not None:
        previous.destroy()

    self._results = ChartResultsView(self._root, self._chart, self._grammar)
2388
+
2389
+ # ////////////////////////////////////////////////////////////
2390
+ # Zoom Menu
2391
+ # ////////////////////////////////////////////////////////////
2392
+
2393
def resize(self):
    """
    Stop any running animation and apply the size currently held by
    the ``_size`` variable to the application's fonts.
    """
    self._animating = 0
    chosen_size = self._size.get()
    self.set_font_size(chosen_size)
2396
+
2397
def set_font_size(self, size):
    """
    Apply ``size`` to the chart view and to the regular, bold and
    system fonts.

    The chart view receives ``size`` unchanged; each font is configured
    with ``-abs(size)``, i.e. always a negative value.
    """
    self._cv.set_font_size(size)
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=-abs(size))
2402
+
2403
def get_font_size(self):
    """Return the absolute value of the size held by ``_size``."""
    chosen_size = self._size.get()
    return abs(chosen_size)
2405
+
2406
+ # ////////////////////////////////////////////////////////////
2407
+ # Parsing
2408
+ # ////////////////////////////////////////////////////////////
2409
+
2410
def apply_strategy(self, strategy, edge_strategy=None):
    """
    Run ``strategy`` on the chart in the mode selected in the GUI.

    :param strategy: The list of chart rules to install on the parser.
    :param edge_strategy: Optional callable that builds a rule from the
        currently selected edge; only used in step mode when an edge is
        selected.

    If an animation is in progress, this call only stops it.  In step
    mode a single step is applied.  Otherwise the strategy is either
    animated or run until the stepping parser yields ``None``, after
    which the open views are refreshed.
    """
    # A click while animating just stops the animation.
    if self._animating:
        self._animating = 0
        return

    # Clear the rule display & mark.
    self._display_rule(None)

    if not self._step.get():
        self._cp.set_strategy(strategy)
        if self._animate.get():
            self._animating = 1
            self._animate_strategy()
            return
        # Not animating: drain the stepper until it reports that no
        # further edge can be added, then redraw everything once.
        for produced in self._cpstep:
            if produced is None:
                break
        self._cv.update()
        if self._matrix:
            self._matrix.update()
        if self._results:
            self._results.update()
        return

    # Step mode.
    selected = self._selection
    if selected is None or edge_strategy is None:
        self._cp.set_strategy(strategy)
        self._apply_strategy()
        return

    # Apply the given strategy to the selected edge only.
    self._cp.set_strategy([edge_strategy(selected)])
    if self._apply_strategy() is None:
        # It failed, so clear the selection.
        self._cv.unmark_edge()
        self._selection = None
2449
+
2450
def _stop_animation(self, *e):
    # Clear the animation flag; _animate_strategy returns immediately
    # when it finds the flag equal to 0.
    self._animating = 0
2452
+
2453
def _animate_strategy(self, speed=1):
    """
    Apply one step of the current strategy and schedule the next one.

    :param speed: Unused; kept so existing callers keep working.

    The delay before the next step depends on the value of the
    ``_animate`` variable: 3000 ms for 1, 1000 ms for 2, and 20 ms for
    any other non-zero value.

    The ``_animating`` flag is cleared whenever the animation ends on
    its own (no new edge was produced, animation was switched off, or
    step mode was switched on).  Without this the flag would stay set,
    and the next call to ``apply_strategy`` would do nothing except
    clear it.
    """
    if self._animating == 0:
        return

    if self._apply_strategy() is None:
        # Nothing more to add: the animation is finished.
        self._animating = 0
        return

    mode = self._animate.get()
    if mode == 0 or self._step.get() == 1:
        # The user turned animation off or stepping on mid-run.
        self._animating = 0
        return

    delay_ms = {1: 3000, 2: 1000}.get(mode, 20)
    self._root.after(delay_ms, self._animate_strategy)
2465
+
2466
def _apply_strategy(self):
    """
    Advance the stepping parser by one step.

    :return: The value produced by the stepper: the new edge, which is
        also shown in the GUI, or ``None`` when no edge was produced.
    """
    produced = next(self._cpstep)
    if produced is None:
        return None
    self._show_new_edge(produced)
    return produced
2472
+
2473
def _display_rule(self, rule):
    """
    Show ``rule`` in the rule label.

    :param rule: The rule to display, or ``None`` to clear the label.
        Any other value is displayed as ``str(rule)``.
    """
    # The original also fetched the chart view's font size into a local
    # that was never used; that dead lookup has been dropped.
    self._rulelabel2["text"] = "" if rule is None else str(rule)
2480
+
2481
+ # ////////////////////////////////////////////////////////////
2482
+ # Parsing Strategies
2483
+ # ////////////////////////////////////////////////////////////
2484
+
2485
# Basic rules: each constant is a one-element list holding a single
# chart rule instance, so that the lists can be concatenated below.
_TD_INIT = [TopDownInitRule()]
_TD_PREDICT = [TopDownPredictRule()]
_BU_RULE = [BottomUpPredictRule()]
_BU_LC_RULE = [BottomUpPredictCombineRule()]
_FUNDAMENTAL = [SingleEdgeFundamentalRule()]

# Complete strategies: concatenations of the basic rule lists above.
_TD_STRATEGY = _TD_INIT + _TD_PREDICT + _FUNDAMENTAL
_BU_STRATEGY = _BU_RULE + _FUNDAMENTAL
_BU_LC_STRATEGY = _BU_LC_RULE + _FUNDAMENTAL

# Button callback functions: each one forwards a rule list to
# apply_strategy, together with the rule class that apply_strategy
# instantiates on the selected edge when running in step mode.
def top_down_init(self, *e):
    # No edge-specific rule is supplied for the initialisation rule.
    self.apply_strategy(self._TD_INIT, None)

def top_down_predict(self, *e):
    self.apply_strategy(self._TD_PREDICT, TopDownPredictEdgeRule)

def bottom_up(self, *e):
    self.apply_strategy(self._BU_RULE, BottomUpEdgeRule)

def bottom_up_leftcorner(self, *e):
    self.apply_strategy(self._BU_LC_RULE, BottomUpLeftCornerEdgeRule)

def fundamental(self, *e):
    self.apply_strategy(self._FUNDAMENTAL, FundamentalEdgeRule)

def bottom_up_strategy(self, *e):
    self.apply_strategy(self._BU_STRATEGY, BottomUpEdgeRule)

def bottom_up_leftcorner_strategy(self, *e):
    self.apply_strategy(self._BU_LC_STRATEGY, BottomUpLeftCornerEdgeRule)

def top_down_strategy(self, *e):
    self.apply_strategy(self._TD_STRATEGY, TopDownPredictEdgeRule)
2521
+
2522
+
2523
def app():
    """
    Launch the chart parser application on a small demonstration
    grammar and sentence.

    The grammar and tokens are echoed to standard output before the
    window's main loop is entered.
    """
    grammar = CFG.fromstring(
        """
    # Grammatical productions.
        S -> NP VP
        VP -> VP PP | V NP | V
        NP -> Det N | NP PP
        PP -> P NP
    # Lexical productions.
        NP -> 'John' | 'I'
        Det -> 'the' | 'my' | 'a'
        N -> 'dog' | 'cookie' | 'table' | 'cake' | 'fork'
        V -> 'ate' | 'saw'
        P -> 'on' | 'under' | 'with'
    """
    )

    # The original assigned a longer sentence first and immediately
    # overwrote it; only the effective assignment is kept.
    sent = "John ate the cake on the table"
    tokens = sent.split()

    print("grammar= (")
    for rule in grammar.productions():
        # Print the indent and the rule as text, not as a tuple repr
        # (the doubled parentheses were a Python 2 -> 3 conversion slip).
        print("    ", repr(rule) + ",")
    print(")")
    print("tokens = %r" % tokens)
    print('Calling "ChartParserApp(grammar, tokens)"...')
    ChartParserApp(grammar, tokens).mainloop()
2551
+
2552
+
2553
+ if __name__ == "__main__":
2554
+ app()
2555
+
2556
+ # Chart comparer:
2557
+ # charts = ['/tmp/earley.pickle',
2558
+ # '/tmp/topdown.pickle',
2559
+ # '/tmp/bottomup.pickle']
2560
+ # ChartComparer(*charts).mainloop()
2561
+
2562
+ # import profile
2563
+ # profile.run('demo2()', '/tmp/profile.out')
2564
+ # import pstats
2565
+ # p = pstats.Stats('/tmp/profile.out')
2566
+ # p.strip_dirs().sort_stats('time', 'cum').print_stats(60)
2567
+ # p.strip_dirs().sort_stats('cum', 'time').print_stats(60)
2568
+
2569
+ __all__ = ["app"]
pipeline/nltk/app/chunkparser_app.py ADDED
@@ -0,0 +1,1500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Regexp Chunk Parser Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Edward Loper <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ """
9
+ A graphical tool for exploring the regular expression based chunk
10
+ parser ``nltk.chunk.RegexpChunkParser``.
11
+ """
12
+
13
+ # Todo: Add a way to select the development set from the menubar. This
14
+ # might just need to be a selection box (conll vs treebank etc) plus
15
+ # configuration parameters to select what's being chunked (eg VP vs NP)
16
+ # and what part of the data is being used as the development set.
17
+
18
+ import random
19
+ import re
20
+ import textwrap
21
+ import time
22
+ from tkinter import (
23
+ Button,
24
+ Canvas,
25
+ Checkbutton,
26
+ Frame,
27
+ IntVar,
28
+ Label,
29
+ Menu,
30
+ Scrollbar,
31
+ Text,
32
+ Tk,
33
+ )
34
+ from tkinter.filedialog import askopenfilename, asksaveasfilename
35
+ from tkinter.font import Font
36
+
37
+ from nltk.chunk import ChunkScore, RegexpChunkParser
38
+ from nltk.chunk.regexp import RegexpChunkRule
39
+ from nltk.corpus import conll2000, treebank_chunk
40
+ from nltk.draw.util import ShowText
41
+ from nltk.tree import Tree
42
+ from nltk.util import in_idle
43
+
44
+
45
+ class RegexpChunkApp:
46
+ """
47
+ A graphical tool for exploring the regular expression based chunk
48
+ parser ``nltk.chunk.RegexpChunkParser``.
49
+
50
+ See ``HELP`` for instructional text.
51
+ """
52
+
53
+ ##/////////////////////////////////////////////////////////////////
54
+ ## Help Text
55
+ ##/////////////////////////////////////////////////////////////////
56
+
57
+ #: A dictionary mapping from part of speech tags to descriptions,
58
+ #: which is used in the help text. (This should probably live with
59
+ #: the conll and/or treebank corpus instead.)
60
+ TAGSET = {
61
+ "CC": "Coordinating conjunction",
62
+ "PRP$": "Possessive pronoun",
63
+ "CD": "Cardinal number",
64
+ "RB": "Adverb",
65
+ "DT": "Determiner",
66
+ "RBR": "Adverb, comparative",
67
+ "EX": "Existential there",
68
+ "RBS": "Adverb, superlative",
69
+ "FW": "Foreign word",
70
+ "RP": "Particle",
71
+ "JJ": "Adjective",
72
+ "TO": "to",
73
+ "JJR": "Adjective, comparative",
74
+ "UH": "Interjection",
75
+ "JJS": "Adjective, superlative",
76
+ "VB": "Verb, base form",
77
+ "LS": "List item marker",
78
+ "VBD": "Verb, past tense",
79
+ "MD": "Modal",
80
+ "NNS": "Noun, plural",
81
+ "NN": "Noun, singular or masps",
82
+ "VBN": "Verb, past participle",
83
+ "VBZ": "Verb,3rd ps. sing. present",
84
+ "NNP": "Proper noun, singular",
85
+ "NNPS": "Proper noun plural",
86
+ "WDT": "wh-determiner",
87
+ "PDT": "Predeterminer",
88
+ "WP": "wh-pronoun",
89
+ "POS": "Possessive ending",
90
+ "WP$": "Possessive wh-pronoun",
91
+ "PRP": "Personal pronoun",
92
+ "WRB": "wh-adverb",
93
+ "(": "open parenthesis",
94
+ ")": "close parenthesis",
95
+ "``": "open quote",
96
+ ",": "comma",
97
+ "''": "close quote",
98
+ ".": "period",
99
+ "#": "pound sign (currency marker)",
100
+ "$": "dollar sign (currency marker)",
101
+ "IN": "Preposition/subord. conjunction",
102
+ "SYM": "Symbol (mathematical or scientific)",
103
+ "VBG": "Verb, gerund/present participle",
104
+ "VBP": "Verb, non-3rd ps. sing. present",
105
+ ":": "colon",
106
+ }
107
+
108
+ #: Contents for the help box. This is a list of tuples, one for
109
+ #: each help page, where each tuple has three elements:
110
+ #: - A title (displayed as a tab)
111
+ #: - A string description of tabstops (see Tkinter.Text for details)
112
+ #: - The text contents for the help page. You can use expressions
113
+ #: like <red>...</red> to colorize the text; see ``HELP_AUTOTAG``
114
+ #: for a list of tags you can use for colorizing.
115
+ HELP = [
116
+ (
117
+ "Help",
118
+ "20",
119
+ "Welcome to the regular expression chunk-parser grammar editor. "
120
+ "You can use this editor to develop and test chunk parser grammars "
121
+ "based on NLTK's RegexpChunkParser class.\n\n"
122
+ # Help box.
123
+ "Use this box ('Help') to learn more about the editor; click on the "
124
+ "tabs for help on specific topics:"
125
+ "<indent>\n"
126
+ "Rules: grammar rule types\n"
127
+ "Regexps: regular expression syntax\n"
128
+ "Tags: part of speech tags\n</indent>\n"
129
+ # Grammar.
130
+ "Use the upper-left box ('Grammar') to edit your grammar. "
131
+ "Each line of your grammar specifies a single 'rule', "
132
+ "which performs an action such as creating a chunk or merging "
133
+ "two chunks.\n\n"
134
+ # Dev set.
135
+ "The lower-left box ('Development Set') runs your grammar on the "
136
+ "development set, and displays the results. "
137
+ "Your grammar's chunks are <highlight>highlighted</highlight>, and "
138
+ "the correct (gold standard) chunks are "
139
+ "<underline>underlined</underline>. If they "
140
+ "match, they are displayed in <green>green</green>; otherwise, "
141
+ "they are displayed in <red>red</red>. The box displays a single "
142
+ "sentence from the development set at a time; use the scrollbar or "
143
+ "the next/previous buttons view additional sentences.\n\n"
144
+ # Performance
145
+ "The lower-right box ('Evaluation') tracks the performance of "
146
+ "your grammar on the development set. The 'precision' axis "
147
+ "indicates how many of your grammar's chunks are correct; and "
148
+ "the 'recall' axis indicates how many of the gold standard "
149
+ "chunks your system generated. Typically, you should try to "
150
+ "design a grammar that scores high on both metrics. The "
151
+ "exact precision and recall of the current grammar, as well "
152
+ "as their harmonic mean (the 'f-score'), are displayed in "
153
+ "the status bar at the bottom of the window.",
154
+ ),
155
+ (
156
+ "Rules",
157
+ "10",
158
+ "<h1>{...regexp...}</h1>"
159
+ "<indent>\nChunk rule: creates new chunks from words matching "
160
+ "regexp.</indent>\n\n"
161
+ "<h1>}...regexp...{</h1>"
162
+ "<indent>\nStrip rule: removes words matching regexp from existing "
163
+ "chunks.</indent>\n\n"
164
+ "<h1>...regexp1...}{...regexp2...</h1>"
165
+ "<indent>\nSplit rule: splits chunks that match regexp1 followed by "
166
+ "regexp2 in two.</indent>\n\n"
167
+ "<h1>...regexp...{}...regexp...</h1>"
168
+ "<indent>\nMerge rule: joins consecutive chunks that match regexp1 "
169
+ "and regexp2</indent>\n",
170
+ ),
171
+ (
172
+ "Regexps",
173
+ "10 60",
174
+ # "Regular Expression Syntax Summary:\n\n"
175
+ "<h1>Pattern\t\tMatches...</h1>\n"
176
+ "<hangindent>"
177
+ "\t<<var>T</var>>\ta word with tag <var>T</var> "
178
+ "(where <var>T</var> may be a regexp).\n"
179
+ "\t<var>x</var>?\tan optional <var>x</var>\n"
180
+ "\t<var>x</var>+\ta sequence of 1 or more <var>x</var>'s\n"
181
+ "\t<var>x</var>*\ta sequence of 0 or more <var>x</var>'s\n"
182
+ "\t<var>x</var>|<var>y</var>\t<var>x</var> or <var>y</var>\n"
183
+ "\t.\tmatches any character\n"
184
+ "\t(<var>x</var>)\tTreats <var>x</var> as a group\n"
185
+ "\t# <var>x...</var>\tTreats <var>x...</var> "
186
+ "(to the end of the line) as a comment\n"
187
+ "\t\\<var>C</var>\tmatches character <var>C</var> "
188
+ "(useful when <var>C</var> is a special character "
189
+ "like + or #)\n"
190
+ "</hangindent>"
191
+ "\n<h1>Examples:</h1>\n"
192
+ "<hangindent>"
193
+ "\t<regexp><NN></regexp>\n"
194
+ '\t\tMatches <match>"cow/NN"</match>\n'
195
+ '\t\tMatches <match>"green/NN"</match>\n'
196
+ "\t<regexp><VB.*></regexp>\n"
197
+ '\t\tMatches <match>"eating/VBG"</match>\n'
198
+ '\t\tMatches <match>"ate/VBD"</match>\n'
199
+ "\t<regexp><IN><DT><NN></regexp>\n"
200
+ '\t\tMatches <match>"on/IN the/DT car/NN"</match>\n'
201
+ "\t<regexp><RB>?<VBD></regexp>\n"
202
+ '\t\tMatches <match>"ran/VBD"</match>\n'
203
+ '\t\tMatches <match>"slowly/RB ate/VBD"</match>\n'
204
+ r"\t<regexp><\#><CD> # This is a comment...</regexp>\n"
205
+ '\t\tMatches <match>"#/# 100/CD"</match>\n'
206
+ "</hangindent>",
207
+ ),
208
+ (
209
+ "Tags",
210
+ "10 60",
211
+ "<h1>Part of Speech Tags:</h1>\n"
212
+ + "<hangindent>"
213
+ + "<<TAGSET>>"
214
+ + "</hangindent>\n", # this gets auto-substituted w/ self.TAGSET
215
+ ),
216
+ ]
217
+
218
+ HELP_AUTOTAG = [
219
+ ("red", dict(foreground="#a00")),
220
+ ("green", dict(foreground="#080")),
221
+ ("highlight", dict(background="#ddd")),
222
+ ("underline", dict(underline=True)),
223
+ ("h1", dict(underline=True)),
224
+ ("indent", dict(lmargin1=20, lmargin2=20)),
225
+ ("hangindent", dict(lmargin1=0, lmargin2=60)),
226
+ ("var", dict(foreground="#88f")),
227
+ ("regexp", dict(foreground="#ba7")),
228
+ ("match", dict(foreground="#6a6")),
229
+ ]
230
+
231
+ ##/////////////////////////////////////////////////////////////////
232
+ ## Config Parameters
233
+ ##/////////////////////////////////////////////////////////////////
234
+
235
+ _EVAL_DELAY = 1
236
+ """If the user has not pressed any key for this amount of time (in
237
+ seconds), and the current grammar has not been evaluated, then
238
+ the eval demon will evaluate it."""
239
+
240
+ _EVAL_CHUNK = 15
241
+ """The number of sentences that should be evaluated by the eval
242
+ demon each time it runs."""
243
+ _EVAL_FREQ = 0.2
244
+ """The frequency (in seconds) at which the eval demon is run"""
245
+ _EVAL_DEMON_MIN = 0.02
246
+ """The minimum amount of time that the eval demon should take each time
247
+ it runs -- if it takes less than this time, _EVAL_CHUNK will be
248
+ modified upwards."""
249
+ _EVAL_DEMON_MAX = 0.04
250
+ """The maximum amount of time that the eval demon should take each time
251
+ it runs -- if it takes more than this time, _EVAL_CHUNK will be
252
+ modified downwards."""
253
+
254
+ _GRAMMARBOX_PARAMS = dict(
255
+ width=40,
256
+ height=12,
257
+ background="#efe",
258
+ highlightbackground="#efe",
259
+ highlightthickness=1,
260
+ relief="groove",
261
+ border=2,
262
+ wrap="word",
263
+ )
264
+ _HELPBOX_PARAMS = dict(
265
+ width=15,
266
+ height=15,
267
+ background="#efe",
268
+ highlightbackground="#efe",
269
+ foreground="#555",
270
+ highlightthickness=1,
271
+ relief="groove",
272
+ border=2,
273
+ wrap="word",
274
+ )
275
+ _DEVSETBOX_PARAMS = dict(
276
+ width=70,
277
+ height=10,
278
+ background="#eef",
279
+ highlightbackground="#eef",
280
+ highlightthickness=1,
281
+ relief="groove",
282
+ border=2,
283
+ wrap="word",
284
+ tabs=(30,),
285
+ )
286
+ _STATUS_PARAMS = dict(background="#9bb", relief="groove", border=2)
287
+ _FONT_PARAMS = dict(family="helvetica", size=-20)
288
+ _FRAME_PARAMS = dict(background="#777", padx=2, pady=2, border=3)
289
+ _EVALBOX_PARAMS = dict(
290
+ background="#eef",
291
+ highlightbackground="#eef",
292
+ highlightthickness=1,
293
+ relief="groove",
294
+ border=2,
295
+ width=300,
296
+ height=280,
297
+ )
298
+ _BUTTON_PARAMS = dict(
299
+ background="#777", activebackground="#777", highlightbackground="#777"
300
+ )
301
+ _HELPTAB_BG_COLOR = "#aba"
302
+ _HELPTAB_FG_COLOR = "#efe"
303
+
304
+ _HELPTAB_FG_PARAMS = dict(background="#efe")
305
+ _HELPTAB_BG_PARAMS = dict(background="#aba")
306
+ _HELPTAB_SPACER = 6
307
+
308
def normalize_grammar(self, grammar):
    """
    Return a canonical form of the grammar string ``grammar``.

    Comments (an unescaped ``#`` through the end of its line) are
    removed, runs of spaces are collapsed to one space, whitespace
    following a newline is dropped, and the result is stripped.  As a
    final hack, every ``$`` that follows a character other than a
    backslash is given a backslash.
    """
    # Strip comments.
    without_comments = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", grammar)
    # Normalize whitespace.
    single_spaced = re.sub(" +", " ", without_comments)
    trimmed = re.sub(r"\n\s+", r"\n", single_spaced).strip()
    # [xx] Hack: automatically backslash $!  This must run after the
    # strip, because the pattern needs a character in front of the "$".
    return re.sub(r"([^\\])\$", r"\1\\$", trimmed)
318
+
319
def __init__(
    self,
    devset_name="conll2000",
    devset=None,
    grammar="",
    chunk_label="NP",
    tagset=None,
):
    """
    :param devset_name: The name of the development set; used for
        display & for save files.  If either the name 'treebank'
        or the name 'conll2000' is used, and devset is None, then
        devset will be set automatically.
    :param devset: A list of chunked sentences
    :param grammar: The initial grammar to display.
    :param chunk_label: The chunk label; it is stored on the
        application and passed to the ``ChunkScore`` used for
        evaluation.
    :param tagset: Dictionary from tags to string descriptions, used
        for the help page.  Defaults to ``self.TAGSET``.
    :raise ValueError: If ``devset`` is None and ``devset_name`` is
        not one of the two known names.
    """
    self._chunk_label = chunk_label

    if tagset is None:
        tagset = self.TAGSET
    self.tagset = tagset

    # Named development sets.  The lookup is keyed on devset_name;
    # devset itself is known to be None inside this branch, so testing
    # it against "treebank" (as the code used to) could never succeed.
    if devset is None:
        if devset_name == "conll2000":
            devset = conll2000.chunked_sents("train.txt")  # [:100]
        elif devset_name == "treebank":
            devset = treebank_chunk.chunked_sents()  # [:100]
        else:
            raise ValueError("Unknown development set %s" % devset_name)

    self.chunker = None
    """The chunker built from the grammar string"""

    self.grammar = grammar
    """The unparsed grammar string"""

    self.normalized_grammar = None
    """A normalized version of ``self.grammar``."""

    self.grammar_changed = 0
    """The last time() that the grammar was changed."""

    self.devset = devset
    """The development set -- a list of chunked sentences."""

    self.devset_name = devset_name
    """The name of the development set (for save files)."""

    self.devset_index = -1
    """The index into the development set of the first instance
       that's currently being viewed."""

    self._last_keypress = 0
    """The time() when a key was most recently pressed"""

    self._history = []
    """A list of (grammar, precision, recall, fscore) tuples for
       grammars that the user has already tried."""

    self._history_index = 0
    """When the user is scrolling through previous grammars, this
       is used to keep track of which grammar they're looking at."""

    self._eval_grammar = None
    """The grammar that is being currently evaluated by the eval
       demon."""

    self._eval_normalized_grammar = None
    """A normalized copy of ``_eval_grammar``."""

    self._eval_index = 0
    """The index of the next sentence in the development set that
       should be looked at by the eval demon."""

    self._eval_score = ChunkScore(chunk_label=chunk_label)
    """The ``ChunkScore`` object that's used to keep track of the score
    of the current grammar on the development set."""

    # Set up the main window.
    top = self.top = Tk()
    top.geometry("+50+50")
    top.title("Regexp Chunk Parser App")
    top.bind("<Control-q>", self.destroy)

    # Variable that restricts how much of the devset we look at.
    self._devset_size = IntVar(top)
    self._devset_size.set(100)

    # Set up all the tkinter widgets
    self._init_fonts(top)
    self._init_widgets(top)
    self._init_bindings(top)
    self._init_menubar(top)
    self.grammarbox.focus()

    # If a grammar was given, then display it.
    if grammar:
        self.grammarbox.insert("end", grammar + "\n")
        self.grammarbox.mark_set("insert", "1.0")

    # Display the first item in the development set
    self.show_devset(0)
    self.update()
425
+
426
def _init_bindings(self, top):
    """
    Install the keyboard and resize bindings.

    :param top: The top-level window that receives the application-wide
        key bindings.
    """
    window_bindings = (
        ("<Control-n>", self._devset_next),
        ("<Control-p>", self._devset_prev),
        ("<Control-t>", self.toggle_show_trace),
        ("<KeyPress>", self.update),
        ("<Control-s>", lambda e: self.save_grammar()),
        ("<Control-o>", lambda e: self.load_grammar()),
    )
    for sequence, handler in window_bindings:
        top.bind(sequence, handler)

    # The navigation and trace shortcuts are bound on the grammar box
    # as well -- presumably so they win over the text widget's own
    # handling of those keys (confirm).
    editor_bindings = (
        ("<Control-t>", self.toggle_show_trace),
        ("<Control-n>", self._devset_next),
        ("<Control-p>", self._devset_prev),
    )
    for sequence, handler in editor_bindings:
        self.grammarbox.bind(sequence, handler)

    # Redraw the eval graph when the window size changes
    self.evalbox.bind("<Configure>", self._eval_plot)
439
+
440
def _init_fonts(self, top):
    """
    Create the size variable and the two helvetica fonts.

    :param top: The window that owns the size variable.

    The regular font uses the negated size (20 initially); the small
    font uses 14/20 of it, rounded down.
    """
    # What's our font size (default=same as sysfont)
    size_var = IntVar(top)
    size_var.set(20)
    self._size = size_var

    self._font = Font(family="helvetica", size=-size_var.get())
    small_size = int(size_var.get() * 14 // 20)
    self._smallfont = Font(family="helvetica", size=-(small_size))
447
+ )
448
+
449
def _init_menubar(self, parent):
    """
    Build the File, View, Development-Set and Help menus and attach the
    resulting menu bar to ``parent``.

    :param parent: The window that receives the menu bar.
    """
    menubar = Menu(parent)

    # File menu.
    filemenu = Menu(menubar, tearoff=0)
    filemenu.add_command(label="Reset Application", underline=0, command=self.reset)
    filemenu.add_command(
        label="Save Current Grammar",
        underline=0,
        accelerator="Ctrl-s",
        command=self.save_grammar,
    )
    filemenu.add_command(
        label="Load Grammar",
        underline=0,
        accelerator="Ctrl-o",
        command=self.load_grammar,
    )
    filemenu.add_command(
        label="Save Grammar History", underline=13, command=self.save_history
    )
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # View menu: one radio button per font size, all sharing _size.
    viewmenu = Menu(menubar, tearoff=0)
    font_sizes = (
        ("Tiny", 10),
        ("Small", 16),
        ("Medium", 20),
        ("Large", 24),
        ("Huge", 34),
    )
    for size_label, size_value in font_sizes:
        viewmenu.add_radiobutton(
            label=size_label,
            variable=self._size,
            underline=0,
            value=size_value,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # Development-set menu: one radio button per sentence count, all
    # sharing _devset_size.
    devsetmenu = Menu(menubar, tearoff=0)
    for sentence_count in (50, 100, 200, 500):
        devsetmenu.add_radiobutton(
            label="%d sentences" % sentence_count,
            variable=self._devset_size,
            value=sentence_count,
            command=self.set_devset_size,
        )
    menubar.add_cascade(label="Development-Set", underline=0, menu=devsetmenu)

    # Help menu.
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
546
+
547
def toggle_show_trace(self, *e):
    """
    Switch between the trace display and the development-set display.

    :return: The string ``"break"``.
    """
    switch_view = self.show_devset if self._showing_trace else self.show_trace
    switch_view()
    return "break"
553
+
554
+ _SCALE_N = 5 # center on the last 5 examples.
555
+ _DRAW_LINES = False
556
+
557
def _eval_plot(self, *e, **config):
    # Redraw the precision/recall plot on the ``evalbox`` canvas from
    # the entries of ``self._history``.
    #
    # ``*e`` absorbs the event object when this is invoked as an event
    # callback.  ``config`` may carry explicit ``width``/``height``
    # values; otherwise the canvas's current size is used.
    width = config.get("width", self.evalbox.winfo_width())
    height = config.get("height", self.evalbox.winfo_height())

    # Clear the canvas
    self.evalbox.delete("all")

    # Draw the precision & recall labels.  Their bounding boxes fix the
    # left and bottom edges of the plot area.
    tag = self.evalbox.create_text(
        10, height // 2 - 10, justify="left", anchor="w", text="Precision"
    )
    left, right = self.evalbox.bbox(tag)[2] + 5, width - 10
    tag = self.evalbox.create_text(
        left + (width - left) // 2,
        height - 10,
        anchor="s",
        text="Recall",
        justify="center",
    )
    top, bot = 10, self.evalbox.bbox(tag)[1] - 10

    # Draw masks for clipping the plot.  These are background-coloured
    # rectangles over the label margins; they are lowered first, and the
    # items lowered later in this method end up beneath them.
    bg = self._EVALBOX_PARAMS["background"]
    self.evalbox.lower(
        self.evalbox.create_rectangle(0, 0, left - 1, 5000, fill=bg, outline=bg)
    )
    self.evalbox.lower(
        self.evalbox.create_rectangle(0, bot + 1, 5000, 5000, fill=bg, outline=bg)
    )

    # Calculate the plot's scale.
    if self._autoscale.get() and len(self._history) > 1:
        # Start from an inverted range so that the first history entry
        # examined sets both the minimum and the maximum.
        max_precision = max_recall = 0
        min_precision = min_recall = 1
        for i in range(1, min(len(self._history), self._SCALE_N + 1)):
            grammar, precision, recall, fmeasure = self._history[-i]
            min_precision = min(precision, min_precision)
            min_recall = min(recall, min_recall)
            max_precision = max(precision, max_precision)
            max_recall = max(recall, max_recall)
        #             if max_precision-min_precision > max_recall-min_recall:
        #                 min_recall -= (max_precision-min_precision)/2
        #                 max_recall += (max_precision-min_precision)/2
        #             else:
        #                 min_precision -= (max_recall-min_recall)/2
        #                 max_precision += (max_recall-min_recall)/2
        #             if min_recall < 0:
        #                 max_recall -= min_recall
        #                 min_recall = 0
        #             if min_precision < 0:
        #                 max_precision -= min_precision
        #                 min_precision = 0
        # Pad the range by 0.01 on each side, clamped to [0, 1]; this
        # also keeps max - min non-zero for the divisions below.
        min_precision = max(min_precision - 0.01, 0)
        min_recall = max(min_recall - 0.01, 0)
        max_precision = min(max_precision + 0.01, 1)
        max_recall = min(max_recall + 0.01, 1)
    else:
        min_precision = min_recall = 0
        max_precision = max_recall = 1

    # Draw the axis lines & grid lines.  Grid lines are placed at every
    # 10% and drawn only when they fall strictly inside the plot area.
    for i in range(11):
        x = left + (right - left) * (
            (i / 10.0 - min_recall) / (max_recall - min_recall)
        )
        y = bot - (bot - top) * (
            (i / 10.0 - min_precision) / (max_precision - min_precision)
        )
        if left < x < right:
            self.evalbox.create_line(x, top, x, bot, fill="#888")
        if top < y < bot:
            self.evalbox.create_line(left, y, right, y, fill="#888")
    self.evalbox.create_line(left, top, left, bot)
    self.evalbox.create_line(left, bot, right, bot)

    # Display the plot's scale
    self.evalbox.create_text(
        left - 3,
        bot,
        justify="right",
        anchor="se",
        text="%d%%" % (100 * min_precision),
    )
    self.evalbox.create_text(
        left - 3,
        top,
        justify="right",
        anchor="ne",
        text="%d%%" % (100 * max_precision),
    )
    self.evalbox.create_text(
        left,
        bot + 3,
        justify="center",
        anchor="nw",
        text="%d%%" % (100 * min_recall),
    )
    self.evalbox.create_text(
        right,
        bot + 3,
        justify="center",
        anchor="ne",
        text="%d%%" % (100 * max_recall),
    )

    # Display the scores.
    prev_x = prev_y = None
    for i, (_, precision, recall, fscore) in enumerate(self._history):
        x = left + (right - left) * (
            (recall - min_recall) / (max_recall - min_recall)
        )
        y = bot - (bot - top) * (
            (precision - min_precision) / (max_precision - min_precision)
        )
        if i == self._history_index:
            # The entry being viewed gets a bright marker that is not
            # lowered, and its scores are written to the status bar.
            self.evalbox.create_oval(
                x - 2, y - 2, x + 2, y + 2, fill="#0f0", outline="#000"
            )
            self.status["text"] = (
                "Precision: %.2f%%\t" % (precision * 100)
                + "Recall: %.2f%%\t" % (recall * 100)
                + "F-score: %.2f%%" % (fscore * 100)
            )
        else:
            # Other entries get a pale marker that is lowered in the
            # canvas stacking order.
            self.evalbox.lower(
                self.evalbox.create_oval(
                    x - 2, y - 2, x + 2, y + 2, fill="#afa", outline="#8c8"
                )
            )
        # Optionally connect consecutive history entries with a line.
        if prev_x is not None and self._eval_lines.get():
            self.evalbox.lower(
                self.evalbox.create_line(prev_x, prev_y, x, y, fill="#8c8")
            )
        prev_x, prev_y = x, y
691
+
692
+ _eval_demon_running = False
693
+
694
+ def _eval_demon(self):
695
+ if self.top is None:
696
+ return
697
+ if self.chunker is None:
698
+ self._eval_demon_running = False
699
+ return
700
+
701
+ # Note our starting time.
702
+ t0 = time.time()
703
+
704
+ # If are still typing, then wait for them to finish.
705
+ if (
706
+ time.time() - self._last_keypress < self._EVAL_DELAY
707
+ and self.normalized_grammar != self._eval_normalized_grammar
708
+ ):
709
+ self._eval_demon_running = True
710
+ return self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)
711
+
712
+ # If the grammar changed, restart the evaluation.
713
+ if self.normalized_grammar != self._eval_normalized_grammar:
714
+ # Check if we've seen this grammar already. If so, then
715
+ # just use the old evaluation values.
716
+ for (g, p, r, f) in self._history:
717
+ if self.normalized_grammar == self.normalize_grammar(g):
718
+ self._history.append((g, p, r, f))
719
+ self._history_index = len(self._history) - 1
720
+ self._eval_plot()
721
+ self._eval_demon_running = False
722
+ self._eval_normalized_grammar = None
723
+ return
724
+ self._eval_index = 0
725
+ self._eval_score = ChunkScore(chunk_label=self._chunk_label)
726
+ self._eval_grammar = self.grammar
727
+ self._eval_normalized_grammar = self.normalized_grammar
728
+
729
+ # If the grammar is empty, the don't bother evaluating it, or
730
+ # recording it in history -- the score will just be 0.
731
+ if self.normalized_grammar.strip() == "":
732
+ # self._eval_index = self._devset_size.get()
733
+ self._eval_demon_running = False
734
+ return
735
+
736
+ # Score the next set of examples
737
+ for gold in self.devset[
738
+ self._eval_index : min(
739
+ self._eval_index + self._EVAL_CHUNK, self._devset_size.get()
740
+ )
741
+ ]:
742
+ guess = self._chunkparse(gold.leaves())
743
+ self._eval_score.score(gold, guess)
744
+
745
+ # update our index in the devset.
746
+ self._eval_index += self._EVAL_CHUNK
747
+
748
+ # Check if we're done
749
+ if self._eval_index >= self._devset_size.get():
750
+ self._history.append(
751
+ (
752
+ self._eval_grammar,
753
+ self._eval_score.precision(),
754
+ self._eval_score.recall(),
755
+ self._eval_score.f_measure(),
756
+ )
757
+ )
758
+ self._history_index = len(self._history) - 1
759
+ self._eval_plot()
760
+ self._eval_demon_running = False
761
+ self._eval_normalized_grammar = None
762
+ else:
763
+ progress = 100 * self._eval_index / self._devset_size.get()
764
+ self.status["text"] = "Evaluating on Development Set (%d%%)" % progress
765
+ self._eval_demon_running = True
766
+ self._adaptively_modify_eval_chunk(time.time() - t0)
767
+ self.top.after(int(self._EVAL_FREQ * 1000), self._eval_demon)
768
+
769
+ def _adaptively_modify_eval_chunk(self, t):
770
+ """
771
+ Modify _EVAL_CHUNK to try to keep the amount of time that the
772
+ eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.
773
+
774
+ :param t: The amount of time that the eval demon took.
775
+ """
776
+ if t > self._EVAL_DEMON_MAX and self._EVAL_CHUNK > 5:
777
+ self._EVAL_CHUNK = min(
778
+ self._EVAL_CHUNK - 1,
779
+ max(
780
+ int(self._EVAL_CHUNK * (self._EVAL_DEMON_MAX / t)),
781
+ self._EVAL_CHUNK - 10,
782
+ ),
783
+ )
784
+ elif t < self._EVAL_DEMON_MIN:
785
+ self._EVAL_CHUNK = max(
786
+ self._EVAL_CHUNK + 1,
787
+ min(
788
+ int(self._EVAL_CHUNK * (self._EVAL_DEMON_MIN / t)),
789
+ self._EVAL_CHUNK + 10,
790
+ ),
791
+ )
792
+
793
def _init_widgets(self, top):
    """
    Build and lay out all of the application's widgets inside ``top``.

    Creates the grammar editor, help box, development-set view,
    evaluation canvas, their buttons and scrollbars, and the status
    label, then configures the text tags used for highlighting.
    """
    frame_bg = self._FRAME_PARAMS["background"]
    button_opts = self._BUTTON_PARAMS

    main = Frame(top, **self._FRAME_PARAMS)
    main.grid_columnconfigure(0, weight=4)
    main.grid_columnconfigure(3, weight=2)
    main.grid_rowconfigure(1, weight=1)
    main.grid_rowconfigure(5, weight=1)

    # The grammar
    self.grammarbox = Text(main, font=self._font, **self._GRAMMARBOX_PARAMS)
    self.grammarlabel = Label(
        main,
        font=self._font,
        text="Grammar:",
        highlightcolor="black",
        background=self._GRAMMARBOX_PARAMS["background"],
    )
    self.grammarlabel.grid(column=0, row=0, sticky="SW")
    self.grammarbox.grid(column=0, row=1, sticky="NEWS")

    # Scroll bar for grammar
    grammar_scroll = Scrollbar(main, command=self.grammarbox.yview)
    grammar_scroll.grid(column=1, row=1, sticky="NWS")
    self.grammarbox.config(yscrollcommand=grammar_scroll.set)

    # Grammar buttons
    grammar_buttons = Frame(main, background=frame_bg)
    grammar_buttons.grid(column=0, row=2, sticky="EW")
    for caption, callback in (
        ("Prev Grammar", self._history_prev),
        ("Next Grammar", self._history_next),
    ):
        Button(
            grammar_buttons, text=caption, command=callback, **button_opts
        ).pack(side="left")

    # Help box
    self.helpbox = Text(main, font=self._smallfont, **self._HELPBOX_PARAMS)
    self.helpbox.grid(column=3, row=1, sticky="NEWS")
    self.helptabs = {}
    tab_strip = Frame(main, background=frame_bg)
    tab_strip.grid(column=3, row=0, sticky="SW")
    for position, (tab, tabstops, text) in enumerate(self.HELP):
        tab_label = Label(tab_strip, text=tab, font=self._smallfont)
        tab_label.grid(column=position * 2, row=0, sticky="S")
        # Bind the tab name as a default so each label opens its own page.
        tab_label.bind("<ButtonPress>", lambda e, tab=tab: self.show_help(tab))
        self.helptabs[tab] = tab_label
        spacer = Frame(
            tab_strip, height=1, width=self._HELPTAB_SPACER, background=frame_bg
        )
        spacer.grid(column=position * 2 + 1, row=0)
    first_tab = self.HELP[0][0]
    self.helptabs[first_tab].configure(font=self._font)
    self.helpbox.tag_config("elide", elide=True)
    for tag, params in self.HELP_AUTOTAG:
        self.helpbox.tag_config("tag-%s" % tag, **params)
    self.show_help(self.HELP[0][0])

    # Scroll bar for helpbox
    help_scroll = Scrollbar(main, command=self.helpbox.yview)
    self.helpbox.config(yscrollcommand=help_scroll.set)
    help_scroll.grid(column=4, row=1, sticky="NWS")

    # The dev set
    devset_frame = Frame(main, background=self._FRAME_PARAMS["background"])
    self.devsetbox = Text(devset_frame, font=self._font, **self._DEVSETBOX_PARAMS)
    self.devsetbox.pack(expand=True, fill="both")
    self.devsetlabel = Label(
        main,
        font=self._font,
        text="Development Set:",
        justify="right",
        background=self._DEVSETBOX_PARAMS["background"],
    )
    self.devsetlabel.grid(column=0, row=4, sticky="SW")
    devset_frame.grid(column=0, row=5, sticky="NEWS")

    # Dev set scrollbars
    self.devset_scroll = Scrollbar(main, command=self._devset_scroll)
    self.devset_scroll.grid(column=1, row=5, sticky="NWS")
    self.devset_xscroll = Scrollbar(
        devset_frame, command=self.devsetbox.xview, orient="horiz"
    )
    self.devsetbox["xscrollcommand"] = self.devset_xscroll.set
    self.devset_xscroll.pack(side="bottom", fill="x")

    # Dev set buttons
    devset_buttons = Frame(main, background=frame_bg)
    devset_buttons.grid(column=0, row=7, sticky="EW")
    for caption, callback in (
        ("Prev Example (Ctrl-p)", self._devset_prev),
        ("Next Example (Ctrl-n)", self._devset_next),
    ):
        Button(
            devset_buttons, text=caption, command=callback, **button_opts
        ).pack(side="left")
    self.devset_button = Button(
        devset_buttons,
        text="Show example",
        command=self.show_devset,
        state="disabled",
        **button_opts,
    )
    self.devset_button.pack(side="right")
    self.trace_button = Button(
        devset_buttons, text="Show trace", command=self.show_trace, **button_opts
    )
    self.trace_button.pack(side="right")

    # Evaluation box
    self.evalbox = Canvas(main, **self._EVALBOX_PARAMS)
    eval_label = Label(
        main,
        font=self._font,
        text="Evaluation:",
        justify="right",
        background=self._EVALBOX_PARAMS["background"],
    )
    eval_label.grid(column=3, row=4, sticky="SW")
    self.evalbox.grid(column=3, row=5, sticky="NEWS", columnspan=2)

    # Evaluation box buttons
    eval_buttons = Frame(main, background=frame_bg)
    eval_buttons.grid(column=3, row=7, sticky="EW")
    autoscale = IntVar(self.top)
    autoscale.set(False)
    self._autoscale = autoscale
    Checkbutton(
        eval_buttons,
        variable=autoscale,
        command=self._eval_plot,
        text="Zoom",
        **button_opts,
    ).pack(side="left")
    eval_lines = IntVar(self.top)
    eval_lines.set(False)
    self._eval_lines = eval_lines
    Checkbutton(
        eval_buttons,
        variable=eval_lines,
        command=self._eval_plot,
        text="Lines",
        **button_opts,
    ).pack(side="left")
    Button(eval_buttons, text="History", **button_opts).pack(side="right")

    # The status label
    self.status = Label(main, font=self._font, **self._STATUS_PARAMS)
    self.status.grid(column=0, row=9, sticky="NEW", padx=3, pady=2, columnspan=5)

    # Help box & devset box can't be edited.
    self.helpbox["state"] = "disabled"
    self.devsetbox["state"] = "disabled"

    # Spacers: (height, width, column, row)
    for height, width, column, row in ((10, 0, 0, 3), (0, 10, 2, 0), (6, 0, 0, 8)):
        Frame(main, height=height, width=width, background=frame_bg).grid(
            column=column, row=row
        )

    # Pack the frame.
    main.pack(fill="both", expand=True)

    # Set up colors for the devset box
    devset_tags = (
        ("true-pos", {"background": "#afa", "underline": "True"}),
        ("false-neg", {"underline": "True", "foreground": "#800"}),
        ("false-pos", {"background": "#faa"}),
        ("trace", {"foreground": "#666", "wrap": "none"}),
        ("wrapindent", {"lmargin2": 30, "wrap": "none"}),
        ("error", {"foreground": "#800"}),
    )
    for tag_name, options in devset_tags:
        self.devsetbox.tag_config(tag_name, **options)

    # And for the grammarbox
    grammar_tags = (
        ("error", {"background": "#fec"}),
        ("comment", {"foreground": "#840"}),
        ("angle", {"foreground": "#00f"}),
        ("brace", {"foreground": "#0a0"}),
        ("hangindent", {"lmargin1": 0, "lmargin2": 40}),
    )
    for tag_name, options in grammar_tags:
        self.grammarbox.tag_config(tag_name, **options)
981
+
982
_showing_trace = False

def show_trace(self, *e):
    """
    Show a step-by-step trace of the grammar on the current sentence.

    The development-set box is filled with one tag-sequence line per
    step: first with no rules applied, then after each successive rule
    of the current grammar.  On every line the chunks found by the rules
    applied so far are colored as true positives, false negatives or
    false positives relative to the gold-standard chunks.

    :param e: Ignored; lets the method be used as an event callback.
    """
    self._showing_trace = True
    self.trace_button["state"] = "disabled"
    self.devset_button["state"] = "normal"

    self.devsetbox["state"] = "normal"
    # self.devsetbox['wrap'] = 'none'
    self.devsetbox.delete("1.0", "end")
    self.devsetlabel["text"] = "Development Set (%d/%d)" % (
        (self.devset_index + 1, self._devset_size.get())
    )

    if self.chunker is None:
        self.devsetbox.insert("1.0", "Trace: waiting for a valid grammar.")
        self.devsetbox.tag_add("error", "1.0", "end")
        return  # can't do anything more

    gold_tree = self.devset[self.devset_index]
    rules = self.chunker.rules()
    step_count = len(rules) + 1

    # Calculate the tag sequence, remembering the column at which each
    # tag starts (column 0 is the leading tab).
    tagseq = "\t"
    charnum = [1]
    for _word, pos in gold_tree.leaves():
        tagseq += "%s " % pos
        charnum.append(len(tagseq))
    self.charnum = {
        (i, j): charnum[j] for i in range(step_count) for j in range(len(charnum))
    }
    # Each step takes two lines: a caption line and the tag sequence.
    self.linenum = {i: i * 2 + 2 for i in range(step_count)}

    # The gold chunks do not depend on the step, so compute them once.
    gold_chunks = self._chunks(gold_tree)

    for i in range(step_count):
        if i == 0:
            self.devsetbox.insert("end", "Start:\n")
        else:
            self.devsetbox.insert("end", "Apply %s:\n" % rules[i - 1])
        self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")
        # Display the tag sequence.
        self.devsetbox.insert("end", tagseq + "\n")
        self.devsetbox.tag_add("wrapindent", "end -2c linestart", "end -2c")
        # Run a partial parser that applies only the first i rules, so
        # that this step shows the state after rule i (not the final
        # result of the whole grammar).
        partial_chunker = RegexpChunkParser(rules[:i])
        try:
            test_tree = partial_chunker.parse(gold_tree.leaves())
        except (ValueError, IndexError):
            # Same recovery as _chunkparse: flag the whole grammar as
            # bad and treat the step as chunking nothing.
            self.grammarbox.tag_add("error", "1.0", "end")
            test_tree = gold_tree.leaves()
        test_chunks = self._chunks(test_tree)
        # Compare them.
        for chunk in gold_chunks.intersection(test_chunks):
            self._color_chunk(i, chunk, "true-pos")
        for chunk in gold_chunks - test_chunks:
            self._color_chunk(i, chunk, "false-neg")
        for chunk in test_chunks - gold_chunks:
            self._color_chunk(i, chunk, "false-pos")
    self.devsetbox.insert("end", "Finished.\n")
    self.devsetbox.tag_add("trace", "end -2c linestart", "end -2c")

    # This is a hack, because the x-scrollbar isn't updating its
    # position right -- I'm not sure what the underlying cause is
    # though.  (This is on OS X w/ python 2.5)
    self.top.after(100, self.devset_xscroll.set, 0, 0.3)
1046
+
1047
def show_help(self, tab):
    """
    Display the help page named ``tab`` in the help box.

    The selected tab label gets the foreground style and all others the
    background style.  ``<<TAGSET>>`` in the page text is replaced by a
    table built from ``self.tagset``, and the markup tags listed in
    ``HELP_AUTOTAG`` are hidden while their contents are styled.

    :param tab: The name of the help page to show.
    """

    def tag_order(entry):
        # Tags that start with a word character sort before the rest.
        tag_name = entry[0]
        if re.match(r"\w+", tag_name):
            return (0, tag_name)
        return (1, tag_name)

    offset = "1.0 + %d chars"

    self.helpbox["state"] = "normal"
    self.helpbox.delete("1.0", "end")
    for name, tabstops, text in self.HELP:
        if name != tab:
            self.helptabs[name].config(**self._HELPTAB_BG_PARAMS)
            continue

        tagset_rows = "\n".join(
            "\t%s\t%s" % item
            for item in sorted(list(self.tagset.items()), key=tag_order)
        )
        text = text.replace("<<TAGSET>>", tagset_rows)

        self.helptabs[name].config(**self._HELPTAB_FG_PARAMS)
        self.helpbox.config(tabs=tabstops)
        self.helpbox.insert("1.0", text + "\n" * 20)
        for tag, params in self.HELP_AUTOTAG:
            pattern = f"(?s)(<{tag}>)(.*?)(</{tag}>)"
            for found in re.finditer(pattern, text):
                # Hide the opening and closing markers; style what is
                # between them.
                self.helpbox.tag_add(
                    "elide", offset % found.start(1), offset % found.end(1)
                )
                self.helpbox.tag_add(
                    "tag-%s" % tag, offset % found.start(2), offset % found.end(2)
                )
                self.helpbox.tag_add(
                    "elide", offset % found.start(3), offset % found.end(3)
                )
    self.helpbox["state"] = "disabled"
1080
+
1081
+ def _history_prev(self, *e):
1082
+ self._view_history(self._history_index - 1)
1083
+ return "break"
1084
+
1085
+ def _history_next(self, *e):
1086
+ self._view_history(self._history_index + 1)
1087
+ return "break"
1088
+
1089
+ def _view_history(self, index):
1090
+ # Bounds & sanity checking:
1091
+ index = max(0, min(len(self._history) - 1, index))
1092
+ if not self._history:
1093
+ return
1094
+ # Already viewing the requested history item?
1095
+ if index == self._history_index:
1096
+ return
1097
+ # Show the requested grammar. It will get added to _history
1098
+ # only if they edit it (causing self.update() to get run.)
1099
+ self.grammarbox["state"] = "normal"
1100
+ self.grammarbox.delete("1.0", "end")
1101
+ self.grammarbox.insert("end", self._history[index][0])
1102
+ self.grammarbox.mark_set("insert", "1.0")
1103
+ self._history_index = index
1104
+ self._syntax_highlight_grammar(self._history[index][0])
1105
+ # Record the normalized grammar & regenerate the chunker.
1106
+ self.normalized_grammar = self.normalize_grammar(self._history[index][0])
1107
+ if self.normalized_grammar:
1108
+ rules = [
1109
+ RegexpChunkRule.fromstring(line)
1110
+ for line in self.normalized_grammar.split("\n")
1111
+ ]
1112
+ else:
1113
+ rules = []
1114
+ self.chunker = RegexpChunkParser(rules)
1115
+ # Show the score.
1116
+ self._eval_plot()
1117
+ # Update the devset box
1118
+ self._highlight_devset()
1119
+ if self._showing_trace:
1120
+ self.show_trace()
1121
+ # Update the grammar label
1122
+ if self._history_index < len(self._history) - 1:
1123
+ self.grammarlabel["text"] = "Grammar {}/{}:".format(
1124
+ self._history_index + 1,
1125
+ len(self._history),
1126
+ )
1127
+ else:
1128
+ self.grammarlabel["text"] = "Grammar:"
1129
+
1130
+ def _devset_next(self, *e):
1131
+ self._devset_scroll("scroll", 1, "page")
1132
+ return "break"
1133
+
1134
+ def _devset_prev(self, *e):
1135
+ self._devset_scroll("scroll", -1, "page")
1136
+ return "break"
1137
+
1138
def destroy(self, *e):
    """
    Destroy the application window, if it still exists.

    After the call ``self.top`` is ``None``; calling it again is a no-op.
    """
    window = self.top
    if window is not None:
        window.destroy()
        self.top = None
1143
+
1144
+ def _devset_scroll(self, command, *args):
1145
+ N = 1 # size of a page -- one sentence.
1146
+ showing_trace = self._showing_trace
1147
+ if command == "scroll" and args[1].startswith("unit"):
1148
+ self.show_devset(self.devset_index + int(args[0]))
1149
+ elif command == "scroll" and args[1].startswith("page"):
1150
+ self.show_devset(self.devset_index + N * int(args[0]))
1151
+ elif command == "moveto":
1152
+ self.show_devset(int(float(args[0]) * self._devset_size.get()))
1153
+ else:
1154
+ assert 0, f"bad scroll command {command} {args}"
1155
+ if showing_trace:
1156
+ self.show_trace()
1157
+
1158
def show_devset(self, index=None):
    """
    Display one development-set sentence in the devset box.

    The index is clamped to the current devset size.  If that sentence
    is already shown (and no trace is showing) nothing happens.
    Otherwise the box is refilled with ``word/tag`` tokens, the chunks
    are highlighted when a chunker is available, and the scrollbar is
    updated.

    :param index: Sentence to show; defaults to the current one.
    """
    if index is None:
        index = self.devset_index

    # Bounds checking
    index = min(max(0, index), self._devset_size.get() - 1)

    if index == self.devset_index and not self._showing_trace:
        return
    self.devset_index = index

    self._showing_trace = False
    self.trace_button["state"] = "normal"
    self.devset_button["state"] = "disabled"

    # Clear the text box.
    self.devsetbox["state"] = "normal"
    self.devsetbox["wrap"] = "word"
    self.devsetbox.delete("1.0", "end")
    self.devsetlabel["text"] = "Development Set (%d/%d)" % (
        (self.devset_index + 1, self._devset_size.get())
    )

    # Add the sentences, recording the column where each word starts
    # (and where the last one ends) for later highlighting.
    sample = self.devset[self.devset_index : self.devset_index + 1]
    self.charnum = {}
    self.linenum = {0: 1}
    for sentnum, sent in enumerate(sample):
        tokens = []
        column = 0
        for wordnum, (word, pos) in enumerate(sent.leaves()):
            token = f"{word}/{pos} "
            self.charnum[sentnum, wordnum] = column
            column += len(token)
            self.charnum[sentnum, wordnum + 1] = column
            tokens.append(token)
        # Drop the trailing space of the last token.
        self.devsetbox.insert("end", "".join(tokens)[:-1] + "\n\n")

    # Highlight chunks in the dev set
    if self.chunker is not None:
        self._highlight_devset()
    self.devsetbox["state"] = "disabled"

    # Update the scrollbar
    total = self._devset_size.get()
    first = self.devset_index / total
    last = (self.devset_index + 2) / self._devset_size.get()
    self.devset_scroll.set(first, last)
1202
+
1203
+ def _chunks(self, tree):
1204
+ chunks = set()
1205
+ wordnum = 0
1206
+ for child in tree:
1207
+ if isinstance(child, Tree):
1208
+ if child.label() == self._chunk_label:
1209
+ chunks.add((wordnum, wordnum + len(child)))
1210
+ wordnum += len(child)
1211
+ else:
1212
+ wordnum += 1
1213
+ return chunks
1214
+
1215
+ def _syntax_highlight_grammar(self, grammar):
1216
+ if self.top is None:
1217
+ return
1218
+ self.grammarbox.tag_remove("comment", "1.0", "end")
1219
+ self.grammarbox.tag_remove("angle", "1.0", "end")
1220
+ self.grammarbox.tag_remove("brace", "1.0", "end")
1221
+ self.grammarbox.tag_add("hangindent", "1.0", "end")
1222
+ for lineno, line in enumerate(grammar.split("\n")):
1223
+ if not line.strip():
1224
+ continue
1225
+ m = re.match(r"(\\.|[^#])*(#.*)?", line)
1226
+ comment_start = None
1227
+ if m.group(2):
1228
+ comment_start = m.start(2)
1229
+ s = "%d.%d" % (lineno + 1, m.start(2))
1230
+ e = "%d.%d" % (lineno + 1, m.end(2))
1231
+ self.grammarbox.tag_add("comment", s, e)
1232
+ for m in re.finditer("[<>{}]", line):
1233
+ if comment_start is not None and m.start() >= comment_start:
1234
+ break
1235
+ s = "%d.%d" % (lineno + 1, m.start())
1236
+ e = "%d.%d" % (lineno + 1, m.end())
1237
+ if m.group() in "<>":
1238
+ self.grammarbox.tag_add("angle", s, e)
1239
+ else:
1240
+ self.grammarbox.tag_add("brace", s, e)
1241
+
1242
+ def _grammarcheck(self, grammar):
1243
+ if self.top is None:
1244
+ return
1245
+ self.grammarbox.tag_remove("error", "1.0", "end")
1246
+ self._grammarcheck_errs = []
1247
+ for lineno, line in enumerate(grammar.split("\n")):
1248
+ line = re.sub(r"((\\.|[^#])*)(#.*)?", r"\1", line)
1249
+ line = line.strip()
1250
+ if line:
1251
+ try:
1252
+ RegexpChunkRule.fromstring(line)
1253
+ except ValueError as e:
1254
+ self.grammarbox.tag_add(
1255
+ "error", "%s.0" % (lineno + 1), "%s.0 lineend" % (lineno + 1)
1256
+ )
1257
+ self.status["text"] = ""
1258
+
1259
def update(self, *event):
    """
    React to a (possible) change of the grammar text.

    Reads the grammar from the grammar box and, if its normalized form
    changed, re-highlights it, rebuilds the chunker, refreshes the devset
    display and starts the evaluation demon.  If the grammar does not
    parse, the offending lines are flagged and the chunker is cleared.

    :param event: Present when called as an event callback; in that case
        the time of the call is recorded as the last keypress.
    """
    # Record when update was called (for grammarcheck)
    if event:
        self._last_keypress = time.time()

    # Read the grammar from the Text box.
    grammar = self.grammarbox.get("1.0", "end")
    self.grammar = grammar

    # If the grammar hasn't changed, do nothing:
    normalized = self.normalize_grammar(grammar)
    if normalized == self.normalized_grammar:
        return
    self.normalized_grammar = normalized

    # If the grammar has changed, and we're looking at history,
    # then stop looking at history.
    if self._history_index < len(self._history) - 1:
        self.grammarlabel["text"] = "Grammar:"

    self._syntax_highlight_grammar(grammar)

    # The grammar has changed; try parsing it.  If it doesn't
    # parse, do nothing.  (flag error location?)
    try:
        # Note: the normalized grammar has no blank lines.
        rules = []
        if normalized:
            rules = [
                RegexpChunkRule.fromstring(rule_line)
                for rule_line in normalized.split("\n")
            ]
    except ValueError:
        # Use the un-normalized grammar for error highlighting.
        self._grammarcheck(grammar)
        self.chunker = None
        return

    self.chunker = RegexpChunkParser(rules)
    self.grammarbox.tag_remove("error", "1.0", "end")
    self.grammar_changed = time.time()

    # Display the results
    refresh = self.show_trace if self._showing_trace else self._highlight_devset
    refresh()

    # Start the eval demon
    if not self._eval_demon_running:
        self._eval_demon()
1309
+
1310
+ def _highlight_devset(self, sample=None):
1311
+ if sample is None:
1312
+ sample = self.devset[self.devset_index : self.devset_index + 1]
1313
+
1314
+ self.devsetbox.tag_remove("true-pos", "1.0", "end")
1315
+ self.devsetbox.tag_remove("false-neg", "1.0", "end")
1316
+ self.devsetbox.tag_remove("false-pos", "1.0", "end")
1317
+
1318
+ # Run the grammar on the test cases.
1319
+ for sentnum, gold_tree in enumerate(sample):
1320
+ # Run the chunk parser
1321
+ test_tree = self._chunkparse(gold_tree.leaves())
1322
+ # Extract gold & test chunks
1323
+ gold_chunks = self._chunks(gold_tree)
1324
+ test_chunks = self._chunks(test_tree)
1325
+ # Compare them.
1326
+ for chunk in gold_chunks.intersection(test_chunks):
1327
+ self._color_chunk(sentnum, chunk, "true-pos")
1328
+ for chunk in gold_chunks - test_chunks:
1329
+ self._color_chunk(sentnum, chunk, "false-neg")
1330
+ for chunk in test_chunks - gold_chunks:
1331
+ self._color_chunk(sentnum, chunk, "false-pos")
1332
+
1333
+ def _chunkparse(self, words):
1334
+ try:
1335
+ return self.chunker.parse(words)
1336
+ except (ValueError, IndexError) as e:
1337
+ # There's an error somewhere in the grammar, but we're not sure
1338
+ # exactly where, so just mark the whole grammar as bad.
1339
+ # E.g., this is caused by: "({<NN>})"
1340
+ self.grammarbox.tag_add("error", "1.0", "end")
1341
+ # Treat it as tagging nothing:
1342
+ return words
1343
+
1344
+ def _color_chunk(self, sentnum, chunk, tag):
1345
+ start, end = chunk
1346
+ self.devsetbox.tag_add(
1347
+ tag,
1348
+ f"{self.linenum[sentnum]}.{self.charnum[sentnum, start]}",
1349
+ f"{self.linenum[sentnum]}.{self.charnum[sentnum, end] - 1}",
1350
+ )
1351
+
1352
def reset(self):
    """
    Discard the grammar and its history and refresh the display.

    Clears the chunker, grammar and evaluation history, empties the
    grammar box, shows the first development-set sentence and then runs
    ``update()``.
    """
    # Clear various variables
    self.chunker = self.grammar = self.normalized_grammar = None
    self.grammar_changed = 0
    self._history, self._history_index = [], 0
    # Update the on-screen display.
    self.grammarbox.delete("1.0", "end")
    self.show_devset(0)
    self.update()
    # self._eval_plot()
1365
+
1366
# Layout of a saved grammar file: a commented header with the save date,
# development-set name and scores, followed by the grammar text.
SAVE_GRAMMAR_TEMPLATE = (
    "# Regexp Chunk Parsing Grammar\n"
    "# Saved %(date)s\n"
    "#\n"
    "# Development set: %(devset)s\n"
    "# Precision: %(precision)s\n"
    "# Recall: %(recall)s\n"
    "# F-score: %(fscore)s\n\n"
    "%(grammar)s\n"
)

def save_grammar(self, filename=None):
    """
    Write the current grammar, with a descriptive header, to a file.

    The header reports the scores of the newest history entry when it
    matches the current grammar; otherwise it says that the grammar is
    not well formed or has not finished evaluating.

    :param filename: Destination path.  If empty, the user is asked to
        choose one; cancelling the dialog saves nothing.
    """
    if not filename:
        # "Chunk Grammar" is the label shown in the file dialog.
        ftypes = [("Chunk Grammar", ".chunk"), ("All files", "*")]
        filename = asksaveasfilename(filetypes=ftypes, defaultextension=".chunk")
        if not filename:
            return

    if self._history and self.normalized_grammar == self.normalize_grammar(
        self._history[-1][0]
    ):
        # The newest history entry holds this grammar's scores.
        precision, recall, fscore = (
            "%.2f%%" % (100 * v) for v in self._history[-1][1:]
        )
    elif self.chunker is None:
        precision = recall = fscore = "Grammar not well formed"
    else:
        precision = recall = fscore = "Not finished evaluation yet"

    header_values = {
        "date": time.ctime(),
        "devset": self.devset_name,
        "precision": precision,
        "recall": recall,
        "fscore": fscore,
        "grammar": self.grammar.strip(),
    }
    with open(filename, "w") as outfile:
        outfile.write(self.SAVE_GRAMMAR_TEMPLATE % header_values)
1406
+
1407
def load_grammar(self, filename=None):
    """
    Replace the grammar in the grammar box with one read from a file.

    The header written by ``save_grammar`` (everything up to and
    including the ``F-score:`` line) is stripped before the text is
    inserted.

    :param filename: Path to read.  If empty, the user is asked to
        choose one; cancelling the dialog loads nothing.
    :raises OSError: If the file cannot be read.  In that case the
        grammar box is left untouched.
    """
    if not filename:
        # "Chunk Grammar" is the label shown in the file dialog.
        ftypes = [("Chunk Grammar", ".chunk"), ("All files", "*")]
        filename = askopenfilename(filetypes=ftypes, defaultextension=".chunk")
        if not filename:
            return

    # Read the file *before* clearing the grammar box, so that a failed
    # read does not throw away the grammar being edited.
    with open(filename) as infile:
        grammar = infile.read()
    grammar = re.sub(
        r"^\# Regexp Chunk Parsing Grammar[\s\S]*" "F-score:.*\n", "", grammar
    ).lstrip()

    self.grammarbox.delete("1.0", "end")
    self.update()
    self.grammarbox.insert("1.0", grammar)
    self.update()
1422
+
1423
def save_history(self, filename=None):
    """
    Write every grammar in the history, with its scores, to a file.

    Each history entry is written as a header line followed by the
    grammar's lines, indented.  If the grammar currently being edited is
    not the newest history entry, it is appended as "Current Grammar".

    :param filename: Destination path.  If empty, the user is asked to
        choose one; cancelling the dialog saves nothing.
    """
    if not filename:
        # "Chunk Grammar History" is the label shown in the file dialog.
        ftypes = [("Chunk Grammar History", ".txt"), ("All files", "*")]
        filename = asksaveasfilename(filetypes=ftypes, defaultextension=".txt")
        if not filename:
            return

    def indented(grammar_text):
        # One output line per grammar line.  Splitting on newlines (not
        # on arbitrary whitespace) keeps each rule and its trailing
        # comment together.
        return "".join(" %s\n" % line for line in grammar_text.strip().split("\n"))

    with open(filename, "w") as outfile:
        outfile.write("# Regexp Chunk Parsing Grammar History\n")
        outfile.write("# Saved %s\n" % time.ctime())
        outfile.write("# Development set: %s\n" % self.devset_name)
        for i, (g, p, r, f) in enumerate(self._history):
            hdr = (
                "Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, "
                "fscore=%.2f%%)"
                % (i + 1, len(self._history), p * 100, r * 100, f * 100)
            )
            outfile.write("\n%s\n" % hdr)
            outfile.write(indented(g))

        # Also save the grammar being edited unless it is already the
        # newest history entry.
        if not (
            self._history
            and self.normalized_grammar
            == self.normalize_grammar(self._history[-1][0])
        ):
            if self.chunker is None:
                outfile.write("\nCurrent Grammar (not well-formed)\n")
            else:
                outfile.write("\nCurrent Grammar (not evaluated)\n")
            outfile.write(indented(self.grammar))
1454
+ )
1455
+
1456
def about(self, *e):
    """
    Show the "about" dialog for the application.

    A ``tkinter.messagebox`` dialog is used when it can be shown;
    otherwise the text is displayed with ``ShowText``.

    :param e: Ignored; lets the method be used as an event callback.
    """
    ABOUT = "NLTK RegExp Chunk Parser Application\n" + "Written by Edward Loper"
    TITLE = "About: Regular Expression Chunk Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Fall back for any ordinary failure, but let KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed.
        ShowText(self.top, TITLE, ABOUT)
1465
+
1466
def set_devset_size(self, size=None):
    """
    Set how many development-set sentences are used.

    The size is capped at the number of sentences in ``self.devset``.
    Afterwards sentence 1 and then sentence 0 are shown.

    :param size: The requested size; if None, the current value is kept
        (but still capped).
    """
    size_var = self._devset_size
    if size is not None:
        size_var.set(size)
    size_var.set(min(len(self.devset), size_var.get()))
    for sentence_index in (1, 0):
        self.show_devset(sentence_index)
    # what about history?  Evaluated at diff dev set sizes!
1473
+
1474
def resize(self, size=None):
    """
    Change the size of the application's fonts.

    The main font is set to the negated absolute size; the small font
    to 14/20 of that (floor division), but never closer to zero than -10.

    :param size: The new size; if None, the currently stored size is
        re-applied.
    """
    if size is not None:
        self._size.set(size)
    magnitude = abs(self._size.get())
    small = min(-10, -magnitude * 14 // 20)
    self._font.configure(size=-magnitude)
    self._smallfont.configure(size=small)
1480
+
1481
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Nothing is done when ``in_idle()`` reports true.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
1491
+
1492
+
1493
def app():
    """Create the regexp chunk parser application and run its main loop."""
    chunk_app = RegexpChunkApp()
    chunk_app.mainloop()


if __name__ == "__main__":
    app()

__all__ = ["app"]
pipeline/nltk/app/collocations_app.py ADDED
@@ -0,0 +1,438 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Collocations Application
2
+ # Much of the GUI code is imported from concordance.py; We intend to merge these tools together
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Sumukh Ghodke <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+ #
8
+
9
+
10
+ import queue as q
11
+ import threading
12
+ from tkinter import (
13
+ END,
14
+ LEFT,
15
+ SUNKEN,
16
+ Button,
17
+ Frame,
18
+ IntVar,
19
+ Label,
20
+ Menu,
21
+ OptionMenu,
22
+ Scrollbar,
23
+ StringVar,
24
+ Text,
25
+ Tk,
26
+ )
27
+ from tkinter.font import Font
28
+
29
+ from nltk.corpus import (
30
+ alpino,
31
+ brown,
32
+ cess_cat,
33
+ cess_esp,
34
+ floresta,
35
+ indian,
36
+ mac_morpho,
37
+ machado,
38
+ nps_chat,
39
+ sinica_treebank,
40
+ treebank,
41
+ )
42
+ from nltk.probability import FreqDist
43
+ from nltk.util import in_idle
44
+
45
# Messages the loader thread puts on the queue polled by the view.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"
# Delay passed to ``Tk.after`` between two polls of the queue.
POLL_INTERVAL = 100


def _brown_section(categories):
    """Return a zero-argument loader for the given Brown categories."""
    return lambda: brown.words(categories=categories)


# Every corpus is wrapped in a callable so that nothing is read until
# the corpus is actually selected.
_DEFAULT = "English: Brown Corpus (Humor)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus": lambda: cess_cat.words(),
    "English: Brown Corpus": lambda: brown.words(),
    "English: Brown Corpus (Press)": _brown_section(["news", "editorial", "reviews"]),
    "English: Brown Corpus (Religion)": _brown_section("religion"),
    "English: Brown Corpus (Learned)": _brown_section("learned"),
    "English: Brown Corpus (Science Fiction)": _brown_section("science_fiction"),
    "English: Brown Corpus (Romance)": _brown_section("romance"),
    "English: Brown Corpus (Humor)": _brown_section("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.words(),
    "English: Wall Street Journal Corpus": lambda: treebank.words(),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.words(),
    "Dutch: Alpino Corpus": lambda: alpino.words(),
    "Hindi: Indian Languages Corpus": lambda: indian.words(files="hindi.pos"),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.words(),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.words(),
    "Portuguese: Machado Corpus (Brazil)": lambda: machado.words(),
    "Spanish: CESS-ESP Corpus": lambda: cess_esp.words(),
}
73
+
74
+
75
class CollocationsView:
    """
    Tkinter window that lists the collocations of a selectable corpus,
    one page at a time.

    Loading is delegated to ``CollocationsModel``; the model reports
    back through ``self.queue``, which ``_poll`` reads from the Tk event
    loop every ``POLL_INTERVAL``.
    """

    _BACKGROUND_COLOUR = "#FFF"  # white

    def __init__(self):
        """Build the window and start loading the default corpus."""
        self.queue = q.Queue()
        self.model = CollocationsModel(self.queue)
        self.top = Tk()
        self._init_top(self.top)
        # The menubar selects a default result count, which is written
        # to the model, so the model must already exist here.
        self._init_menubar()
        self._init_widgets(self.top)
        self.load_corpus(self.model.DEFAULT_CORPUS)
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def _init_top(self, top):
        """Configure geometry, title and close handling of the window."""
        top.geometry("550x650+50+50")
        top.title("NLTK Collocations List")
        top.bind("<Control-q>", self.destroy)
        top.protocol("WM_DELETE_WINDOW", self.destroy)
        top.minsize(550, 650)

    def _init_widgets(self, parent):
        """Create the main frame and every widget group inside it."""
        self.main_frame = Frame(
            parent, dict(background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1)
        )
        self._init_corpus_select(self.main_frame)
        self._init_results_box(self.main_frame)
        self._init_paging(self.main_frame)
        self._init_status(self.main_frame)
        self.main_frame.pack(fill="both", expand=True)

    def _init_corpus_select(self, parent):
        """Create the "Corpus:" label and the corpus drop-down."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.var = StringVar(innerframe)
        self.var.set(self.model.DEFAULT_CORPUS)
        Label(
            innerframe,
            justify=LEFT,
            text=" Corpus: ",
            background=self._BACKGROUND_COLOUR,
            padx=2,
            pady=1,
            border=0,
        ).pack(side="left")

        # The default corpus is listed first, the others follow sorted.
        om = OptionMenu(
            innerframe,
            self.var,
            self.model.DEFAULT_CORPUS,
            command=self.corpus_selected,
            *self.model.non_default_corpora()
        )
        om["borderwidth"] = 0
        om["highlightthickness"] = 1
        om.pack(side="left")
        innerframe.pack(side="top", fill="x", anchor="n")

    def _init_status(self, parent):
        """Create the status label below the paging buttons."""
        self.status = Label(
            parent,
            justify=LEFT,
            relief=SUNKEN,
            background=self._BACKGROUND_COLOUR,
            border=0,
            padx=1,
            pady=0,
        )
        self.status.pack(side="top", anchor="sw")

    def _init_menubar(self):
        """Create the File and Edit menus and select the default result count."""
        self._result_size = IntVar(self.top)
        menubar = Menu(self.top)

        filemenu = Menu(menubar, tearoff=0, borderwidth=0)
        filemenu.add_command(
            label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
        )
        menubar.add_cascade(label="File", underline=0, menu=filemenu)

        editmenu = Menu(menubar, tearoff=0)
        rescntmenu = Menu(editmenu, tearoff=0)
        for count in (20, 50, 100):
            rescntmenu.add_radiobutton(
                label=str(count),
                variable=self._result_size,
                underline=0,
                value=count,
                command=self.set_result_size,
            )
        # Entry 1 is the "50" radio button; invoking it also runs
        # set_result_size, which initialises the model's result count.
        rescntmenu.invoke(1)
        editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

        menubar.add_cascade(label="Edit", underline=0, menu=editmenu)
        self.top.config(menu=menubar)

    def set_result_size(self, **kwargs):
        """Copy the selected result count from the menu into the model."""
        self.model.result_count = self._result_size.get()

    def _init_results_box(self, parent):
        """Create the read-only results text box and its two scrollbars."""
        innerframe = Frame(parent)
        i1 = Frame(innerframe)
        i2 = Frame(innerframe)
        vscrollbar = Scrollbar(i1, borderwidth=1)
        hscrollbar = Scrollbar(i2, borderwidth=1, orient="horiz")
        self.results_box = Text(
            i1,
            font=Font(family="courier", size="16"),
            state="disabled",
            borderwidth=1,
            yscrollcommand=vscrollbar.set,
            xscrollcommand=hscrollbar.set,
            wrap="none",
            width="40",
            height="20",
            exportselection=1,
        )
        self.results_box.pack(side="left", fill="both", expand=True)
        vscrollbar.pack(side="left", fill="y", anchor="e")
        vscrollbar.config(command=self.results_box.yview)
        hscrollbar.pack(side="left", fill="x", expand=True, anchor="w")
        hscrollbar.config(command=self.results_box.xview)
        # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
        Label(i2, text="   ", background=self._BACKGROUND_COLOUR).pack(
            side="left", anchor="e"
        )
        i1.pack(side="top", fill="both", expand=True, anchor="n")
        i2.pack(side="bottom", fill="x", anchor="s")
        innerframe.pack(side="top", fill="both", expand=True)

    def _init_paging(self, parent):
        """Create the Previous/Next buttons (initially disabled)."""
        innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
        self.prev = Button(
            innerframe,
            text="Previous",
            command=self.previous,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.prev.pack(side="left", anchor="center")
        self.next = Button(
            innerframe,
            text="Next",
            command=self.__next__,
            width="10",
            borderwidth=1,
            highlightthickness=1,
            state="disabled",
        )
        self.next.pack(side="right", anchor="center")
        innerframe.pack(side="top", fill="y")
        self.reset_current_page()

    def reset_current_page(self):
        """Mark that no page is currently displayed."""
        self.current_page = -1

    def _poll(self):
        """Handle at most one queued model event, then reschedule itself."""
        try:
            event = self.queue.get(block=False)
        except q.Empty:
            pass
        else:
            if event == CORPUS_LOADED_EVENT:
                self.handle_corpus_loaded(event)
            elif event == ERROR_LOADING_CORPUS_EVENT:
                self.handle_error_loading_corpus(event)
        self.after = self.top.after(POLL_INTERVAL, self._poll)

    def handle_error_loading_corpus(self, event):
        """Report a failed load and leave the view empty with paging disabled."""
        self.status["text"] = "Error in loading " + self.var.get()
        self.clear_results_box()
        self.reset_current_page()
        # Do not consult the model about paging here: after a failed
        # load it has no collocations to page through.
        self.freeze_editable()

    def handle_corpus_loaded(self, event):
        """Show the first page of the freshly loaded corpus."""
        self.status["text"] = self.var.get() + " is loaded"
        self.clear_results_box()
        self.reset_current_page()
        collocations = self.model.next(self.current_page + 1)
        self.write_results(collocations)
        self.current_page += 1
        # Button states depend on current_page, so they are computed only
        # after the page counter has been reset for the new corpus.
        self.unfreeze_editable()

    def corpus_selected(self, *args):
        """Callback of the corpus drop-down: load the selected corpus."""
        new_selection = self.var.get()
        self.load_corpus(new_selection)

    def previous(self):
        """Display the page before the current one."""
        self.freeze_editable()
        collocations = self.model.prev(self.current_page - 1)
        self.current_page = self.current_page - 1
        self.clear_results_box()
        self.write_results(collocations)
        self.unfreeze_editable()

    def __next__(self):
        """Display the page after the current one."""
        self.freeze_editable()
        collocations = self.model.next(self.current_page + 1)
        self.clear_results_box()
        self.write_results(collocations)
        self.current_page += 1
        self.unfreeze_editable()

    def load_corpus(self, selection):
        """Ask the model to load ``selection`` unless it is already selected."""
        if self.model.selected_corpus != selection:
            self.status["text"] = "Loading " + selection + "..."
            self.freeze_editable()
            self.model.load_corpus(selection)

    def freeze_editable(self):
        """Disable both paging buttons."""
        self.prev["state"] = "disabled"
        self.next["state"] = "disabled"

    def clear_results_box(self):
        """Delete all text from the results box."""
        # The box is kept disabled; enable it only while editing.
        self.results_box["state"] = "normal"
        self.results_box.delete("1.0", END)
        self.results_box["state"] = "disabled"

    def fire_event(self, event):
        """Generate the virtual Tk event ``event`` on the top-level window."""
        # Firing an event so that rendering of widgets happen in the mainloop thread
        self.top.event_generate(event, when="tail")

    def destroy(self, *e):
        """Cancel the pending poll and close the window (safe to call twice)."""
        if self.top is None:
            return
        self.top.after_cancel(self.after)
        self.top.destroy()
        self.top = None

    def mainloop(self, *args, **kwargs):
        """Run the Tk mainloop unless ``in_idle()`` reports true."""
        if in_idle():
            return
        self.top.mainloop(*args, **kwargs)

    def unfreeze_editable(self):
        """Re-enable whichever paging buttons are valid for the current page."""
        self.set_paging_button_states()

    def set_paging_button_states(self):
        """Enable Previous/Next according to the current page and the model."""
        if self.current_page == -1 or self.current_page == 0:
            self.prev["state"] = "disabled"
        else:
            self.prev["state"] = "normal"
        if self.model.is_last_page(self.current_page):
            self.next["state"] = "disabled"
        else:
            self.next["state"] = "normal"

    def write_results(self, results):
        """Write one ``"w1 w2"`` line per collocation into the results box."""
        self.results_box["state"] = "normal"
        for row, each in enumerate(results, start=1):
            self.results_box.insert(str(row) + ".0", each[0] + " " + each[1] + "\n")
        self.results_box["state"] = "disabled"
347
+
348
+
349
class CollocationsModel:
    """
    Holds the collocations of the selected corpus and serves them in pages.

    A corpus is loaded on a ``LoadCorpus`` worker thread, which stores the
    ranked word pairs in ``self.collocations`` and then puts
    ``CORPUS_LOADED_EVENT`` or ``ERROR_LOADING_CORPUS_EVENT`` on ``queue``.
    """

    def __init__(self, queue):
        """
        :param queue: Queue on which load results are announced.
        """
        self.result_count = None
        self.selected_corpus = None
        self.collocations = None
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.queue = queue
        self.reset_results()

    def reset_results(self):
        """Forget all pages handed out so far."""
        self.result_pages = []
        self.results_returned = 0

    def load_corpus(self, name):
        """Start loading the corpus called ``name`` in the background."""
        self.selected_corpus = name
        self.collocations = None
        self.reset_results()
        runner_thread = self.LoadCorpus(name, self)
        runner_thread.start()

    def non_default_corpora(self):
        """Return the sorted names of all corpora except the default one."""
        return sorted(name for name in self.CORPORA if name != self.DEFAULT_CORPUS)

    def is_last_page(self, number):
        """
        Return True if no collocations remain after page ``number``.

        While nothing is loaded (``self.collocations`` is None) there is
        nothing to page through, so every page counts as the last one.
        """
        if self.collocations is None:
            return True
        total = len(self.collocations)
        loaded = len(self.result_pages)
        if number < loaded:
            # Pages may differ in size if the result count was changed
            # in between, so count what was actually handed out.
            shown = sum(len(page) for page in self.result_pages[: number + 1])
            return shown >= total
        pending_pages = number - loaded + 1
        return self.results_returned + pending_pages * self.result_count >= total

    def next(self, page):
        """
        Return page ``page``, first slicing off every page up to it that
        has not been produced yet.
        """
        while len(self.result_pages) <= page:
            start = self.results_returned
            stop = start + self.result_count
            self.result_pages.append(self.collocations[start:stop])
            self.results_returned = stop
        return self.result_pages[page]

    def prev(self, page):
        """Return the already produced page ``page`` (``[]`` for page -1)."""
        if page == -1:
            return []
        return self.result_pages[page]

    class LoadCorpus(threading.Thread):
        """Worker thread that scores the word pairs of one corpus."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            """Rank adjacent word pairs and publish them on the model."""
            try:
                words = self.model.CORPORA[self.name]()
                from operator import itemgetter

                # Only words longer than two characters take part.
                text = [w for w in words if len(w) > 2]
                fd = FreqDist(zip(text, text[1:]))
                vocab = FreqDist(text)
                # Score: pair count cubed over the product of word counts.
                scored = [
                    ((w1, w2), fd[(w1, w2)] ** 3 / (vocab[w1] * vocab[w2]))
                    for w1, w2 in fd
                ]
                scored.sort(key=itemgetter(1), reverse=True)
                self.model.collocations = list(map(itemgetter(0), scored))
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)
424
+
425
+
426
+ # def collocations():
427
+ # colloc_strings = [w1 + ' ' + w2 for w1, w2 in self._collocations[:num]]
428
+
429
+
430
def app():
    """Open the collocations window and run its mainloop."""
    view = CollocationsView()
    view.mainloop()


if __name__ == "__main__":
    app()

__all__ = ["app"]
pipeline/nltk/app/concordance_app.py ADDED
@@ -0,0 +1,709 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Concordance Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Sumukh Ghodke <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ import queue as q
9
+ import re
10
+ import threading
11
+ from tkinter import (
12
+ END,
13
+ LEFT,
14
+ SUNKEN,
15
+ Button,
16
+ Entry,
17
+ Frame,
18
+ IntVar,
19
+ Label,
20
+ Menu,
21
+ OptionMenu,
22
+ Scrollbar,
23
+ StringVar,
24
+ Text,
25
+ Tk,
26
+ )
27
+ from tkinter.font import Font
28
+
29
+ from nltk.corpus import (
30
+ alpino,
31
+ brown,
32
+ cess_cat,
33
+ cess_esp,
34
+ floresta,
35
+ indian,
36
+ mac_morpho,
37
+ nps_chat,
38
+ sinica_treebank,
39
+ treebank,
40
+ )
41
+ from nltk.draw.util import ShowText
42
+ from nltk.util import in_idle
43
+
44
WORD_OR_TAG = "[^/ ]+"
BOUNDARY = r"\b"

# Messages the model puts on the queue polled by the view.
CORPUS_LOADED_EVENT = "<<CL_EVENT>>"
SEARCH_TERMINATED_EVENT = "<<ST_EVENT>>"
SEARCH_ERROR_EVENT = "<<SE_EVENT>>"
ERROR_LOADING_CORPUS_EVENT = "<<ELC_EVENT>>"

# Delay passed to ``Tk.after`` between two polls of the queue.
POLL_INTERVAL = 50


def _brown_simplified(categories):
    """Return a loader for Brown ``categories`` with the universal tagset."""
    return lambda: brown.tagged_sents(categories=categories, tagset="universal")


# NB All corpora must be specified in a lambda expression so as not to be
# loaded when the module is imported.

_DEFAULT = "English: Brown Corpus (Humor, simplified)"
_CORPORA = {
    "Catalan: CESS-CAT Corpus (simplified)": lambda: cess_cat.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus": lambda: brown.tagged_sents(),
    "English: Brown Corpus (simplified)": lambda: brown.tagged_sents(
        tagset="universal"
    ),
    "English: Brown Corpus (Press, simplified)": _brown_simplified(
        ["news", "editorial", "reviews"]
    ),
    "English: Brown Corpus (Religion, simplified)": _brown_simplified("religion"),
    "English: Brown Corpus (Learned, simplified)": _brown_simplified("learned"),
    "English: Brown Corpus (Science Fiction, simplified)": _brown_simplified(
        "science_fiction"
    ),
    "English: Brown Corpus (Romance, simplified)": _brown_simplified("romance"),
    "English: Brown Corpus (Humor, simplified)": _brown_simplified("humor"),
    "English: NPS Chat Corpus": lambda: nps_chat.tagged_posts(),
    "English: NPS Chat Corpus (simplified)": lambda: nps_chat.tagged_posts(
        tagset="universal"
    ),
    "English: Wall Street Journal Corpus": lambda: treebank.tagged_sents(),
    "English: Wall Street Journal Corpus (simplified)": lambda: treebank.tagged_sents(
        tagset="universal"
    ),
    "Chinese: Sinica Corpus": lambda: sinica_treebank.tagged_sents(),
    "Chinese: Sinica Corpus (simplified)": lambda: sinica_treebank.tagged_sents(
        tagset="universal"
    ),
    "Dutch: Alpino Corpus": lambda: alpino.tagged_sents(),
    "Dutch: Alpino Corpus (simplified)": lambda: alpino.tagged_sents(
        tagset="universal"
    ),
    "Hindi: Indian Languages Corpus": lambda: indian.tagged_sents(files="hindi.pos"),
    "Hindi: Indian Languages Corpus (simplified)": lambda: indian.tagged_sents(
        files="hindi.pos", tagset="universal"
    ),
    "Portuguese: Floresta Corpus (Portugal)": lambda: floresta.tagged_sents(),
    "Portuguese: Floresta Corpus (Portugal, simplified)": lambda: floresta.tagged_sents(
        tagset="universal"
    ),
    "Portuguese: MAC-MORPHO Corpus (Brazil)": lambda: mac_morpho.tagged_sents(),
    "Portuguese: MAC-MORPHO Corpus (Brazil, simplified)": lambda: mac_morpho.tagged_sents(
        tagset="universal"
    ),
    "Spanish: CESS-ESP Corpus (simplified)": lambda: cess_esp.tagged_sents(
        tagset="universal"
    ),
}
116
+
117
+
118
+ class ConcordanceSearchView:
119
+ _BACKGROUND_COLOUR = "#FFF" # white
120
+
121
+ # Colour of highlighted results
122
+ _HIGHLIGHT_WORD_COLOUR = "#F00" # red
123
+ _HIGHLIGHT_WORD_TAG = "HL_WRD_TAG"
124
+
125
+ _HIGHLIGHT_LABEL_COLOUR = "#C0C0C0" # dark grey
126
+ _HIGHLIGHT_LABEL_TAG = "HL_LBL_TAG"
127
+
128
+ # Fraction (0.0-1.0) of the results text scrolled off to the left after a search
129
+ _FRACTION_LEFT_TEXT = 0.30
130
+
131
def __init__(self):
    """Build the concordance window and start loading the default corpus."""
    events = q.Queue()
    self.queue = events
    self.model = ConcordanceSearchModel(events)
    root = Tk()
    self.top = root
    self._init_top(root)
    # The menubar callbacks write to the model, so it is created first.
    self._init_menubar()
    self._init_widgets(root)
    self.load_corpus(self.model.DEFAULT_CORPUS)
    self.after = root.after(POLL_INTERVAL, self._poll)
140
+
141
def _init_top(self, top):
    """Configure geometry, title and close handling of the top-level window."""
    width, height = 950, 680
    top.geometry("%dx%d+50+50" % (width, height))
    top.title("NLTK Concordance Search")
    close = self.destroy
    top.bind("<Control-q>", close)
    top.protocol("WM_DELETE_WINDOW", close)
    top.minsize(width, height)
147
+
148
def _init_widgets(self, parent):
    """Create the main frame and build every widget group inside it."""
    frame_options = dict(
        background=self._BACKGROUND_COLOUR, padx=1, pady=1, border=1
    )
    self.main_frame = Frame(parent, frame_options)
    # The groups are packed in this order.
    builders = (
        self._init_corpus_select,
        self._init_query_box,
        self._init_results_box,
        self._init_paging,
        self._init_status,
    )
    for build in builders:
        build(self.main_frame)
    self.main_frame.pack(fill="both", expand=True)
158
+
159
def _init_menubar(self):
    """
    Create the File and Edit menus.

    Edit holds three radio-button menus (result count, context before,
    context after); the second entry of each is invoked as the default.
    """
    self._result_size = IntVar(self.top)
    self._cntx_bf_len = IntVar(self.top)
    self._cntx_af_len = IntVar(self.top)
    menubar = Menu(self.top)

    filemenu = Menu(menubar, tearoff=0, borderwidth=0)
    filemenu.add_command(
        label="Exit", underline=1, command=self.destroy, accelerator="Ctrl-q"
    )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    def radio_menu(parent, variable, callback, choices, label_format):
        # One radio button per choice; invoking entry 1 selects the
        # middle choice and runs its callback once.
        menu = Menu(parent, tearoff=0)
        for choice in choices:
            menu.add_radiobutton(
                label=label_format % choice,
                variable=variable,
                underline=0,
                value=choice,
                command=callback,
            )
        menu.invoke(1)
        return menu

    editmenu = Menu(menubar, tearoff=0)
    rescntmenu = radio_menu(
        editmenu, self._result_size, self.set_result_size, (20, 50, 100), "%d"
    )
    editmenu.add_cascade(label="Result Count", underline=0, menu=rescntmenu)

    cntxmenu = Menu(editmenu, tearoff=0)
    cntxbfmenu = radio_menu(
        cntxmenu,
        self._cntx_bf_len,
        self.set_cntx_bf_len,
        (60, 80, 100),
        "%d characters",
    )
    cntxmenu.add_cascade(label="Before", underline=0, menu=cntxbfmenu)

    cntxafmenu = radio_menu(
        cntxmenu,
        self._cntx_af_len,
        self.set_cntx_af_len,
        (70, 90, 110),
        "%d characters",
    )
    cntxmenu.add_cascade(label="After", underline=0, menu=cntxafmenu)

    editmenu.add_cascade(label="Context", underline=0, menu=cntxmenu)

    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    self.top.config(menu=menubar)
253
+
254
def set_result_size(self, **kwargs):
    """Copy the result count chosen in the menu into the model."""
    chosen = self._result_size.get()
    self.model.result_count = chosen
256
+
257
def set_cntx_af_len(self, **kwargs):
    """Store the "After" context length chosen in the menu."""
    chosen = self._cntx_af_len.get()
    self._char_after = chosen
259
+
260
def set_cntx_bf_len(self, **kwargs):
    """Store the "Before" context length chosen in the menu."""
    chosen = self._cntx_bf_len.get()
    self._char_before = chosen
262
+
263
def _init_corpus_select(self, parent):
    """
    Create the "Corpus:" label and the corpus drop-down.

    The drop-down lists the default corpus first, followed by
    ``self.model.non_default_corpora()``; picking an entry calls
    ``self.corpus_selected``.
    """
    innerframe = Frame(parent, background=self._BACKGROUND_COLOUR)
    self.var = StringVar(innerframe)
    self.var.set(self.model.DEFAULT_CORPUS)
    Label(
        innerframe,
        justify=LEFT,
        text=" Corpus: ",
        background=self._BACKGROUND_COLOUR,
        padx=2,
        pady=1,
        border=0,
    ).pack(side="left")

    # The former ``other_corpora = list(...).remove(...)`` was dropped:
    # ``list.remove`` returns None and the name was never used.
    om = OptionMenu(
        innerframe,
        self.var,
        self.model.DEFAULT_CORPUS,
        command=self.corpus_selected,
        *self.model.non_default_corpora()
    )
    om["borderwidth"] = 0
    om["highlightthickness"] = 1
    om.pack(side="left")
    innerframe.pack(side="top", fill="x", anchor="n")
291
+
292
def _init_status(self, parent):
    """Create the status label and keep it as ``self.status``."""
    status_label = Label(
        parent,
        justify=LEFT,
        relief=SUNKEN,
        background=self._BACKGROUND_COLOUR,
        border=0,
        padx=1,
        pady=0,
    )
    self.status = status_label
    status_label.pack(side="top", anchor="sw")
303
+
304
def _init_query_box(self, parent):
    """Create the query entry and the Search button next to it."""
    outer = Frame(parent, background=self._BACKGROUND_COLOUR)
    row = Frame(outer, background=self._BACKGROUND_COLOUR)

    self.query_box = Entry(row, width=60)
    self.query_box.pack(side="left", fill="x", pady=25, anchor="center")
    # Pressing Return in the entry starts a search as well.
    self.query_box.bind("<KeyPress-Return>", self.search_enter_keypress_handler)

    self.search_button = Button(
        row,
        text="Search",
        command=self.search,
        borderwidth=1,
        highlightthickness=1,
    )
    self.search_button.pack(side="left", fill="x", pady=25, anchor="center")

    row.pack()
    outer.pack(side="top", fill="x", anchor="n")
320
+
321
def search_enter_keypress_handler(self, *event):
    """Key-press callback: start a search, ignoring the event arguments."""
    self.search()
323
+
324
def _init_results_box(self, parent):
    """
    Create the read-only results text box, its scrollbars and the two
    highlight tags used for matched words and labels.
    """
    outer = Frame(parent)
    text_row = Frame(outer)
    hscroll_row = Frame(outer)
    vbar = Scrollbar(text_row, borderwidth=1)
    hbar = Scrollbar(hscroll_row, borderwidth=1, orient="horiz")
    box = Text(
        text_row,
        font=Font(family="courier", size="16"),
        state="disabled",
        borderwidth=1,
        yscrollcommand=vbar.set,
        xscrollcommand=hbar.set,
        wrap="none",
        width="40",
        height="20",
        exportselection=1,
    )
    self.results_box = box
    box.pack(side="left", fill="both", expand=True)
    highlight_tags = (
        (self._HIGHLIGHT_WORD_TAG, self._HIGHLIGHT_WORD_COLOUR),
        (self._HIGHLIGHT_LABEL_TAG, self._HIGHLIGHT_LABEL_COLOUR),
    )
    for tag, colour in highlight_tags:
        box.tag_config(tag, foreground=colour)
    vbar.pack(side="left", fill="y", anchor="e")
    vbar.config(command=box.yview)
    hbar.pack(side="left", fill="x", expand=True, anchor="w")
    hbar.config(command=box.xview)
    # there is no other way of avoiding the overlap of scrollbars while using pack layout manager!!!
    spacer = Label(hscroll_row, text="   ", background=self._BACKGROUND_COLOUR)
    spacer.pack(side="left", anchor="e")
    text_row.pack(side="top", fill="both", expand=True, anchor="n")
    hscroll_row.pack(side="bottom", fill="x", anchor="s")
    outer.pack(side="top", fill="both", expand=True)
360
+
361
def _init_paging(self, parent):
    """Create the Previous/Next buttons (initially disabled) and reset the page."""
    button_row = Frame(parent, background=self._BACKGROUND_COLOUR)
    self.prev = Button(
        button_row,
        text="Previous",
        command=self.previous,
        width="10",
        borderwidth=1,
        highlightthickness=1,
        state="disabled",
    )
    self.prev.pack(side="left", anchor="center")
    self.next = Button(
        button_row,
        text="Next",
        command=self.__next__,
        width="10",
        borderwidth=1,
        highlightthickness=1,
        state="disabled",
    )
    self.next.pack(side="right", anchor="center")
    button_row.pack(side="top", fill="y")
    self.current_page = 0
385
+
386
def previous(self):
    """Clear the results and ask the model for the page before the current one."""
    self.clear_results_box()
    self.freeze_editable()
    wanted_page = self.current_page - 1
    self.model.prev(wanted_page)
390
+
391
def __next__(self):
    """Clear the results and ask the model for the page after the current one."""
    self.clear_results_box()
    self.freeze_editable()
    wanted_page = self.current_page + 1
    self.model.next(wanted_page)
395
+
396
def about(self, *e):
    """
    Show the "About" text for the concordance search demo.

    The Tk message box is tried first; if importing or showing it
    fails, the same text is displayed through ``ShowText`` instead.

    :param e: Extra positional arguments; accepted and ignored.
    """
    ABOUT = "NLTK Concordance Search Demo\n"
    TITLE = "About: NLTK Concordance Search Demo"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE, parent=self.main_frame).show()
    except Exception:
        # Best-effort fallback on purpose, but a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        ShowText(self.top, TITLE, ABOUT)
405
+
406
def _bind_event_handlers(self):
    """Bind each model event name to its handler on the top-level window."""
    bindings = (
        (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
        (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
        (SEARCH_ERROR_EVENT, self.handle_search_error),
        (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
    )
    for sequence, handler in bindings:
        self.top.bind(sequence, handler)
411
+
412
def _poll(self):
    """
    Handle at most one event queued by the model, then schedule the
    next poll after ``POLL_INTERVAL``.
    """
    try:
        event = self.queue.get(block=False)
    except q.Empty:
        pass
    else:
        dispatch = (
            (CORPUS_LOADED_EVENT, self.handle_corpus_loaded),
            (SEARCH_TERMINATED_EVENT, self.handle_search_terminated),
            (SEARCH_ERROR_EVENT, self.handle_search_error),
            (ERROR_LOADING_CORPUS_EVENT, self.handle_error_loading_corpus),
        )
        for signal, handler in dispatch:
            if event == signal:
                handler(event)
                break
    # Rescheduled unconditionally so that polling never stops.
    self.after = self.top.after(POLL_INTERVAL, self._poll)
427
+
428
def handle_error_loading_corpus(self, event):
    """Report a failed corpus load, clear the view and leave it frozen."""
    message = "Error in loading " + self.var.get()
    self.status["text"] = message
    # Unfreeze before clear_all, then freeze again afterwards.
    self.unfreeze_editable()
    self.clear_all()
    self.freeze_editable()
433
+
434
def handle_corpus_loaded(self, event):
    """Report a finished corpus load and get the view ready for a query."""
    message = self.var.get() + " is loaded"
    self.status["text"] = message
    self.unfreeze_editable()
    self.clear_all()
    self.query_box.focus_set()
439
+
440
def handle_search_terminated(self, event):
    """
    Display the results of a finished search.

    With no results the status reports that; otherwise the current page
    becomes the model's last requested page.
    """
    # todo: refactor the model such that it is less state sensitive
    found = self.model.get_results()
    self.write_results(found)
    self.status["text"] = ""
    if len(found) != 0:
        self.current_page = self.model.last_requested_page
    else:
        self.status["text"] = "No results found for " + self.model.query
    self.unfreeze_editable()
    self.results_box.xview_moveto(self._FRACTION_LEFT_TEXT)
451
+
452
def handle_search_error(self, event):
    """Report that the current query failed and re-enable the controls."""
    message = "Error in query " + self.model.query
    self.status["text"] = message
    self.unfreeze_editable()
455
+
456
def corpus_selected(self, *args):
    """Drop-down callback: load whichever corpus is now selected."""
    self.load_corpus(self.var.get())
459
+
460
def load_corpus(self, selection):
    """Ask the model to load ``selection`` unless it is already selected."""
    if self.model.selected_corpus == selection:
        return
    self.status["text"] = "Loading " + selection + "..."
    self.freeze_editable()
    self.model.load_corpus(selection)
465
+
466
def search(self):
    """
    Start a new search for the text in the query box.

    The page counter, results box and model results are reset first;
    a blank query stops there without searching.
    """
    self.current_page = 0
    self.clear_results_box()
    self.model.reset_results()
    query = self.query_box.get()
    if not query.strip():
        return
    self.status["text"] = "Searching for " + query
    self.freeze_editable()
    self.model.search(query, self.current_page + 1)
476
+
477
+ def write_results(self, results):  # Write each search hit as one line of the results box and tag the matched words and labels.
478
+ self.results_box["state"] = "normal"  # switched to "normal" while writing; set back to "disabled" on the last line
479
+ row = 1  # number used as the line part of the "line.column" indices below
480
+ for each in results:
481
+ sent, pos1, pos2 = each[0].strip(), each[1], each[2]  # each hit is indexed as (sentence, pos1, pos2)
482
+ if len(sent) != 0:
483
+ if pos1 < self._char_before:
484
+ sent, pos1, pos2 = self.pad(sent, pos1, pos2)  # left-pad with spaces so that pos1 >= _char_before
485
+ sentence = sent[pos1 - self._char_before : pos1 + self._char_after]  # _char_before characters before pos1 up to _char_after characters after it
486
+ if not row == len(results):
487
+ sentence += "\n"
488
+ self.results_box.insert(str(row) + ".0", sentence)
489
+ word_markers, label_markers = self.words_and_labels(sent, pos1, pos2)  # (start, end) column pairs within this row
490
+ for marker in word_markers:
491
+ self.results_box.tag_add(
492
+ self._HIGHLIGHT_WORD_TAG,
493
+ str(row) + "." + str(marker[0]),
494
+ str(row) + "." + str(marker[1]),
495
+ )
496
+ for marker in label_markers:
497
+ self.results_box.tag_add(
498
+ self._HIGHLIGHT_LABEL_TAG,
499
+ str(row) + "." + str(marker[0]),
500
+ str(row) + "." + str(marker[1]),
501
+ )
502
+ row += 1  # NOTE(review): indentation is lost in this view; presumably inside `if len(sent) != 0:` so that empty sentences do not consume a row - confirm against the real file
503
+ self.results_box["state"] = "disabled"
504
+
505
def words_and_labels(self, sentence, pos1, pos2):
    """Locate the word and label parts of the matched ``word/label`` tokens.

    The matched text ``sentence[pos1:pos2]`` is split on single spaces and
    each non-empty token is split at its *last* ``/`` into word and label.

    :param sentence: the (possibly padded) tagged sentence.
    :param pos1: start offset of the match within ``sentence``.
    :param pos2: end offset of the match within ``sentence``.
    :return: ``(words, labels)``, two lists of ``(start, end)`` character
        spans. Offsets are relative to the start of the match and shifted
        by ``self._char_before``.

    A token with several slashes (e.g. ``1/2/CD``) keeps everything before
    the last slash as the word. A token with no slash is reported as a
    word with no label. Previously both cases raised ``ValueError`` from
    tuple unpacking.
    """
    base = self._char_before
    words, labels = [], []
    offset = 0  # position of the current token within the matched text
    for token in sentence[pos1:pos2].split(" "):
        if token:
            word, slash, label = token.rpartition("/")
            if not slash:
                # rpartition puts an unsplit token in the last slot.
                word = token
            start = base + offset
            words.append((start, start + len(word)))
            if slash:
                label_start = start + len(word) + 1  # skip the "/"
                labels.append((label_start, label_start + len(label)))
        # Step over the token and the single space that followed it; an
        # empty token (from consecutive spaces) therefore advances by 1.
        offset += len(token) + 1
    return words, labels
525
+
526
def pad(self, sent, hstart, hend):
    """Left-pad ``sent`` with spaces so the highlight starts far enough in.

    If ``hstart`` is already at least ``self._char_before`` the arguments
    come back unchanged; otherwise the missing number of spaces is
    prepended and both highlight offsets are shifted by the same amount.

    :return: ``(sentence, highlight_start, highlight_end)``.
    """
    shortfall = self._char_before - hstart
    if shortfall <= 0:
        return sent, hstart, hend
    return " " * shortfall + sent, hstart + shortfall, hend + shortfall
532
+
533
def destroy(self, *e):
    """Tear down the top-level window; does nothing if already destroyed.

    Cancels the pending ``after`` callback stored in ``self.after`` before
    destroying the window, then clears ``self.top``. Extra positional
    arguments are accepted and ignored.
    """
    window = self.top
    if window is not None:
        window.after_cancel(self.after)
        window.destroy()
        self.top = None
539
+
540
def clear_all(self):
    """Empty the query box, reset the model's query and clear the results."""
    entry = self.query_box
    entry.delete(0, END)
    self.model.reset_query()
    self.clear_results_box()
544
+
545
def clear_results_box(self):
    """Delete all text in the results box, leaving it read-only again."""
    box = self.results_box
    box["state"] = "normal"  # must be editable for delete() to take effect
    box.delete("1.0", END)
    box["state"] = "disabled"
549
+
550
def freeze_editable(self):
    """Disable the query box, the search button and both paging buttons."""
    for widget in (self.query_box, self.search_button, self.prev, self.next):
        widget["state"] = "disabled"
555
+
556
def unfreeze_editable(self):
    """Re-enable the query box and search button, then refresh paging.

    The paging buttons are not blindly enabled; their state is recomputed
    by ``set_paging_button_states``.
    """
    for widget in (self.query_box, self.search_button):
        widget["state"] = "normal"
    self.set_paging_button_states()
560
+
561
def set_paging_button_states(self):
    """Enable or disable the prev/next buttons for the current page.

    "prev" is disabled on page 0 or 1; "next" is enabled only when the
    model reports more pages after the current one.
    """
    on_first_page = self.current_page == 0 or self.current_page == 1
    self.prev["state"] = "disabled" if on_first_page else "normal"
    more_pages = self.model.has_more_pages(self.current_page)
    self.next["state"] = "normal" if more_pages else "disabled"
570
+
571
def fire_event(self, event):
    """Queue ``event`` on the top-level window's event queue.

    Firing an event (appended at the tail of the queue) lets the widget
    rendering happen in the mainloop thread.
    """
    window = self.top
    window.event_generate(event, when="tail")
574
+
575
def mainloop(self, *args, **kwargs):
    """Run the Tk main loop, unless ``in_idle()`` reports true.

    All arguments are forwarded to the top-level window's ``mainloop``.
    """
    if not in_idle():
        self.top.mainloop(*args, **kwargs)
579
+
580
+
581
class ConcordanceSearchModel:
    """Corpus, query and paged-result state behind the concordance view.

    Loading a corpus and scanning it for matches run on the nested
    ``LoadCorpus`` / ``SearchCorpus`` threads, which signal completion by
    putting event constants on ``queue``.
    """

    def __init__(self, queue):
        # ``queue`` is only ever used through its ``put`` method here.
        self.queue = queue
        self.CORPORA = _CORPORA
        self.DEFAULT_CORPUS = _DEFAULT
        self.selected_corpus = None
        self.reset_query()
        self.reset_results()
        self.result_count = None
        self.last_sent_searched = 0

    def non_default_corpora(self):
        """Return the sorted corpus names, excluding the default corpus."""
        names = list(self.CORPORA.keys())
        names.remove(self.DEFAULT_CORPUS)
        names.sort()
        return names

    def load_corpus(self, name):
        """Select ``name`` and start loading its sentences in a thread."""
        self.selected_corpus = name
        self.tagged_sents = []
        self.LoadCorpus(name, self).start()

    def search(self, query, page):
        """Remember ``query``/``page`` and start a search thread for them."""
        self.query = query
        self.last_requested_page = page
        worker = self.SearchCorpus(self, page, self.result_count)
        worker.start()

    def next(self, page):
        """Request ``page``; search only if it has not been fetched yet."""
        self.last_requested_page = page
        if len(self.results) >= page:
            # Already cached: just tell the view to redisplay.
            self.queue.put(SEARCH_TERMINATED_EVENT)
            return
        self.search(self.query, page)

    def prev(self, page):
        """Request an earlier page and signal the view to redisplay."""
        self.last_requested_page = page
        self.queue.put(SEARCH_TERMINATED_EVENT)

    def reset_results(self):
        """Forget all cached result pages and the search position."""
        self.last_sent_searched = 0
        self.results = []
        self.last_page = None

    def reset_query(self):
        """Forget the current query."""
        self.query = None

    def set_results(self, page, resultset):
        """Store ``resultset`` as the (1-based) ``page`` of results."""
        self.results.insert(page - 1, resultset)

    def get_results(self):
        """Return the result set of the most recently requested page."""
        return self.results[self.last_requested_page - 1]

    def has_more_pages(self, page):
        """Tell whether pages may exist after ``page``.

        False when there are no results at all; True while the last page
        is still unknown; otherwise whether ``page`` precedes it.
        """
        nothing_found = self.results == [] or self.results[0] == []
        if nothing_found:
            return False
        return True if self.last_page is None else page < self.last_page

    class LoadCorpus(threading.Thread):
        """Thread that loads one corpus into ``model.tagged_sents``."""

        def __init__(self, name, model):
            threading.Thread.__init__(self)
            self.model, self.name = model, name

        def run(self):
            try:
                sentences = self.model.CORPORA[self.name]()
                # Flatten each tagged sentence into "word/tag word/tag ...".
                flattened = []
                for sent in sentences:
                    flattened.append(" ".join(w + "/" + t for (w, t) in sent))
                self.model.tagged_sents = flattened
                self.model.queue.put(CORPUS_LOADED_EVENT)
            except Exception as e:
                print(e)
                self.model.queue.put(ERROR_LOADING_CORPUS_EVENT)

    class SearchCorpus(threading.Thread):
        """Thread that scans the loaded sentences for one page of matches."""

        def __init__(self, model, page, count):
            self.model, self.count, self.page = model, count, page
            threading.Thread.__init__(self)

        def run(self):
            pattern = self.processed_query()
            matches, scanned = [], 0
            remaining = self.model.tagged_sents[self.model.last_sent_searched :]
            for sent in remaining:
                try:
                    hit = re.search(pattern, sent)
                except re.error:
                    self.model.reset_results()
                    self.model.queue.put(SEARCH_ERROR_EVENT)
                    return
                if hit:
                    matches.append((sent, hit.start(), hit.end()))
                    # One match beyond the page size proves another page
                    # exists; stop there.
                    if len(matches) > self.count:
                        self.model.last_sent_searched += scanned - 1
                        break
                scanned += 1
            if len(matches) <= self.count:
                # Ran out of sentences: this is the final page.
                self.model.last_sent_searched += scanned - 1
                self.model.last_page = self.page
                self.model.set_results(self.page, matches)
            else:
                # Drop the extra look-ahead match.
                self.model.set_results(self.page, matches[:-1])
            self.model.queue.put(SEARCH_TERMINATED_EVENT)

        def processed_query(self):
            """Translate the model's query into a regexp over word/tag text.

            Each whitespace-separated term has ``.`` rewritten to
            ``[^/ ]``; an all-uppercase term is matched as a tag, a term
            containing ``/`` is used as given, and anything else is
            matched as a word.
            """
            pieces = []
            for raw in self.model.query.split():
                term = re.sub(r"\.", r"[^/ ]", raw)
                if re.match("[A-Z]+$", term):
                    core = WORD_OR_TAG + "/" + term
                elif "/" in term:
                    core = term
                else:
                    core = term + "/" + WORD_OR_TAG
                pieces.append(BOUNDARY + core + BOUNDARY)
            return " ".join(pieces)
699
+
700
+
701
def app():
    """Create the concordance search window and run its main loop."""
    ConcordanceSearchView().mainloop()
704
+
705
+
706
+ if __name__ == "__main__":
707
+ app()
708
+
709
+ __all__ = ["app"]
pipeline/nltk/app/nemo_app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Finding (and Replacing) Nemo, Version 1.1, Aristide Grange 2006/06/06
2
+ # https://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/496783
3
+
4
+ """
5
+ Finding (and Replacing) Nemo
6
+
7
+ Instant Regular Expressions
8
+ Created by Aristide Grange
9
+ """
10
+ import itertools
11
+ import re
12
+ from tkinter import SEL_FIRST, SEL_LAST, Frame, Label, PhotoImage, Scrollbar, Text, Tk
13
+
14
+ windowTitle = "Finding (and Replacing) Nemo"
15
+ initialFind = r"n(.*?)e(.*?)m(.*?)o"
16
+ initialRepl = r"M\1A\2K\3I"
17
+ initialText = """\
18
+ Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
19
+ Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.
20
+ Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.
21
+ Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
22
+ """
23
+ images = {
24
+ "FIND": "R0lGODlhMAAiAPcAMf/////37//35//n1v97Off///f/9/f37/fexvfOvfeEQvd7QvdrQvdrKfdaKfdSMfdSIe/v9+/v7+/v5+/n3u/e1u/Wxu/Gre+1lO+tnO+thO+Ua+97Y+97Oe97Me9rOe9rMe9jOe9jMe9jIe9aMefe5+fe3ufezuece+eEWudzQudaIedSIedKMedKIedCKedCId7e1t7Wzt7Oxt7Gvd69vd69rd61pd6ljN6UjN6Ue96EY95zY95rUt5rQt5jMd5SId5KIdbn59be3tbGztbGvda1rdaEa9Z7a9Z7WtZzQtZzOdZzMdZjMdZaQtZSOdZSMdZKMdZCKdZCGNY5Ic7W1s7Oxs7Gtc69xs69tc69rc6tpc6llM6clM6cjM6Ue86EY85zWs5rSs5SKc5KKc5KGMa1tcatrcalvcalnMaUpcZ7c8ZzMcZrUsZrOcZrMcZaQsZSOcZSMcZKMcZCKcZCGMYxIcYxGL3Gxr21tb21rb2lpb2crb2cjL2UnL2UlL2UhL2Ec717Wr17Ur1zWr1rMb1jUr1KMb1KIb1CIb0xGLWlrbWlpbWcnLWEe7V7c7VzY7VzUrVSKbVKMbVCMbVCIbU5KbUxIbUxEK2lta2lpa2clK2UjK2MnK2MlK2Ea617e61za61rY61rMa1jSq1aUq1aSq1SQq1KKa0xEKWlnKWcnKWUnKWUhKWMjKWEa6Vza6VrWqVjMaVaUqVaKaVSMaVCMaU5KaUxIaUxGJyclJyMe5yElJyEhJx7e5x7c5xrOZxaQpxSOZxKQpw5IZSMhJSEjJR7c5Rre5RrY5RrUpRSQpRSKZRCOZRCKZQxKZQxIYyEhIx7hIxza4xzY4xrc4xjUoxaa4xaUoxSSoxKQoxCMYw5GIR7c4Rzc4Rre4RjY4RjWoRaa4RSWoRSUoRSMYRKQoRCOYQ5KYQxIXtra3taY3taSntKOXtCMXtCKXNCMXM5MXMxIWtSUmtKSmtKQmtCOWs5MWs5KWs5IWNCKWMxIVIxKUIQCDkhGAAAACH+AS4ALAAAAAAwACIAAAj/AAEIHEiwoMGDCBMqXMiwoUOHMqxIeEiRoZVp7cpZ29WrF4WKIAd208dGAQEVbiTVChUjZMU9+pYQmPmBZpxgvVw+nDdKwQICNVcIXQEkTgKdDdUJ+/nggVAXK1xI3TEA6UIr2uJ8iBqka1cXXTlkqGoVYRZ7iLyqBSs0iiEtZQVKiDGxBI1u3NR6lUpGDKg8MSgEQCphU7Z22vhg0dILXRCpYLuSCcYJT4wqXASBQaBzU7klHxC127OHD7ZDJFpERqRt0x5OnwQpmZmCLEhrbgg4WIHO1RY+nbQ9WRGEDJlmnXwJ+9FBgXMCIzYMVijBBgYMFxIMqJBMSc0Ht7qh/+Gjpte2rnYsYeNlasWIBgQ6yCewIoPCCp/cyP/wgUGbXVu0QcADZNBDnh98gHMLGXYQUw02w61QU3wdbNWDbQVVIIhMMwFF1DaZiPLBAy7E04kafrjSizaK3LFNNc0AAYRQDsAHHQlJ2IDQJ2zE1+EKDjiAijShkECCC8Qgw4cr7ZgyzC2WaHPNLWWoNeNWPiRAw0QFWQFMhz8C+QQ20yAiVSrY+MGOJCsccsst2GCzoHFxxEGGC+8hgs0MB2kyCpgzrUDCbs1Es41UdtATHFFkWELMOtsoQsYcgvRRQw5RSDgGOjZMR1AvPQIq6KCo9AKOJWDd48owQlHR4DXEKP9iyRrK+DNNBTu4RwIPFeTAGUG7hAomkA84gEg1m6ADljy9PBKGGJY4ig0xlsTBRSn98FOFDUC8pwQOPkgHbCGAzhTkA850s0c7j6Hjix9+gBIrMXLeAccWXUCyiRBcBEECdEJ98KtAqtBCYQc/OvDENnl4gYpUxISCIjjzylkGGV9okYUVNogRhAOBuuAEhjG08wOgDYzAgA5bCjIoCe5uwUk80RKTTSppPREGGGCIISOQ9AXBg
6cC6WIywvCpoMHAocRBwhP4bHLFLujYkV42xNxBRhAyGrc113EgYtRBerDDDHMoDCyQEL5sE083EkgwQyBhxGFHMM206DUixGxmE0wssbQjCQ4JCaFKFwgQTVAVVhQUwAVPIFJKrHfYYRwi6OCDzzuIJIFhXAD0EccPsYRiSyqKSDpFcWSMIcZRoBMkQyA2BGZDIKSYcggih8TRRg4VxM5QABVYYLxgwiev/PLMCxQQADs=",
25
+ "find": "R0lGODlhMAAiAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OSkpKRgYGAAAAAAAAAAAAAAAAAAAACH+AS4ALAAAAAAwACIAAAX/ICCOZGmeaKquY2AGLiuvMCAUBuHWc48Kh0iFInEYCb4kSQCxPBiMxkMigRQEgJiSFVBYHNGG0RiZOHjblWAiiY4fkDhEYoBp06dAWfyAQyKAgAwDaHgnB0RwgYASgQ0IhDuGJDAIFhMRVFSLEX8QCJJ4AQM5AgQHTZqqjBAOCQQEkWkCDRMUFQsICQ4Vm5maEwwHOAsPDTpKMAsUDlO4CssTcb+2DAp8YGCyNFoCEsZwFQ3QDRTTVBRS0g1QbgsCd5QAAwgIBwYFAwStzQ8UEdCKVchky0yVBw7YuXkAKt4IAg74vXHVagqFBRgXSCAyYWAVCH0SNhDTitCJfSL5/4RbAPKPhQYYjVCYYAvCP0BxEDaD8CheAAHNwqh8MMGPSwgLeJWhwHSjqkYI+xg4MMCEgQjtRvZ7UAYCpghMF7CxONOWJkYR+rCpY4JlVpVxKDwYWEactKW9mhYRtqCTgwgWEMArERSK1j5q//6T8KXonFsShpiJkAECgQYVjykooCVA0JGHEWNiYCHThTFeb3UkoiCCBgwGEKQ1kuAJlhFwhA71h5SukwUM5qqeCSGBgicEWkfNiWSERtBad4JNIBaQBaQah1ToyGZBAnsIuIJs1qnqiAIVjIE2gnAB1T5x0icgzXT79ipgMOOEH6HBbREBMJCeGEY08IoLAkzB1YYFwjxwSUGSNULQJnNUwRYlCcyEkALIxECAP9cNMMABYpRhy3ZsSLDaR70oUAiABGCkAxowCGCAAfDYIQACXoElGRsdXWDBdg2Y90IWktDYGYAB9PWHP0PMdFZaF07SQgAFNDAMAQg0QA1UC8xoZQl22JGFPgWkOUCOL1pZQyhjxinnnCWEAAA7",
26
+ "REPL": "R0lGODlhMAAjAPcAMf/////3//+lOf+UKf+MEPf///f39/f35/fv7/ecQvecOfecKfeUIfeUGPeUEPeUCPeMAO/37+/v9+/v3u/n3u/n1u+9jO+9c++1hO+ta++tY++tWu+tUu+tSu+lUu+lQu+lMe+UMe+UKe+UGO+UEO+UAO+MCOfv5+fvxufn7+fn5+fnzue9lOe9c+e1jOe1e+e1c+e1a+etWuetUuelQuecOeeUUueUCN7e597e3t7e1t7ezt7evd7Wzt7Oxt7Ovd7Otd7Opd7OnN7Gtd7Gpd69lN61hN6ta96lStbextberdbW3tbWztbWxtbOvdbOrda1hNalUtaECM7W1s7Ozs7Oxs7Otc7Gxs7Gvc69tc69rc69pc61jM6lc8bWlMbOvcbGxsbGpca9tca9pca1nMaMAL3OhL3Gtb21vb21tb2tpb2tnL2tlLW9tbW9pbW9e7W1pbWtjLWcKa21nK2tra2tnK2tlK2lpa2llK2ljK2le6WlnKWljKWUe6WUc6WUY5y1QpyclJycjJychJyUc5yMY5StY5SUe5SMhJSMe5SMc5SMWpSEa5SESoyUe4yMhIyEY4SlKYScWoSMe4SEe4SEa4R7c4R7Y3uMY3uEe3t7e3t7c3tza3tzY3trKXtjIXOcAHOUMXOEY3Nzc3NzWnNrSmulCGuUMWuMGGtzWmtrY2taMWtaGGOUOWOMAGNzUmNjWmNjSmNaUmNaQmNaOWNaIWNSCFqcAFpjUlpSMVpSIVpSEFpKKVKMAFJSUlJSSlJSMVJKMVJKGFJKAFI5CEqUAEqEAEpzQkpKIUpCQkpCGEpCAEo5EEoxAEJjOUJCOUJCAEI5IUIxADl7ADlaITlCOTkxMTkxKTkxEDkhADFzADFrGDE5OTExADEpEClrCCkxKSkpKSkpISkpACkhCCkhACkYACFzACFrACEhCCEYGBhjEBhjABghABgYCBgYABgQEBgQABAQABAIAAhjAAhSAAhKAAgIEAgICABaAABCAAAhAAAQAAAIAAAAAAAAACH+AS4ALAAAAAAwACMAAAj/AAEIHEiwoMGDCBMqXMiwocOHAA4cgEixIIIJO3JMmAjADIqKFU/8MHIkg5EgYXx4iaTkI0iHE6wE2TCggYILQayEAgXIy8uGCKz8sDCAQAMRG3iEcXULlJkJPwli3OFjh9UdYYLE6NBhA04UXHoVA2XoTZgfPKBWlOBDphAWOdfMcfMDLloeO3hIMjbWVCQ5Fn6E2UFxgpsgFjYIEBADrZU6luqEEfqjTqpt54z1uuWqTIcgWAk7PECGzIUQDRosDmxlUrVJkwQJkqVuX71v06YZcyUlROAdbnLAJKPFyAYFAhoMwFlnEh0rWkpz8raPHm7dqKKc/KFFkBUrVn1M/ziBcEIeLUEQI8/AYk0i9Be4sqjsrN66c9/OnbobhpR3HkIUoZ0WVnBE0AGLFKKFD0HAFUQe77HQgQI1hRBDEHMcY0899bBzihZuCPILJD8EccEGGzwAQhFaUHHQH82sUkgeNHISDBk8WCCCcsqFUEQWmOyzjz3sUGNNOO5Y48YOEgowAAQhnBScQV00k82V47jzjy9CXZBcjziFoco//4CDiSOyhPMPLkJZkEBqJmRQxA9uZGEQD8Ncmc044/zzDF2IZQBCCDYE8QMZz/iiCSx0neHGI7BIhhhNn+1gxRpokEcQAp7seWU7/PwTyxqG/iCEEVzQmUombnDRxRExzP9nBR2PCKLFD3UJwcMPa/SRqUGNWJmNOVn+M44ukMRB4KGcWDNLVhuUMEIJAlzwA3DJBHMJIXm4sQYhqyxCRQQGLSIsn1qac2UzysQSyzX/hLMGD0F0IMCODYAQBA9W/PKPOcRiw0wzwxTiokF9dLMnuv/Mo+fCZF7jBr0xbDDCACWEYKgb1vzjDp/jZNOMLX0IZxAKq2TZTjtaOjwOsXyG+s8sZJTIQsUdIGHoJPf8w487QI/TD
St5mGwQFZxc406o8HiDJchk/ltLHpSlJwSvz5DpTjvmuGNOM57koelBOaAhiCaaPBLL0wwbm003peRBnBZqJMJL1ECz/HXYYx/NdAIOOVCxQyLorswymU93o0wuwfAiTDNR/xz0MLXU0XdCE+UwSTRZAq2lsSATu+4wkGvt+TjNzPLrQyegAUku2Hij5cd8LhxyM8QIg4w18HgcdC6BTBFSDmfQqsovttveDcG7lFLHI75cE841sARCxeWsnxC4G9HADPK6ywzDCRqBo0EHHWhMgT1IJzziNci1N7PMKnSYfML96/90AiJKey/0KtbLX1QK0rrNnQ541xugQ7SHhkXBghN0SKACWRc4KlAhBwKcIOYymJCAAAA7",
27
+ "repl": "R0lGODlhMAAjAPQAMf////f39+/v7+fn597e3tbW1s7OzsbGxr29vbW1ta2traWlpZycnJSUlIyMjISEhHt7e3Nzc2tra2NjY1paWlJSUkpKSkJCQjk5OTExMSkpKSEhIRgYGBAQEAgICAAAACH+AS4ALAAAAAAwACMAAAX/ICCOZGmeaKqubOu+gCDANBkIQ1EMQhAghFptYEAkEgjEwXBo7ISvweGgWCwUysPjwTgEoCafTySYIhYMxgLBjEQgCULvCw0QdAZdoVhUIJUFChISEAxYeQM1N1OMTAp+UwZ5eA4TEhFbDWYFdC4ECVMJjwl5BwsQa0umEhUVlhESDgqlBp0rAn5nVpBMDxeZDRQbHBgWFBSWDgtLBnFjKwRYCI9VqQsPs0YKEcMXFq0UEalFDWx4BAO2IwPjppAKDkrTWKYUGd7fEJJFEZpM00cOzCgh4EE8SaoWxKNixQooBRMyZMBwAYIRBhUgLDGS4MoBJeoANMhAgQsaCRZm/5lqaCUJhA4cNHjDoKEDBlJUHqkBlYBTiQUZNGjYMMxDhY3VWk6R4MEDBoMUak5AqoYBqANIBo4wcGGDUKIeLlzVZmWJggsVIkwAZaQSA3kdZzlKkIiEAAlDvW5oOkEBs488JTw44oeUIwdvVTFTUK7uiAAPgubt8GFDhQepqETAQCFU1UMGzlqAgFhUsAcCS0AO6lUDhw8xNRSbENGDhgWSHjWUe6ACbKITizmopZoBa6KvOwj9uuHDhwxyj3xekgDDhw5EvWKo0IB4iQLCOCC/njc7ZQ8UeGvza+ABZZgcxJNc4FO1gc0cOsCUrHevc8tdIMTIAhc4F198G2Qwwd8CBIQUAwEINABBBJUwR9R5wElgVRLwWODBBx4cGB8GEzDQIAo33CGJA8gh+JoH/clUgQU0YvDhdfmJdwEFC6Sjgg8yEPAABsPkh2F22cl2AQbn6QdTghTQ5eAJAQyQAAQV0MSBB9gRVZ4GE1mw5JZOAmiAVi1UWcAZDrDyZXYTeaOhA/bIVuIBPtKQ4h7ViYekUPdcEAEbzTzCRp5CADmAAwj+ORGPBcgwAAHo9ABGCYtm0ChwFHShlRiXhmHlkAcCiOeUodqQw5W0oXLAiamy4MOkjOyAaqxUymApDCEAADs=",
28
+ }
29
+ colors = ["#FF7B39", "#80F121"]
30
+ emphColors = ["#DAFC33", "#F42548"]
31
+ fieldParams = {
32
+ "height": 3,
33
+ "width": 70,
34
+ "font": ("monaco", 14),
35
+ "highlightthickness": 0,
36
+ "borderwidth": 0,
37
+ "background": "white",
38
+ }
39
+ textParams = {
40
+ "bg": "#F7E0D4",
41
+ "fg": "#2321F1",
42
+ "highlightthickness": 0,
43
+ "width": 1,
44
+ "height": 10,
45
+ "font": ("verdana", 16),
46
+ "wrap": "word",
47
+ }
48
+
49
+
50
class Zone:
    """One editing zone: an icon, a small regexp field and a large text area.

    Subclasses provide ``substitute``, which ``refresh`` calls.
    """

    def __init__(self, image, initialField, initialText):
        # Top row: icon + regexp field.
        field_frame = Frame(root)
        field_frame.config(background="white")
        self.image = PhotoImage(format="gif", data=images[image.upper()])
        self.imageDimmed = PhotoImage(format="gif", data=images[image])
        self.img = Label(field_frame)
        self.img.config(borderwidth=0)
        self.img.pack(side="left")
        self.fld = Text(field_frame, **fieldParams)
        self.initScrollText(field_frame, self.fld, initialField)
        # Second row: the text the regexp is applied to.
        text_frame = Frame(root)
        self.txt = Text(text_frame, **textParams)
        self.initScrollText(text_frame, self.txt, initialText)
        # One background tag and one "emph" foreground tag per colour.
        for idx in (0, 1):
            self.txt.tag_config(colors[idx], background=colors[idx])
            self.txt.tag_config("emph" + colors[idx], foreground=emphColors[idx])

    def initScrollText(self, frm, txt, contents):
        """Attach a vertical scrollbar to ``txt``, fill it and pack the row."""
        scl = Scrollbar(frm)
        scl.config(command=txt.yview)
        scl.pack(side="right", fill="y")
        txt.pack(side="left", expand=True, fill="x")
        txt.config(yscrollcommand=scl.set)
        txt.insert("1.0", contents)
        frm.pack(fill="x")
        # Thin ridge separating this row from the next one.
        separator = Frame(height=2, bd=1, relief="ridge")
        separator.pack(fill="x")

    def refresh(self):
        """Redo the substitution; show the dimmed icon on a regexp error."""
        self.colorCycle = itertools.cycle(colors)
        try:
            self.substitute()
        except re.error:
            self.img.config(image=self.imageDimmed)
        else:
            self.img.config(image=self.image)
85
+
86
+
87
class FindZone(Zone):
    """The "find" zone: highlights every match of the regexp in its text.

    Matches are tagged with alternating colours. When part of the regexp
    field is selected, that part is wrapped in a named group ``emph`` and
    its sub-match receives an additional "emph" tag.
    """

    def addTags(self, m):
        """Tag match ``m`` and, if present, its ``emph`` group.

        Used as the replacement callback of ``sub``; it returns ``None``.
        """
        color = next(self.colorCycle)
        self.txt.tag_add(color, "1.0+%sc" % m.start(), "1.0+%sc" % m.end())
        try:
            emph_start, emph_end = m.start("emph"), m.end("emph")
        except IndexError:
            # The pattern has no "emph" group (nothing selected in the field).
            return
        if emph_start < 0:
            # The group exists but did not take part in this match.
            return
        self.txt.tag_add(
            "emph" + color, "1.0+%sc" % emph_start, "1.0+%sc" % emph_end
        )

    def substitute(self, *args):
        """Recompile the regexp from the field and re-highlight the text.

        :raises re.error: if the field content is not a valid regexp;
            ``self.rex`` is then left as the empty pattern.
        """
        from tkinter import TclError

        for color in colors:
            self.txt.tag_remove(color, "1.0", "end")
            self.txt.tag_remove("emph" + color, "1.0", "end")
        self.rex = re.compile("")  # default value in case of malformed regexp
        self.rex = re.compile(self.fld.get("1.0", "end")[:-1], re.MULTILINE)
        try:
            selected = self.fld.get(SEL_FIRST, SEL_LAST)
            # Check that the selection is a valid regexp by itself before
            # splicing it into the full pattern as the "emph" group.
            re.compile("(?P<emph>%s)" % selected)
            self.rexSel = re.compile(
                "%s(?P<emph>%s)%s"
                % (
                    self.fld.get("1.0", SEL_FIRST),
                    selected,
                    self.fld.get(SEL_LAST, "end")[:-1],
                ),
                re.MULTILINE,
            )
        except (TclError, re.error):
            # No selection, or the selection does not split the pattern
            # into valid pieces: highlight with the plain regexp instead.
            self.rexSel = self.rex
        # sub() is used only to invoke addTags on every match.
        self.rexSel.sub(self.addTags, self.txt.get("1.0", "end"))
118
+
119
+
120
class ReplaceZone(Zone):
    """The "replace" zone: shows the find zone's text after substitution."""

    def addTags(self, m):
        """Replace match ``m`` in this zone's text with its substitution.

        ``self.diff`` is the running length difference caused by earlier
        replacements, so offsets in the source text map to this text.
        """
        replacement = sz.rex.sub(self.repl, m.group())
        begin = m.start() + self.diff
        finish = m.end() + self.diff
        self.txt.delete("1.0+%sc" % begin, "1.0+%sc" % finish)
        self.txt.insert("1.0+%sc" % begin, replacement, next(self.colorCycle))
        self.diff += len(replacement) - (m.end() - m.start())

    def substitute(self):
        """Copy the find zone's text here and apply every replacement."""
        source = sz.txt.get("1.0", "end")[:-1]
        self.txt.delete("1.0", "end")
        self.txt.insert("1.0", source)
        self.diff = 0
        template = self.fld.get("1.0", "end")[:-1]
        # Rewrite numeric backreferences (\1) in the unambiguous \g<1> form.
        self.repl = rex0.sub(r"\\g<\1>", template)
        sz.rex.sub(self.addTags, sz.txt.get("1.0", "end")[:-1])
135
+
136
+
137
def launchRefresh(_):
    """Schedule a refresh of both zones via ``after_idle``.

    The event argument is ignored.
    """
    for zone in (sz, rz):
        zone.fld.after_idle(zone.refresh)
140
+
141
+
142
def app():
    """Build the Nemo window (find zone + replace zone) and run it.

    The root window, both zones and the backreference regexp are stored
    in module globals because the zone classes refer to them by name.
    """
    global root, sz, rz, rex0
    root = Tk()
    root.resizable(height=False, width=True)
    root.title(windowTitle)
    root.minsize(width=250, height=0)
    sz = FindZone("find", initialFind, initialText)
    # Selection changes in the find field alter the highlighted group.
    for sequence in ("<Button-1>", "<ButtonRelease-1>", "<B1-Motion>"):
        sz.fld.bind(sequence, launchRefresh)
    sz.rexSel = re.compile("")
    rz = ReplaceZone("repl", initialRepl, "")
    rex0 = re.compile(r"(?<!\\)\\([0-9]+)")
    root.bind_all("<Key>", launchRefresh)
    launchRefresh(None)
    root.mainloop()
158
+
159
+
160
+ if __name__ == "__main__":
161
+ app()
162
+
163
+ __all__ = ["app"]
pipeline/nltk/app/rdparser_app.py ADDED
@@ -0,0 +1,1052 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Recursive Descent Parser Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Edward Loper <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ """
9
+ A graphical tool for exploring the recursive descent parser.
10
+
11
+ The recursive descent parser maintains a tree, which records the
12
+ structure of the portion of the text that has been parsed. It uses
13
+ CFG productions to expand the fringe of the tree, and matches its
14
+ leaves against the text. Initially, the tree contains the start
15
+ symbol ("S"). It is shown in the main canvas, to the right of the
16
+ list of available expansions.
17
+
18
+ The parser builds up a tree structure for the text using three
19
+ operations:
20
+
21
+ - "expand" uses a CFG production to add children to a node on the
22
+ fringe of the tree.
23
+ - "match" compares a leaf in the tree to a text token.
24
+ - "backtrack" returns the tree to its state before the most recent
25
+ expand or match operation.
26
+
27
+ The parser maintains a list of tree locations called a "frontier" to
28
+ remember which nodes have not yet been expanded and which leaves have
29
+ not yet been matched against the text. The leftmost frontier node is
30
+ shown in green, and the other frontier nodes are shown in blue. The
31
+ parser always performs expand and match operations on the leftmost
32
+ element of the frontier.
33
+
34
+ You can control the parser's operation by using the "expand," "match,"
35
+ and "backtrack" buttons; or you can use the "step" button to let the
36
+ parser automatically decide which operation to apply. The parser uses
37
+ the following rules to decide which operation to apply:
38
+
39
+ - If the leftmost frontier element is a token, try matching it.
40
+ - If the leftmost frontier element is a node, try expanding it with
41
+ the first untried expansion.
42
+ - Otherwise, backtrack.
43
+
44
+ The "expand" button applies the untried expansion whose CFG production
45
+ is listed earliest in the grammar. To manually choose which expansion
46
+ to apply, click on a CFG production from the list of available
47
+ expansions, on the left side of the main window.
48
+
49
+ The "autostep" button will let the parser continue applying
50
+ operations to the tree until it reaches a complete parse. You can
51
+ cancel an autostep in progress at any time by clicking on the
52
+ "autostep" button again.
53
+
54
+ Keyboard Shortcuts::
55
+ [Space]\t Perform the next expand, match, or backtrack operation
56
+ [a]\t Step through operations until the next complete parse
57
+ [e]\t Perform an expand operation
58
+ [m]\t Perform a match operation
59
+ [b]\t Perform a backtrack operation
60
+ [Delete]\t Reset the parser
61
+ [g]\t Show/hide available expansions list
62
+ [h]\t Help
63
+ [Ctrl-p]\t Print
64
+ [q]\t Quit
65
+ """
66
+
67
+ from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
68
+ from tkinter.font import Font
69
+
70
+ from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
71
+ from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
72
+ from nltk.parse import SteppingRecursiveDescentParser
73
+ from nltk.tree import Tree
74
+ from nltk.util import in_idle
75
+
76
+
77
+ class RecursiveDescentApp:
78
+ """
79
+ A graphical tool for exploring the recursive descent parser. The tool
80
+ displays the parser's tree and the remaining text, and allows the
81
+ user to control the parser's operation. In particular, the user
82
+ can expand subtrees on the frontier, match tokens on the frontier
83
+ against the text, and backtrack. A "step" button simply steps
84
+ through the parsing process, performing the operations that
85
+ ``RecursiveDescentParser`` would use.
86
+ """
87
+
88
+ def __init__(self, grammar, sent, trace=0):
89
+ self._sent = sent
90
+ self._parser = SteppingRecursiveDescentParser(grammar, trace)
91
+
92
+ # Set up the main window.
93
+ self._top = Tk()
94
+ self._top.title("Recursive Descent Parser Application")
95
+
96
+ # Set up key bindings.
97
+ self._init_bindings()
98
+
99
+ # Initialize the fonts.
100
+ self._init_fonts(self._top)
101
+
102
+ # Animations. animating_lock is a lock to prevent the demo
103
+ # from performing new operations while it's animating.
104
+ self._animation_frames = IntVar(self._top)
105
+ self._animation_frames.set(5)
106
+ self._animating_lock = 0
107
+ self._autostep = 0
108
+
109
+ # The user can hide the grammar.
110
+ self._show_grammar = IntVar(self._top)
111
+ self._show_grammar.set(1)
112
+
113
+ # Create the basic frames.
114
+ self._init_menubar(self._top)
115
+ self._init_buttons(self._top)
116
+ self._init_feedback(self._top)
117
+ self._init_grammar(self._top)
118
+ self._init_canvas(self._top)
119
+
120
+ # Initialize the parser.
121
+ self._parser.initialize(self._sent)
122
+
123
+ # Resize callback
124
+ self._canvas.bind("<Configure>", self._configure)
125
+
126
+ #########################################
127
+ ## Initialization Helpers
128
+ #########################################
129
+
130
+ def _init_fonts(self, root):
131
+ # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
132
+ self._sysfont = Font(font=Button()["font"])
133
+ root.option_add("*Font", self._sysfont)
134
+
135
+ # What's our font size (default=same as sysfont)
136
+ self._size = IntVar(root)
137
+ self._size.set(self._sysfont.cget("size"))
138
+
139
+ self._boldfont = Font(family="helvetica", weight="bold", size=self._size.get())
140
+ self._font = Font(family="helvetica", size=self._size.get())
141
+ if self._size.get() < 0:
142
+ big = self._size.get() - 2
143
+ else:
144
+ big = self._size.get() + 2
145
+ self._bigfont = Font(family="helvetica", weight="bold", size=big)
146
+
147
+ def _init_grammar(self, parent):
148
+ # Grammar view.
149
+ self._prodframe = listframe = Frame(parent)
150
+ self._prodframe.pack(fill="both", side="left", padx=2)
151
+ self._prodlist_label = Label(
152
+ self._prodframe, font=self._boldfont, text="Available Expansions"
153
+ )
154
+ self._prodlist_label.pack()
155
+ self._prodlist = Listbox(
156
+ self._prodframe,
157
+ selectmode="single",
158
+ relief="groove",
159
+ background="white",
160
+ foreground="#909090",
161
+ font=self._font,
162
+ selectforeground="#004040",
163
+ selectbackground="#c0f0c0",
164
+ )
165
+
166
+ self._prodlist.pack(side="right", fill="both", expand=1)
167
+
168
+ self._productions = list(self._parser.grammar().productions())
169
+ for production in self._productions:
170
+ self._prodlist.insert("end", (" %s" % production))
171
+ self._prodlist.config(height=min(len(self._productions), 25))
172
+
173
+ # Add a scrollbar if there are more than 25 productions.
174
+ if len(self._productions) > 25:
175
+ listscroll = Scrollbar(self._prodframe, orient="vertical")
176
+ self._prodlist.config(yscrollcommand=listscroll.set)
177
+ listscroll.config(command=self._prodlist.yview)
178
+ listscroll.pack(side="left", fill="y")
179
+
180
+ # If they select a production, apply it.
181
+ self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)
182
+
183
+ def _init_bindings(self):
184
+ # Key bindings are a good thing.
185
+ self._top.bind("<Control-q>", self.destroy)
186
+ self._top.bind("<Control-x>", self.destroy)
187
+ self._top.bind("<Escape>", self.destroy)
188
+ self._top.bind("e", self.expand)
189
+ # self._top.bind('<Alt-e>', self.expand)
190
+ # self._top.bind('<Control-e>', self.expand)
191
+ self._top.bind("m", self.match)
192
+ self._top.bind("<Alt-m>", self.match)
193
+ self._top.bind("<Control-m>", self.match)
194
+ self._top.bind("b", self.backtrack)
195
+ self._top.bind("<Alt-b>", self.backtrack)
196
+ self._top.bind("<Control-b>", self.backtrack)
197
+ self._top.bind("<Control-z>", self.backtrack)
198
+ self._top.bind("<BackSpace>", self.backtrack)
199
+ self._top.bind("a", self.autostep)
200
+ # self._top.bind('<Control-a>', self.autostep)
201
+ self._top.bind("<Control-space>", self.autostep)
202
+ self._top.bind("<Control-c>", self.cancel_autostep)
203
+ self._top.bind("<space>", self.step)
204
+ self._top.bind("<Delete>", self.reset)
205
+ self._top.bind("<Control-p>", self.postscript)
206
+ # self._top.bind('<h>', self.help)
207
+ # self._top.bind('<Alt-h>', self.help)
208
+ self._top.bind("<Control-h>", self.help)
209
+ self._top.bind("<F1>", self.help)
210
+ # self._top.bind('<g>', self.toggle_grammar)
211
+ # self._top.bind('<Alt-g>', self.toggle_grammar)
212
+ # self._top.bind('<Control-g>', self.toggle_grammar)
213
+ self._top.bind("<Control-g>", self.edit_grammar)
214
+ self._top.bind("<Control-t>", self.edit_sentence)
215
+
216
+ def _init_buttons(self, parent):
217
+ # Set up the frames.
218
+ self._buttonframe = buttonframe = Frame(parent)
219
+ buttonframe.pack(fill="none", side="bottom", padx=3, pady=2)
220
+ Button(
221
+ buttonframe,
222
+ text="Step",
223
+ background="#90c0d0",
224
+ foreground="black",
225
+ command=self.step,
226
+ ).pack(side="left")
227
+ Button(
228
+ buttonframe,
229
+ text="Autostep",
230
+ background="#90c0d0",
231
+ foreground="black",
232
+ command=self.autostep,
233
+ ).pack(side="left")
234
+ Button(
235
+ buttonframe,
236
+ text="Expand",
237
+ underline=0,
238
+ background="#90f090",
239
+ foreground="black",
240
+ command=self.expand,
241
+ ).pack(side="left")
242
+ Button(
243
+ buttonframe,
244
+ text="Match",
245
+ underline=0,
246
+ background="#90f090",
247
+ foreground="black",
248
+ command=self.match,
249
+ ).pack(side="left")
250
+ Button(
251
+ buttonframe,
252
+ text="Backtrack",
253
+ underline=0,
254
+ background="#f0a0a0",
255
+ foreground="black",
256
+ command=self.backtrack,
257
+ ).pack(side="left")
258
+ # Replace autostep...
259
+
260
+ # self._autostep_button = Button(buttonframe, text='Autostep',
261
+ # underline=0, command=self.autostep)
262
+ # self._autostep_button.pack(side='left')
263
+
264
+ def _configure(self, event):
265
+ self._autostep = 0
266
+ (x1, y1, x2, y2) = self._cframe.scrollregion()
267
+ y2 = event.height - 6
268
+ self._canvas["scrollregion"] = "%d %d %d %d" % (x1, y1, x2, y2)
269
+ self._redraw()
270
+
271
def _init_feedback(self, parent):
    """
    Build the "Last Operation" feedback strip at the bottom of ``parent``.

    Creates ``self._feedbackframe``, the caption ``self._lastoper_label``
    and the two value labels ``self._lastoper1`` / ``self._lastoper2``.
    """
    frame = Frame(parent)
    self._feedbackframe = frame
    frame.pack(fill="x", side="bottom", padx=3, pady=3)

    caption = Label(frame, text="Last Operation:", font=self._font)
    self._lastoper_label = caption
    caption.pack(side="left")

    sunken = Frame(frame, relief="sunken", border=1)
    sunken.pack(fill="x", side="right", expand=1, padx=5)

    # Both value labels share background and font.
    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(sunken, foreground="#007070", **shared)
    self._lastoper2 = Label(
        sunken, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
293
+
294
def _init_canvas(self, parent):
    """
    Create the main drawing area (``self._cframe`` / ``self._canvas``)
    and reset the drawing state: no tree, no text widgets, no text line.
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    self._canvas = self._cframe.canvas()

    # Nothing has been drawn yet.
    self._tree = None
    self._textwidgets = []
    self._textline = None
310
+
311
def _init_menubar(self, parent):
    """
    Build the File / Edit / Apply / View / Animate / Help menus and
    install the resulting menubar on ``parent``.
    """
    menubar = Menu(parent)

    # --- File ---
    filemenu = Menu(menubar, tearoff=0)
    for label, underline, command, accel in (
        ("Reset Parser", 0, self.reset, "Del"),
        ("Print to Postscript", 0, self.postscript, "Ctrl-p"),
        ("Exit", 1, self.destroy, "Ctrl-x"),
    ):
        filemenu.add_command(
            label=label, underline=underline, command=command, accelerator=accel
        )
    menubar.add_cascade(label="File", underline=0, menu=filemenu)

    # --- Edit ---
    editmenu = Menu(menubar, tearoff=0)
    for label, command, accel in (
        ("Edit Grammar", self.edit_grammar, "Ctrl-g"),
        ("Edit Text", self.edit_sentence, "Ctrl-t"),
    ):
        editmenu.add_command(
            label=label, underline=5, command=command, accelerator=accel
        )
    menubar.add_cascade(label="Edit", underline=0, menu=editmenu)

    # --- Apply (parser operations) ---
    rulemenu = Menu(menubar, tearoff=0)
    rulemenu.add_command(
        label="Step", underline=1, command=self.step, accelerator="Space"
    )
    rulemenu.add_separator()
    for label, command, accel in (
        ("Match", self.match, "Ctrl-m"),
        ("Expand", self.expand, "Ctrl-e"),
    ):
        rulemenu.add_command(
            label=label, underline=0, command=command, accelerator=accel
        )
    rulemenu.add_separator()
    rulemenu.add_command(
        label="Backtrack", underline=0, command=self.backtrack, accelerator="Ctrl-b"
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rulemenu)

    # --- View (grammar visibility and font size) ---
    viewmenu = Menu(menubar, tearoff=0)
    viewmenu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    viewmenu.add_separator()
    for label, points in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        viewmenu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=points,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=viewmenu)

    # --- Animate (frames per animation; 0 disables it) ---
    animatemenu = Menu(menubar, tearoff=0)
    for label, frames, accel in (
        ("No Animation", 0, None),
        ("Slow Animation", 10, "-"),
        ("Normal Animation", 5, "="),
        ("Fast Animation", 2, "+"),
    ):
        options = {
            "label": label,
            "underline": 0,
            "variable": self._animation_frames,
            "value": frames,
        }
        if accel is not None:
            options["accelerator"] = accel
        animatemenu.add_radiobutton(**options)
    menubar.add_cascade(label="Animate", underline=1, menu=animatemenu)

    # --- Help ---
    helpmenu = Menu(menubar, tearoff=0)
    helpmenu.add_command(label="About", underline=0, command=self.about)
    helpmenu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=helpmenu)

    parent.config(menu=menubar)
441
+
442
+ #########################################
443
+ ## Helper
444
+ #########################################
445
+
446
def _get(self, widget, treeloc):
    """
    Follow the child indices in ``treeloc`` down from ``widget`` and
    return the widget found there.  When the target is a
    ``TreeSegmentWidget``, its label widget is returned instead.
    """
    node = widget
    for child_index in treeloc:
        node = node.subtrees()[child_index]
    if not isinstance(node, TreeSegmentWidget):
        return node
    return node.label()
452
+
453
+ #########################################
454
+ ## Main draw procedure
455
+ #########################################
456
+
457
def _redraw(self):
    """
    Rebuild the whole canvas from the parser state.

    Destroys the previous tree widget, text widgets and separator
    line, then draws the parser's current tree, one text widget per
    word of ``self._sent`` along the bottom of the scroll region, and
    a dashed line above the text.  Finally refreshes the highlighting
    and aligns the text with the tree.
    """
    canvas = self._canvas

    # Delete the old tree, widgets, etc.
    if self._tree is not None:
        self._cframe.destroy_widget(self._tree)
    for twidget in self._textwidgets:
        self._cframe.destroy_widget(twidget)
    if self._textline is not None:
        canvas.delete(self._textline)

    # Draw the tree.  Negative font sizes are passed straight to Tk.
    size = self._size.get()
    attribs = {
        "tree_color": "#000000",
        "tree_width": 2,
        "node_font": ("helvetica", -size, "bold"),
        "leaf_font": ("helvetica", -size),
    }
    self._tree = tree_to_treesegment(canvas, self._parser.tree(), **attribs)
    self._cframe.add_widget(self._tree, 30, 5)

    # Draw the text along the bottom edge of the scroll region.
    bottom = y = self._cframe.scrollregion()[3]
    self._textwidgets = [
        TextWidget(canvas, word, font=self._font) for word in self._sent
    ]
    for twidget in self._textwidgets:
        self._cframe.add_widget(twidget, 0, 0)
        twidget.move(0, bottom - twidget.bbox()[3] - 5)
        # Track the topmost text edge so the separator clears all words.
        y = min(y, twidget.bbox()[1])

    # Draw a line over the text, to separate it from the tree.
    self._textline = canvas.create_line(-5000, y - 5, 5000, y - 5, dash=".")

    # Highlight appropriate nodes.
    self._highlight_nodes()
    self._highlight_prodlist()

    # Make sure the text lines up.
    self._position_text()
501
+
502
def _redraw_quick(self):
    """
    Lightweight refresh used after an animation: re-highlight the
    nodes and production list and re-align the text, without
    rebuilding any widgets.
    """
    for refresh in (
        self._highlight_nodes,
        self._highlight_prodlist,
        self._position_text,
    ):
        refresh()
507
+
508
def _highlight_nodes(self):
    """
    Colour the frontier nodes: the first one gets the "current" colour
    and a bold font, the remaining ones a dimmer colour.
    """
    bold = ("helvetica", -self._size.get(), "bold")
    for position, treeloc in enumerate(self._parser.frontier()):
        node = self._get(self._tree, treeloc)
        if position == 0:
            node["color"] = "#20a050"
            node["font"] = bold
        else:
            node["color"] = "#008080"
516
+
517
def _highlight_prodlist(self):
    """
    Refill the production listbox, selecting every production that can
    currently be expanded and tagging the already-tried ones "(TRIED)".
    """
    # The listbox is rebuilt from scratch because per-item styling is
    # not used here; selection marks the expandable productions.
    listbox = self._prodlist
    listbox.delete(0, "end")
    expandable = self._parser.expandable_productions()
    untried = self._parser.untried_expandable_productions()
    for index, production in enumerate(self._productions):
        if production not in expandable:
            listbox.insert(index, " %s" % production)
            continue
        if production in untried:
            listbox.insert(index, " %s" % production)
        else:
            listbox.insert(index, " %s (TRIED)" % production)
        listbox.selection_set(index)
534
+
535
def _position_text(self):
    """
    Align the sentence's text widgets with the tree.

    Words already matched are placed under their tree leaf and
    coloured as matched; the remaining words follow to the right in
    grey.  On a complete parse all words turn green.  Matched leaves
    are then moved down towards their word.
    """
    numwords = len(self._sent)
    num_matched = numwords - len(self._parser.remaining_text())
    leaves = self._tree_leaves()[:num_matched]
    matched_count = len(leaves)

    # Line up the text widgets that are matched against the tree.
    xmax = self._tree.bbox()[0]
    for i, leaf in enumerate(leaves):
        word = self._textwidgets[i]
        word["color"] = "#006040"
        leaf["color"] = "#006040"
        word.move(leaf.bbox()[0] - word.bbox()[0], 0)
        xmax = word.bbox()[2] + 10

    # Line up the text widgets that are not matched against the tree.
    for i in range(matched_count, numwords):
        word = self._textwidgets[i]
        word["color"] = "#a0a0a0"
        word.move(xmax - word.bbox()[0], 0)
        xmax = word.bbox()[2] + 10

    # If we have a complete parse, make everything green.
    if self._parser.currently_complete():
        for word in self._textwidgets:
            word["color"] = "#00a000"

    # Move the matched leaves down to the text, but never closer than
    # 10 units below the parent's label.
    for i, leaf in enumerate(leaves):
        word = self._textwidgets[i]
        to_text = word.bbox()[1] - leaf.bbox()[3] - 10.0
        below_label = leaf.parent().label().bbox()[3] - leaf.bbox()[3] + 10
        leaf.move(0, max(to_text, below_label))
568
+
569
def _tree_leaves(self, tree=None):
    """
    Return the leaf widgets below ``tree`` in left-to-right order.
    ``tree`` defaults to ``self._tree``; anything that is not a
    ``TreeSegmentWidget`` counts as a leaf.
    """
    node = self._tree if tree is None else tree
    if not isinstance(node, TreeSegmentWidget):
        return [node]
    collected = []
    for child in node.subtrees():
        collected.extend(self._tree_leaves(child))
    return collected
579
+
580
+ #########################################
581
+ ## Button Callbacks
582
+ #########################################
583
+
584
def destroy(self, *e):
    """
    Stop auto-stepping and close the application window.  Calling it
    again after the window is gone does nothing further.
    """
    self._autostep = 0
    if self._top is not None:
        self._top.destroy()
        self._top = None
590
+
591
def reset(self, *e):
    """
    Restart parsing of the current sentence: stop auto-stepping,
    re-initialize the parser, update the feedback labels and redraw.
    """
    self._autostep = 0
    self._parser.initialize(self._sent)
    for label, text in (
        (self._lastoper1, "Reset Application"),
        (self._lastoper2, ""),
    ):
        label["text"] = text
    self._redraw()
597
+
598
def autostep(self, *e):
    """
    Toggle automatic stepping.  If animation is disabled it is first
    switched to 2 frames; turning auto-step on also performs the
    first step.
    """
    frames = self._animation_frames
    if frames.get() == 0:
        frames.set(2)

    if self._autostep:
        # Already running: switch it off.
        self._autostep = 0
        return
    self._autostep = 1
    self._step()
606
+
607
def cancel_autostep(self, *e):
    """Switch auto-stepping off; any event arguments are ignored."""
    self._autostep = 0
610
+
611
+ # Make sure to stop auto-stepping if we get any user input.
612
def step(self, *e):
    """User-triggered single step; cancels auto-stepping first."""
    self._autostep = 0
    self._step()
615
+
616
def match(self, *e):
    """User-triggered match operation; cancels auto-stepping first."""
    self._autostep = 0
    self._match()
619
+
620
def expand(self, *e):
    """User-triggered expand operation; cancels auto-stepping first."""
    self._autostep = 0
    self._expand()
623
+
624
def backtrack(self, *e):
    """User-triggered backtrack operation; cancels auto-stepping first."""
    self._autostep = 0
    self._backtrack()
627
+
628
def _step(self):
    """
    Perform one automatic parser operation.

    Tries expanding, then matching (only when the parser reports an
    untried match), then backtracking, in that order.  If none of
    them applies, the feedback shows "Finished" and auto-stepping
    stops.  A complete parse also stops auto-stepping and is flagged
    in the feedback.  Does nothing while an animation is running.
    """
    if self._animating_lock:
        return

    # Short-circuiting keeps the original order: expand, match, backtrack.
    progressed = (
        self._expand()
        or (self._parser.untried_match() and self._match())
        or self._backtrack()
    )
    if not progressed:
        self._lastoper1["text"] = "Finished"
        self._lastoper2["text"] = ""
        self._autostep = 0

    # Check if we just completed a parse.
    if self._parser.currently_complete():
        self._autostep = 0
        self._lastoper2["text"] += " [COMPLETE PARSE]"
648
+
649
def _expand(self, *e):
    """
    Ask the parser to expand the first frontier node.

    On success the feedback labels and production selection are
    updated and the expansion animation is started.

    :return: True if a production was applied; False if every
        expansion has been tried or an animation is in progress.
    :rtype: bool
    """
    if self._animating_lock:
        # Return a real bool (not None) so every path has the same type.
        return False
    # The frontier must be captured before expand() changes it.
    old_frontier = self._parser.frontier()
    rv = self._parser.expand()
    self._lastoper1["text"] = "Expand:"
    if rv is None:
        self._lastoper2["text"] = "(all expansions tried)"
        return False
    self._lastoper2["text"] = rv
    self._prodlist.selection_clear(0, "end")
    index = self._productions.index(rv)
    self._prodlist.selection_set(index)
    self._animate_expand(old_frontier[0])
    return True
666
+
667
def _match(self, *e):
    """
    Ask the parser to match the first frontier node against the next
    word of the text.

    On success the feedback labels are updated and the match
    animation is started.

    :return: True if the match succeeded; False if it failed or an
        animation is in progress.
    :rtype: bool
    """
    if self._animating_lock:
        # Return a real bool (not None) so every path has the same type.
        return False
    # The frontier must be captured before match() changes it.
    old_frontier = self._parser.frontier()
    rv = self._parser.match()
    self._lastoper1["text"] = "Match:"
    if rv is None:
        self._lastoper2["text"] = "(failed)"
        return False
    self._lastoper2["text"] = rv
    self._animate_match(old_frontier[0])
    return True
681
+
682
def _backtrack(self, *e):
    """
    Ask the parser to undo its most recent operation.

    If the node at the new first frontier position is a ``Tree``, an
    expansion was undone; otherwise a match was undone.  The matching
    animation is started in either case.

    :return: True if the parser backtracked; False if there was
        nothing to undo (feedback shows "Finished" and auto-stepping
        stops) or an animation is in progress.
    :rtype: bool
    """
    if self._animating_lock:
        # Return a real bool (not None) so every path has the same type.
        return False
    if not self._parser.backtrack():
        self._autostep = 0
        self._lastoper1["text"] = "Finished"
        self._lastoper2["text"] = ""
        return False

    # Locate the element at the head of the frontier after backtracking.
    treeloc = self._parser.frontier()[0]
    elt = self._parser.tree()
    for i in treeloc:
        elt = elt[i]
    self._lastoper1["text"] = "Backtrack"
    self._lastoper2["text"] = ""
    if isinstance(elt, Tree):
        self._animate_backtrack(treeloc)
    else:
        self._animate_match_backtrack(treeloc)
    return True
701
+
702
def about(self, *e):
    """
    Show the "About" information, preferring a native Tk message box
    and falling back to a ``ShowText`` window if that fails.
    """
    ABOUT = (
        "NLTK Recursive Descent Parser Application\n" + "Written by Edward Loper"
    )
    TITLE = "About: Recursive Descent Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Best-effort fallback; a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        ShowText(self._top, TITLE, ABOUT)
713
+
714
def help(self, *e):
    """
    Stop auto-stepping and show the module docstring in a help
    window, using the "fixed" font when it is available.
    """
    self._autostep = 0
    title = "Help: Recursive Descent Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Retry with the default font.  ``Exception`` rather than a
        # bare ``except`` so KeyboardInterrupt/SystemExit propagate.
        ShowText(self._top, title, text, width=75)
732
+
733
def postscript(self, *e):
    """Stop auto-stepping and hand the canvas to ``print_to_file``."""
    self._autostep = 0
    self._cframe.print_to_file()
736
+
737
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Returns immediately, without entering the loop, when
    ``in_idle()`` reports true.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
747
+
748
def resize(self, size=None):
    """
    Apply a font size to all application fonts and redraw.

    :param size: new size; when None the current value of
        ``self._size`` is used.  Sizes are passed to Tk as negative
        numbers, and the "big" font is two units larger.
    """
    if size is not None:
        self._size.set(size)
    size = self._size.get()

    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=-(abs(size)))
    self._bigfont.configure(size=-(abs(size + 2)))
    self._redraw()
757
+
758
+ #########################################
759
+ ## Expand Production Selection
760
+ #########################################
761
+
762
def _toggle_grammar(self, *e):
    """
    Show or hide the production list according to
    ``self._show_grammar`` and report the change in the feedback.
    """
    visible = self._show_grammar.get()
    if visible:
        self._prodframe.pack(
            fill="both", side="left", padx=2, after=self._feedbackframe
        )
    else:
        self._prodframe.pack_forget()
    self._lastoper1["text"] = "Show Grammar" if visible else "Hide Grammar"
    self._lastoper2["text"] = ""
772
+
773
+ # def toggle_grammar(self, *e):
774
+ # self._show_grammar = not self._show_grammar
775
+ # if self._show_grammar:
776
+ # self._prodframe.pack(fill='both', expand='y', side='left',
777
+ # after=self._feedbackframe)
778
+ # self._lastoper1['text'] = 'Show Grammar'
779
+ # else:
780
+ # self._prodframe.pack_forget()
781
+ # self._lastoper1['text'] = 'Hide Grammar'
782
+ # self._lastoper2['text'] = ''
783
+
784
def _prodlist_select(self, event):
    """
    Listbox callback: try to expand the first frontier node with the
    production the user clicked.

    Does nothing unless exactly one entry is selected.  If the parser
    rejects the production, the selection is reset to the currently
    expandable productions.
    """
    chosen = self._prodlist.curselection()
    if len(chosen) != 1:
        return
    index = int(chosen[0])
    # The frontier must be captured before expand() changes it.
    frontier_before = self._parser.frontier()
    production = self._parser.expand(self._productions[index])

    if not production:
        # Reset the production selections.
        self._prodlist.selection_clear(0, "end")
        for prod in self._parser.expandable_productions():
            self._prodlist.selection_set(self._productions.index(prod))
        return

    self._lastoper1["text"] = "Expand:"
    self._lastoper2["text"] = production
    self._prodlist.selection_clear(0, "end")
    self._prodlist.selection_set(index)
    self._animate_expand(frontier_before[0])
804
+
805
+ #########################################
806
+ ## Animation
807
+ #########################################
808
+
809
def _animate_expand(self, treeloc):
    """
    Replace the widget at ``treeloc`` with a freshly built subtree
    widget for the parser's expanded node and start fading it in.

    The new widget is drawn in white, positioned over the old node,
    given room among its siblings, and the text row is pushed down
    if the new subtree would overlap it.
    """
    old_label = self._get(self._tree, treeloc)
    old_seg = old_label.parent()
    # The segment is the root when its parent is not a tree segment.
    is_root = not isinstance(old_seg.parent(), TreeSegmentWidget)

    # Find the expanded subtree in the parser's tree.
    subtree = self._parser.tree()
    for i in treeloc:
        subtree = subtree[i]

    new_seg = tree_to_treesegment(
        self._canvas,
        subtree,
        node_font=self._boldfont,
        leaf_color="white",
        tree_width=2,
        tree_color="white",
        node_color="white",
        leaf_font=self._font,
    )
    new_seg.label()["color"] = "#20a050"

    # Put the new label exactly where the old one was.
    oldx, oldy = old_seg.label().bbox()[:2]
    newx, newy = new_seg.label().bbox()[:2]
    new_seg.move(oldx - newx, oldy - newy)

    if is_root:
        self._cframe.add_widget(new_seg, 0, 5)
        new_seg.move(30 - new_seg.label().bbox()[0], 0)
        self._tree = new_seg
    else:
        old_seg.parent().replace_child(old_seg, new_seg)

    # Move the children over so they don't overlap.
    # Line the children up in a strange way.
    children = new_seg.subtrees()
    if children:
        first_box = children[0].bbox()
        dx = (
            oldx
            + new_seg.label().width() / 2
            - first_box[0] / 2
            - first_box[2] / 2
        )
        for child in children:
            child.move(dx, 0)

    self._makeroom(new_seg)

    if is_root:
        self._cframe.destroy_widget(old_seg)
    else:
        old_seg.destroy()

    # Fade-in colours, one entry per animation frame.
    frames = self._animation_frames.get()
    colors = ["gray%d" % (10 * int(10 * x / frames)) for x in range(frames, 0, -1)]

    # Move the text string down, if necessary.
    dy = new_seg.bbox()[3] + 30 - self._canvas.coords(self._textline)[1]
    if dy > 0:
        for twidget in self._textwidgets:
            twidget.move(0, dy)
        self._canvas.move(self._textline, 0, dy)

    self._animate_expand_frame(new_seg, colors)
873
+
874
def _makeroom(self, treeseg):
    """
    Make sure that no sibling tree bbox's overlap.

    Starting at ``treeseg`` and walking up through its ancestors,
    right-hand siblings are shifted to sit 10 units past the segment
    and the segment itself is pushed right of its left neighbour
    when needed.
    """
    current = treeseg
    while True:
        parent = current.parent()
        if not isinstance(parent, TreeSegmentWidget):
            return

        position = parent.subtrees().index(current)

        # Handle siblings to the right
        right = parent.subtrees()[position + 1 :]
        if right:
            shift = current.bbox()[2] - right[0].bbox()[0] + 10
            for sibling in right:
                sibling.move(shift, 0)

        # Handle siblings to the left
        if position > 0:
            left = parent.subtrees()[position - 1]
            shift = max(0, left.bbox()[2] - current.bbox()[0] + 10)
            current.move(shift, 0)

        # Keep working up the tree.
        current = parent
899
+
900
def _animate_expand_frame(self, widget, colors):
    """
    Draw one frame of the expand animation.

    While ``colors`` is non-empty, paint ``widget`` and its children
    with the first colour and schedule the next frame in 50 ms.  When
    the colours run out, paint everything black, refresh the display,
    release the animation lock and continue auto-stepping if enabled.
    """
    animating = len(colors) > 0
    shade = colors[0] if animating else "black"
    if animating:
        self._animating_lock = 1

    widget["color"] = shade
    for child in widget.subtrees():
        # Subtrees are coloured through their label widget.
        target = child.label() if isinstance(child, TreeSegmentWidget) else child
        target["color"] = shade

    if animating:
        self._top.after(50, self._animate_expand_frame, widget, colors[1:])
        return

    self._redraw_quick()
    widget.label()["color"] = "black"
    self._animating_lock = 0
    if self._autostep:
        self._step()
922
+
923
def _animate_backtrack(self, treeloc):
    """
    Start the animation for undoing an expansion at ``treeloc``: the
    node and its children flash red (when animation is enabled) and
    then fade out through grey shades.
    """
    frames = self._animation_frames.get()
    # Flash red first, if we're animating.
    flash = [] if frames == 0 else ["#a00000", "#000000", "#a00000"]
    fade = ["gray%d" % (10 * int(10 * x / frames)) for x in range(1, frames + 1)]
    colors = flash + fade

    # First entry is the tree segment; the rest are what gets recoloured.
    segment = self._get(self._tree, treeloc).parent()
    widgets = [segment]
    for child in segment.subtrees():
        widgets.append(
            child.label() if isinstance(child, TreeSegmentWidget) else child
        )

    self._animate_backtrack_frame(widgets, colors)
942
+
943
def _animate_backtrack_frame(self, widgets, colors):
    """
    Draw one frame of the backtrack animation.

    While ``colors`` is non-empty, paint every widget with the first
    colour and schedule the next frame in 50 ms.  Afterwards remove
    and destroy all children of ``widgets[0]``, refresh the display,
    release the animation lock and continue auto-stepping if enabled.
    """
    if len(colors) > 0:
        self._animating_lock = 1
        shade = colors[0]
        for item in widgets:
            item["color"] = shade
        self._top.after(50, self._animate_backtrack_frame, widgets, colors[1:])
        return

    segment = widgets[0]
    for child in segment.subtrees():
        segment.remove_child(child)
        child.destroy()
    self._redraw_quick()
    self._animating_lock = 0
    if self._autostep:
        self._step()
957
+
958
def _animate_match_backtrack(self, treeloc):
    """
    Start the animation for undoing a match: the leaf at ``treeloc``
    moves back towards its parent's label, spread over the configured
    number of frames.
    """
    leaf = self._get(self._tree, treeloc)
    label = leaf.parent().label()
    frames = self._animation_frames.get()
    distance = label.bbox()[3] - leaf.bbox()[1] + 14
    # max(1, ...) avoids dividing by zero when animation is off.
    dy = distance / max(1, frames)
    self._animate_match_backtrack_frame(frames, leaf, dy)
965
+
966
def _animate_match(self, treeloc):
    """
    Start the match animation: the leaf at ``treeloc`` moves down
    towards the text row, spread over the configured number of frames.
    """
    leaf = self._get(self._tree, treeloc)
    frames = self._animation_frames.get()
    distance = self._textwidgets[0].bbox()[1] - leaf.bbox()[3] - 10.0
    # max(1, ...) avoids dividing by zero when animation is off.
    dy = distance / max(1, frames)
    self._animate_match_frame(frames, leaf, dy)
973
+
974
def _animate_match_frame(self, frame, widget, dy):
    """
    Draw one frame of the match animation.

    With frames remaining, move ``widget`` down by ``dy`` and schedule
    the next frame in 10 ms.  Otherwise colour the widget as matched,
    refresh the display, release the animation lock and continue
    auto-stepping if enabled.
    """
    if frame <= 0:
        widget["color"] = "#006040"
        self._redraw_quick()
        self._animating_lock = 0
        if self._autostep:
            self._step()
        return

    self._animating_lock = 1
    widget.move(0, dy)
    self._top.after(10, self._animate_match_frame, frame - 1, widget, dy)
985
+
986
def _animate_match_backtrack_frame(self, frame, widget, dy):
    """
    Draw one frame of the undo-match animation.

    With frames remaining, move ``widget`` by ``dy`` and schedule the
    next frame in 10 ms.  Otherwise detach the widget from its parent,
    destroy it, release the animation lock and continue auto-stepping
    if enabled.
    """
    if frame <= 0:
        widget.parent().remove_child(widget)
        widget.destroy()
        self._animating_lock = 0
        if self._autostep:
            self._step()
        return

    self._animating_lock = 1
    widget.move(0, dy)
    self._top.after(10, self._animate_match_backtrack_frame, frame - 1, widget, dy)
999
+
1000
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's current grammar; the editor
    is given ``self.set_grammar`` as its callback.
    """
    current = self._parser.grammar()
    CFGEditor(self._top, current, self.set_grammar)
1002
+
1003
def set_grammar(self, grammar):
    """
    Install ``grammar`` in the parser and refill the production
    listbox with its productions.
    """
    self._parser.set_grammar(grammar)
    self._productions = list(grammar.productions())
    listbox = self._prodlist
    listbox.delete(0, "end")
    for rule in self._productions:
        listbox.insert("end", (" %s" % rule))
1009
+
1010
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence; the
    dialog is given ``self.set_sentence`` as its callback.
    """
    current_text = " ".join(self._sent)
    EntryDialog(
        self._top,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
1015
+
1016
def set_sentence(self, sentence):
    """
    Replace the text being parsed with the whitespace-separated
    tokens of ``sentence`` and reset the application.
    """
    tokens = sentence.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
1019
+
1020
+
1021
def app():
    """
    Create a recursive descent parser demo, using a simple grammar and
    text, and run it until the window is closed.
    """
    from nltk.grammar import CFG

    demo_sentence = "the dog saw a man in the park".split()
    demo_grammar = CFG.fromstring(
        """
    # Grammatical productions.
        S -> NP VP
        NP -> Det N PP | Det N
        VP -> V NP PP | V NP | V
        PP -> P NP
    # Lexical productions.
        NP -> 'I'
        Det -> 'the' | 'a'
        N -> 'man' | 'park' | 'dog' | 'telescope'
        V -> 'ate' | 'saw'
        P -> 'in' | 'under' | 'with'
    """
    )

    RecursiveDescentApp(demo_grammar, demo_sentence).mainloop()
1047
+
1048
+
1049
+ if __name__ == "__main__":
1050
+ app()
1051
+
1052
+ __all__ = ["app"]
pipeline/nltk/app/srparser_app.py ADDED
@@ -0,0 +1,937 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Shift-Reduce Parser Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Edward Loper <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ """
9
+ A graphical tool for exploring the shift-reduce parser.
10
+
11
+ The shift-reduce parser maintains a stack, which records the structure
12
+ of the portion of the text that has been parsed. The stack is
13
+ initially empty. Its contents are shown on the left side of the main
14
+ canvas.
15
+
16
+ On the right side of the main canvas is the remaining text. This is
17
+ the portion of the text which has not yet been considered by the
18
+ parser.
19
+
20
+ The parser builds up a tree structure for the text using two
21
+ operations:
22
+
23
+ - "shift" moves the first token from the remaining text to the top
24
+ of the stack. In the demo, the top of the stack is its right-hand
25
+ side.
26
+ - "reduce" uses a grammar production to combine the rightmost stack
27
+ elements into a single tree token.
28
+
29
+ You can control the parser's operation by using the "shift" and
30
+ "reduce" buttons; or you can use the "step" button to let the parser
31
+ automatically decide which operation to apply. The parser uses the
32
+ following rules to decide which operation to apply:
33
+
34
+ - Only shift if no reductions are available.
35
+ - If multiple reductions are available, then apply the reduction
36
+ whose CFG production is listed earliest in the grammar.
37
+
38
+ The "reduce" button applies the reduction whose CFG production is
39
+ listed earliest in the grammar. There are two ways to manually choose
40
+ which reduction to apply:
41
+
42
+ - Click on a CFG production from the list of available reductions,
43
+ on the left side of the main window. The reduction based on that
44
+ production will be applied to the top of the stack.
45
+ - Click on one of the stack elements. A popup window will appear,
46
+ containing all available reductions. Select one, and it will be
47
+ applied to the top of the stack.
48
+
49
+ Note that reductions can only be applied to the top of the stack.
50
+
51
+ Keyboard Shortcuts::
52
+ [Space]\t Perform the next shift or reduce operation
53
+ [s]\t Perform a shift operation
54
+ [r]\t Perform a reduction operation
55
+ [Ctrl-z]\t Undo most recent operation
56
+ [Delete]\t Reset the parser
57
+ [g]\t Show/hide available production list
58
+ [Ctrl-a]\t Toggle animations
59
+ [h]\t Help
60
+ [Ctrl-p]\t Print
61
+ [q]\t Quit
62
+
63
+ """
64
+
65
+ from tkinter import Button, Frame, IntVar, Label, Listbox, Menu, Scrollbar, Tk
66
+ from tkinter.font import Font
67
+
68
+ from nltk.draw import CFGEditor, TreeSegmentWidget, tree_to_treesegment
69
+ from nltk.draw.util import CanvasFrame, EntryDialog, ShowText, TextWidget
70
+ from nltk.parse import SteppingShiftReduceParser
71
+ from nltk.tree import Tree
72
+ from nltk.util import in_idle
73
+
74
+ """
75
+ Possible future improvements:
76
+ - button/window to change and/or select text. Just pop up a window
77
+ with an entry, and let them modify the text; and then retokenize
78
+ it? Maybe give a warning if it contains tokens whose types are
79
+ not in the grammar.
80
+ - button/window to change and/or select grammar. Select from
81
+ several alternative grammars? Or actually change the grammar? If
82
+ the latter, then I'd want to define nltk.draw.cfg, which would be
83
+ responsible for that.
84
+ """
85
+
86
+
87
+ class ShiftReduceApp:
88
+ """
89
+ A graphical tool for exploring the shift-reduce parser. The tool
90
+ displays the parser's stack and the remaining text, and allows the
91
+ user to control the parser's operation. In particular, the user
92
+ can shift tokens onto the stack, and can perform reductions on the
93
+ top elements of the stack. A "step" button simply steps through
94
+ the parsing process, performing the operations that
95
+ ``nltk.parse.ShiftReduceParser`` would use.
96
+ """
97
+
98
def __init__(self, grammar, sent, trace=0):
    """
    Create the shift-reduce demo window for ``sent`` using ``grammar``.

    :param grammar: grammar handed to ``SteppingShiftReduceParser``.
    :param sent: the text to parse.
    :param trace: trace level passed through to the parser.
    """
    self._sent = sent
    self._parser = SteppingShiftReduceParser(grammar, trace)

    # Set up the main window.
    root = Tk()
    self._top = root
    root.title("Shift Reduce Parser Application")

    # Animations.  animating_lock is a lock to prevent the demo
    # from performing new operations while it's animating.
    self._animating_lock = 0
    self._animate = IntVar(root)
    self._animate.set(10)  # = medium

    # The user can hide the grammar.
    self._show_grammar = IntVar(root)
    self._show_grammar.set(1)

    # Fonts and key bindings come first, then the basic frames.
    self._init_fonts(root)
    self._init_bindings()
    for build in (
        self._init_menubar,
        self._init_buttons,
        self._init_feedback,
        self._init_grammar,
        self._init_canvas,
    ):
        build(root)

    # A popup menu for reducing.
    self._reduce_menu = Menu(self._canvas, tearoff=0)

    # Reset the demo, and set the feedback frame to empty.
    self.reset()
    self._lastoper1["text"] = ""
135
+
136
+ #########################################
137
+ ## Initialization Helpers
138
+ #########################################
139
+
140
def _init_fonts(self, root):
    """
    Create the application fonts.

    The system font is taken from a default ``Button`` and installed
    as the "*Font" option on ``root``; ``self._size`` starts at that
    font's size and is used for the regular and bold Helvetica fonts.
    """
    # See: <http://www.astro.washington.edu/owen/ROTKFolklore.html>
    sysfont = Font(font=Button()["font"])
    self._sysfont = sysfont
    root.option_add("*Font", sysfont)

    # What's our font size (default=same as sysfont)
    self._size = IntVar(root)
    self._size.set(sysfont.cget("size"))

    points = self._size.get()
    self._boldfont = Font(family="helvetica", weight="bold", size=points)
    self._font = Font(family="helvetica", size=points)
151
+
152
def _init_grammar(self, parent):
    """
    Build the "Available Reductions" panel on the left of ``parent``.

    Creates ``self._prodframe``, its heading label, the production
    listbox (filled from the parser's grammar and at most 25 rows
    high) and a vertical scrollbar, then wires up the selection and
    hover callbacks.
    """
    # Grammar view.
    self._prodframe = Frame(parent)
    self._prodframe.pack(fill="both", side="left", padx=2)
    self._prodlist_label = Label(
        self._prodframe, font=self._boldfont, text="Available Reductions"
    )
    self._prodlist_label.pack()
    self._prodlist = Listbox(
        self._prodframe,
        selectmode="single",
        relief="groove",
        background="white",
        foreground="#909090",
        font=self._font,
        selectforeground="#004040",
        selectbackground="#c0f0c0",
    )

    self._prodlist.pack(side="right", fill="both", expand=1)

    self._productions = list(self._parser.grammar().productions())
    for production in self._productions:
        self._prodlist.insert("end", (" %s" % production))
    self._prodlist.config(height=min(len(self._productions), 25))

    # The scrollbar is always added, regardless of how many
    # productions there are.
    listscroll = Scrollbar(self._prodframe, orient="vertical")
    self._prodlist.config(yscrollcommand=listscroll.set)
    listscroll.config(command=self._prodlist.yview)
    listscroll.pack(side="left", fill="y")

    # If they select a production, apply it.
    self._prodlist.bind("<<ListboxSelect>>", self._prodlist_select)

    # When they hover over a production, highlight it.
    self._hover = -1  # no entry hovered yet
    self._prodlist.bind("<Motion>", self._highlight_hover)
    self._prodlist.bind("<Leave>", self._clear_hover)
192
+
193
def _init_bindings(self):
    """
    Attach the keyboard shortcuts to the top-level window.

    Each handler is bound to one or more Tk event sequences; the last
    three bindings change the animation speed variable.
    """
    top = self._top
    key_table = (
        # Quit
        (("<Control-q>", "<Control-x>", "<Alt-q>", "<Alt-x>"), self.destroy),
        # Ops (step, shift, reduce, undo)
        (("<space>",), self.step),
        (("<s>", "<Alt-s>", "<Control-s>"), self.shift),
        (("<r>", "<Alt-r>", "<Control-r>"), self.reduce),
        (("<Delete>",), self.reset),
        (
            ("<u>", "<Alt-u>", "<Control-u>", "<Control-z>", "<BackSpace>"),
            self.undo,
        ),
        # Misc
        (("<Control-p>",), self.postscript),
        (("<Control-h>", "<F1>"), self.help),
        (("<Control-g>",), self.edit_grammar),
        (("<Control-t>",), self.edit_sentence),
    )
    for sequences, handler in key_table:
        for sequence in sequences:
            top.bind(sequence, handler)

    # Animation speed control
    for key, speed in (("-", 20), ("=", 10), ("+", 4)):
        # Bind the loop values as defaults so each lambda keeps its own speed.
        top.bind(key, lambda e, a=self._animate, speed=speed: a.set(speed))
226
+
227
def _init_buttons(self, parent):
    """
    Create the row of Step / Shift / Reduce / Undo buttons.

    :param parent: The widget that the button frame is created in.
    """
    # Set up the frames.
    self._buttonframe = buttonframe = Frame(parent)
    buttonframe.pack(fill="none", side="bottom")

    # "Step" is the only button without an underlined shortcut letter.
    step_button = Button(
        buttonframe,
        text="Step",
        background="#90c0d0",
        foreground="black",
        command=self.step,
    )
    step_button.pack(side="left")

    action_buttons = (
        ("Shift", "#90f090", self.shift),
        ("Reduce", "#90f090", self.reduce),
        ("Undo", "#f0a0a0", self.undo),
    )
    for caption, colour, callback in action_buttons:
        Button(
            buttonframe,
            text=caption,
            underline=0,
            background=colour,
            foreground="black",
            command=callback,
        ).pack(side="left")
262
+
263
def _init_menubar(self, parent):
    """
    Build the menu bar (File, Edit, Apply, View, Animate, Help) and
    install it on ``parent``.

    :param parent: The window that receives the menu bar.
    """
    menubar = Menu(parent)

    # File menu.
    file_menu = Menu(menubar, tearoff=0)
    for label, underline, command, accelerator in (
        ("Reset Parser", 0, self.reset, "Del"),
        ("Print to Postscript", 0, self.postscript, "Ctrl-p"),
        ("Exit", 1, self.destroy, "Ctrl-x"),
    ):
        file_menu.add_command(
            label=label, underline=underline, command=command, accelerator=accelerator
        )
    menubar.add_cascade(label="File", underline=0, menu=file_menu)

    # Edit menu.
    edit_menu = Menu(menubar, tearoff=0)
    for label, command, accelerator in (
        ("Edit Grammar", self.edit_grammar, "Ctrl-g"),
        ("Edit Text", self.edit_sentence, "Ctrl-t"),
    ):
        edit_menu.add_command(
            label=label, underline=5, command=command, accelerator=accelerator
        )
    menubar.add_cascade(label="Edit", underline=0, menu=edit_menu)

    # Apply menu: parser operations, grouped by separators.
    rule_menu = Menu(menubar, tearoff=0)
    rule_menu.add_command(
        label="Step", underline=1, command=self.step, accelerator="Space"
    )
    rule_menu.add_separator()
    for label, command, accelerator in (
        ("Shift", self.shift, "Ctrl-s"),
        ("Reduce", self.reduce, "Ctrl-r"),
    ):
        rule_menu.add_command(
            label=label, underline=0, command=command, accelerator=accelerator
        )
    rule_menu.add_separator()
    rule_menu.add_command(
        label="Undo", underline=0, command=self.undo, accelerator="Ctrl-u"
    )
    menubar.add_cascade(label="Apply", underline=0, menu=rule_menu)

    # View menu: grammar visibility and font size.
    view_menu = Menu(menubar, tearoff=0)
    view_menu.add_checkbutton(
        label="Show Grammar",
        underline=0,
        variable=self._show_grammar,
        command=self._toggle_grammar,
    )
    view_menu.add_separator()
    for label, points in (
        ("Tiny", 10),
        ("Small", 12),
        ("Medium", 14),
        ("Large", 18),
        ("Huge", 24),
    ):
        view_menu.add_radiobutton(
            label=label,
            variable=self._size,
            underline=0,
            value=points,
            command=self.resize,
        )
    menubar.add_cascade(label="View", underline=0, menu=view_menu)

    # Animate menu: the values are the ones stored in self._animate.
    animate_menu = Menu(menubar, tearoff=0)
    animate_menu.add_radiobutton(
        label="No Animation", underline=0, variable=self._animate, value=0
    )
    for label, speed, accelerator in (
        ("Slow Animation", 20, "-"),
        ("Normal Animation", 10, "="),
        ("Fast Animation", 4, "+"),
    ):
        animate_menu.add_radiobutton(
            label=label,
            underline=0,
            variable=self._animate,
            value=speed,
            accelerator=accelerator,
        )
    menubar.add_cascade(label="Animate", underline=1, menu=animate_menu)

    # Help menu.
    help_menu = Menu(menubar, tearoff=0)
    help_menu.add_command(label="About", underline=0, command=self.about)
    help_menu.add_command(
        label="Instructions", underline=0, command=self.help, accelerator="F1"
    )
    menubar.add_cascade(label="Help", underline=0, menu=help_menu)

    parent.config(menu=menubar)
393
+
394
def _init_feedback(self, parent):
    """
    Create the "Last Operation:" feedback bar.

    Sets ``self._feedbackframe``, ``self._lastoper_label``,
    ``self._lastoper1`` and ``self._lastoper2``.

    :param parent: The widget that the feedback frame is created in.
    """
    bar = Frame(parent)
    self._feedbackframe = bar
    bar.pack(fill="x", side="bottom", padx=3, pady=3)

    caption = Label(bar, text="Last Operation:", font=self._font)
    self._lastoper_label = caption
    caption.pack(side="left")

    # Sunken box holding the two operation labels.
    box = Frame(bar, relief="sunken", border=1)
    box.pack(fill="x", side="right", expand=1, padx=5)

    shared = {"background": "#f0f0f0", "font": self._font}
    self._lastoper1 = Label(box, foreground="#007070", **shared)
    self._lastoper2 = Label(
        box, anchor="w", width=30, foreground="#004040", **shared
    )
    self._lastoper1.pack(side="left")
    self._lastoper2.pack(side="left", fill="x", expand=1)
416
+
417
def _init_canvas(self, parent):
    """
    Create the canvas frame and the permanent canvas items: the title
    bar, the expression line, the stack-top marker and the "Stack" /
    "Remaining Text" labels.

    :param parent: The widget that the canvas frame is created in.
    """
    self._cframe = CanvasFrame(
        parent,
        background="white",
        width=525,
        closeenough=10,
        border=2,
        relief="sunken",
    )
    self._cframe.pack(expand=1, fill="both", side="top", pady=2)
    canvas = self._canvas = self._cframe.canvas()

    # Widgets for the stack and the remaining text; filled in by _redraw.
    self._stackwidgets = []
    self._rtextwidgets = []

    # Created with empty coordinates; _redraw positions them.
    self._titlebar = canvas.create_rectangle(
        0, 0, 0, 0, fill="#c0f0f0", outline="black"
    )
    self._exprline = canvas.create_line(0, 0, 0, 0, dash=".")
    self._stacktop = canvas.create_line(0, 0, 0, 0, fill="#408080")

    self._stacklabel = TextWidget(
        canvas, "Stack", color="#004040", font=self._boldfont
    )
    self._rtextlabel = TextWidget(
        canvas, "Remaining Text", color="#004040", font=self._boldfont
    )
    self._cframe.add_widget(self._stacklabel)
    self._cframe.add_widget(self._rtextlabel)
445
+
446
+ #########################################
447
+ ## Main draw procedure
448
+ #########################################
449
+
450
def _redraw(self):
    """
    Rebuild the canvas from the parser's current state.

    Destroys the existing stack and remaining-text widgets, repositions
    the title bar and labels, creates one widget per stack element and
    per remaining token, positions the stack-top marker, and finally
    refreshes the highlighted productions.
    """
    scrollregion = self._canvas["scrollregion"].split()
    (cx1, cy1, cx2, cy2) = (int(c) for c in scrollregion)

    # Delete the old stack & rtext widgets.
    for stackwidget in self._stackwidgets:
        self._cframe.destroy_widget(stackwidget)
    self._stackwidgets = []
    for rtextwidget in self._rtextwidgets:
        self._cframe.destroy_widget(rtextwidget)
    self._rtextwidgets = []

    # Position the titlebar & exprline
    (x1, y1, x2, y2) = self._stacklabel.bbox()
    y = y2 - y1 + 10
    self._canvas.coords(self._titlebar, -5000, 0, 5000, y - 4)
    self._canvas.coords(self._exprline, 0, y * 2 - 10, 5000, y * 2 - 10)

    # Position the titlebar labels.  The stack label has not moved since
    # its bbox was read above, so that bbox is reused here.
    self._stacklabel.move(5 - x1, 3 - y1)
    (x1, y1, x2, y2) = self._rtextlabel.bbox()
    self._rtextlabel.move(cx2 - x2 - 5, 3 - y1)

    # Draw the stack.  The tree attributes are the same for every tree.
    tree_attribs = {
        "tree_color": "#4080a0",
        "tree_width": 2,
        "node_font": self._boldfont,
        "node_color": "#006060",
        "leaf_color": "#006060",
        "leaf_font": self._font,
    }
    stackx = 5
    for tok in self._parser.stack():
        if isinstance(tok, Tree):
            widget = tree_to_treesegment(self._canvas, tok, **tree_attribs)
            widget.label()["color"] = "#000000"
        else:
            widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        widget.bind_click(self._popup_reduce)
        self._stackwidgets.append(widget)
        self._cframe.add_widget(widget, stackx, y)
        stackx = widget.bbox()[2] + 10

    # Draw the remaining text.
    rtextwidth = 0
    for tok in self._parser.remaining_text():
        widget = TextWidget(self._canvas, tok, color="#000000", font=self._font)
        self._rtextwidgets.append(widget)
        self._cframe.add_widget(widget, rtextwidth, y)
        rtextwidth = widget.bbox()[2] + 4

    # Allow enough room to shift the next token (for animations)
    if self._rtextwidgets:
        stackx += self._rtextwidgets[0].width()

    # Move the remaining text to the correct location (keep it
    # right-justified, when possible); and move the remaining text
    # label, if necessary.
    stackx = max(stackx, self._stacklabel.width() + 25)
    rlabelwidth = self._rtextlabel.width() + 10
    if stackx >= cx2 - max(rtextwidth, rlabelwidth):
        cx2 = stackx + max(rtextwidth, rlabelwidth)
    for rtextwidget in self._rtextwidgets:
        rtextwidget.move(4 + cx2 - rtextwidth, 0)
    self._rtextlabel.move(cx2 - self._rtextlabel.bbox()[2] - 5, 0)

    # The stack-top marker sits midway between the stack and the text.
    midx = (stackx + cx2 - max(rtextwidth, rlabelwidth)) / 2
    self._canvas.coords(self._stacktop, midx, 0, midx, 5000)

    # Set up binding to allow them to shift a token by dragging it.
    if self._rtextwidgets:

        def drag_shift(widget, midx=midx, self=self):
            # Dropping left of the marker shifts; otherwise snap back.
            if widget.bbox()[0] < midx:
                self.shift()
            else:
                self._redraw()

        self._rtextwidgets[0].bind_drag(drag_shift)
        self._rtextwidgets[0].bind_click(self.shift)

    # Draw the stack top.
    self._highlight_productions()
536
+
537
def _draw_stack_top(self, widget):
    """
    Move the stack-top marker to 50 units right of ``widget``'s bbox.

    :param widget: The widget whose right edge anchors the marker.
    """
    # hack..
    right_edge = widget.bbox()[2]
    marker_x = right_edge + 50
    self._canvas.coords(self._stacktop, marker_x, 0, marker_x, 5000)
541
+
542
def _highlight_productions(self):
    """
    Select, in the production list, exactly those productions that the
    parser reports as currently reducible.
    """
    # Highlight the productions that can be reduced.
    listbox = self._prodlist
    listbox.selection_clear(0, "end")
    for production in self._parser.reducible_productions():
        listbox.selection_set(self._productions.index(production))
548
+
549
+ #########################################
550
+ ## Button Callbacks
551
+ #########################################
552
+
553
def destroy(self, *e):
    """
    Destroy the top-level window; do nothing if it is already gone.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    top = self._top
    if top is not None:
        top.destroy()
        self._top = None
558
+
559
def reset(self, *e):
    """
    Re-initialize the parser with the current sentence and redraw.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    sentence = self._sent
    self._parser.initialize(sentence)
    for label, text in ((self._lastoper1, "Reset App"), (self._lastoper2, "")):
        label["text"] = text
    self._redraw()
564
+
565
def step(self, *e):
    """
    Perform one parser step: reduce if possible, otherwise shift.

    When neither operation applies, the feedback labels report whether
    the parse finished with success or failure.

    :param e: Ignored; accepts the event passed by key bindings.
    :return: True if a reduce or shift was performed, False otherwise.
    """
    # While an animation is running, reduce() and shift() both refuse to
    # act.  Without this guard that refusal would be misreported below
    # as a finished parse.
    if self._animating_lock:
        return False
    if self.reduce() or self.shift():
        return True

    self._lastoper1["text"] = "Finished:"
    if list(self._parser.parses()):
        self._lastoper2["text"] = "Success"
    else:
        self._lastoper2["text"] = "Failure"
    return False
577
+
578
def shift(self, *e):
    """
    Shift the next token onto the stack, animating if enabled.

    :param e: Ignored; accepts the event passed by bindings.
    :return: True if a token was shifted, False if the parser could not
        shift, and None if an animation is in progress.
    """
    if self._animating_lock:
        return
    if not self._parser.shift():
        return False

    shifted = self._parser.stack()[-1]
    self._lastoper1["text"] = "Shift:"
    self._lastoper2["text"] = "%r" % shifted
    refresh = self._animate_shift if self._animate.get() else self._redraw
    refresh()
    return True
591
+
592
def reduce(self, *e):
    """
    Ask the parser to reduce, animating the result if enabled.

    :param e: Ignored; accepts the event passed by bindings.
    :return: Whatever the parser's ``reduce()`` returned, or None if an
        animation is in progress.
    """
    if self._animating_lock:
        return
    applied = self._parser.reduce()
    if not applied:
        return applied

    self._lastoper1["text"] = "Reduce:"
    self._lastoper2["text"] = "%s" % applied
    refresh = self._animate_reduce if self._animate.get() else self._redraw
    refresh()
    return applied
604
+
605
def undo(self, *e):
    """
    Undo the parser's last operation and redraw, unless an animation is
    in progress.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    if not self._animating_lock and self._parser.undo():
        self._redraw()
610
+
611
def postscript(self, *e):
    """
    Print the canvas via the canvas frame's ``print_to_file``.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    canvas_frame = self._cframe
    canvas_frame.print_to_file()
613
+
614
def mainloop(self, *args, **kwargs):
    """
    Enter the Tkinter mainloop.  This function must be called if
    this demo is created from a non-interactive program (e.g.
    from a script); otherwise, the demo will close as soon as
    the script completes.

    Returns immediately, without entering the loop, when ``in_idle()``
    is true.
    """
    if not in_idle():
        self._top.mainloop(*args, **kwargs)
624
+
625
+ #########################################
626
+ ## Menubar callbacks
627
+ #########################################
628
+
629
def resize(self, size=None):
    """
    Apply the current font size to all three fonts and redraw.

    :param size: If given, stored in the size variable first; otherwise
        the variable's current value is used.
    """
    if size is not None:
        self._size.set(size)

    # A negative Tk font size is measured in pixels rather than points.
    pixels = -abs(self._size.get())
    for font in (self._font, self._boldfont, self._sysfont):
        font.configure(size=pixels)

    self._redraw()
645
+
646
def help(self, *e):
    """
    Show the module docstring in a help window.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    title = "Help: Shift-Reduce Parser Application"
    text = (__doc__ or "").strip()
    # The default font's not very legible; try using 'fixed' instead.
    try:
        ShowText(self._top, title, text, width=75, font="fixed")
    except Exception:
        # Presumably the 'fixed' font was rejected; retry with the default
        # font.  Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        ShowText(self._top, title, text, width=75)
663
+
664
def about(self, *e):
    """
    Show the "About" information, preferring a native message box.

    :param e: Ignored; accepts the event passed by bindings.
    """
    ABOUT = "NLTK Shift-Reduce Parser Application\n" + "Written by Edward Loper"
    TITLE = "About: Shift-Reduce Parser Application"
    try:
        from tkinter.messagebox import Message

        Message(message=ABOUT, title=TITLE).show()
    except Exception:
        # Fall back to a plain text window if the message box cannot be
        # imported or shown.  Catching Exception rather than using a bare
        # except lets KeyboardInterrupt and SystemExit propagate.
        ShowText(self._top, TITLE, ABOUT)
673
+
674
def edit_grammar(self, *e):
    """
    Open a ``CFGEditor`` on the parser's grammar; the editor is given
    ``set_grammar`` as its callback.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    current_grammar = self._parser.grammar()
    CFGEditor(self._top, current_grammar, self.set_grammar)
676
+
677
def set_grammar(self, grammar):
    """
    Install a new grammar in the parser and refill the production list.

    :param grammar: The grammar to use; its ``productions()`` populate
        the listbox.
    """
    self._parser.set_grammar(grammar)
    self._productions = list(grammar.productions())

    listbox = self._prodlist
    listbox.delete(0, "end")
    for entry in self._productions:
        listbox.insert("end", (" %s" % entry))
683
+
684
def edit_sentence(self, *e):
    """
    Open an ``EntryDialog`` pre-filled with the current sentence; the
    dialog is given ``set_sentence`` as its callback.

    :param e: Ignored; accepts the event passed by key bindings.
    """
    current_text = " ".join(self._sent)
    EntryDialog(
        self._top,
        current_text,
        "Enter a new sentence to parse.",
        self.set_sentence,
        "Edit Text",
    )
689
+
690
def set_sentence(self, sent):
    """
    Replace the sentence being parsed and reset the parser.

    :param sent: The new sentence as a string; it is split on whitespace.
    """
    tokens = sent.split()  # [XX] use tagged?
    self._sent = tokens
    self.reset()
693
+
694
+ #########################################
695
+ ## Reduce Production Selection
696
+ #########################################
697
+
698
def _toggle_grammar(self, *e):
    """
    Show or hide the production frame according to the "Show Grammar"
    variable, and report the change in the feedback labels.

    :param e: Ignored; accepts the event passed by bindings.
    """
    if not self._show_grammar.get():
        self._prodframe.pack_forget()
        self._lastoper1["text"] = "Hide Grammar"
    else:
        self._prodframe.pack(
            fill="both", side="left", padx=2, after=self._feedbackframe
        )
        self._lastoper1["text"] = "Show Grammar"
    self._lastoper2["text"] = ""
708
+
709
def _prodlist_select(self, event):
    """
    Apply the production that was just selected in the production list.

    If the parser refuses the reduction, the list selection is restored
    to the set of reducible productions.

    :param event: The ``<<ListboxSelect>>`` event (unused).
    """
    # Like reduce(), refuse to act while an animation is running: the
    # stack widgets are out of step with the parser until it finishes.
    # The animation's final frame calls _highlight_productions(), which
    # restores the list selection.
    if self._animating_lock:
        return
    selection = self._prodlist.curselection()
    if len(selection) != 1:
        return
    index = int(selection[0])
    production = self._parser.reduce(self._productions[index])
    if production:
        self._lastoper1["text"] = "Reduce:"
        self._lastoper2["text"] = "%s" % production
        if self._animate.get():
            self._animate_reduce()
        else:
            self._redraw()
    else:
        # Reset the production selections.
        self._highlight_productions()
728
+
729
def _popup_reduce(self, widget):
    """
    Pop up a menu of the currently reducible productions at the pointer.

    Choosing an entry applies that entry's production.  Nothing is shown
    when no production is reducible.

    :param widget: The stack widget that was clicked (unused).
    """
    productions = self._parser.reducible_productions()
    if len(productions) == 0:
        return

    def apply_production(production):
        # Same bookkeeping as reduce(), but for an explicitly chosen
        # production (passed to the parser as _prodlist_select does).
        if self._animating_lock:
            return
        applied = self._parser.reduce(production)
        if applied:
            self._lastoper1["text"] = "Reduce:"
            self._lastoper2["text"] = "%s" % applied
            if self._animate.get():
                self._animate_reduce()
            else:
                self._redraw()

    # Remove old commands.
    self._reduce_menu.delete(0, "end")
    for production in productions:
        # Bind the production as a default argument so each entry keeps
        # its own production rather than the loop's last value.
        self._reduce_menu.add_command(
            label=str(production),
            command=lambda production=production: apply_production(production),
        )
    self._reduce_menu.post(
        self._canvas.winfo_pointerx(), self._canvas.winfo_pointery()
    )
741
+
742
+ #########################################
743
+ ## Animations
744
+ #########################################
745
+
746
def _animate_shift(self):
    """
    Start animating the first remaining-text widget towards the stack.

    The horizontal distance is divided evenly over the number of frames
    given by the animation speed variable.
    """
    # What widget are we shifting?
    moving = self._rtextwidgets[0]

    # Where are we shifting from & to?
    start_x = moving.bbox()[0]
    if self._stackwidgets:
        target_x = self._stackwidgets[-1].bbox()[2] + 10
    else:
        target_x = 5

    # Start animating.
    frames = self._animate.get()
    step_x = (target_x - start_x) * 1.0 / frames
    self._animate_shift_frame(frames, moving, step_x)
761
+
762
def _animate_shift_frame(self, frame, widget, dx):
    """
    Run one frame of the shift animation.

    :param frame: Number of frames left; at zero the animation finishes.
    :param widget: The widget being shifted.
    :param dx: Horizontal distance to move per frame.
    """
    if frame > 0:
        # Hold the lock, move one step and schedule the next frame.
        self._animating_lock = 1
        widget.move(dx, 0)
        self._top.after(10, self._animate_shift_frame, frame - 1, widget, dx)
        return

    # but: stacktop??

    # Shift the widget to the stack.
    del self._rtextwidgets[0]
    self._stackwidgets.append(widget)
    self._animating_lock = 0

    # Display the available productions.
    self._draw_stack_top(widget)
    self._highlight_productions()
778
+
779
def _animate_reduce(self):
    """
    Start animating the stack widgets that the last reduction combined.

    The number of widgets equals the number of children of the element
    now on top of the parser's stack.
    """
    # What widgets are we shifting?
    child_count = len(self._parser.stack()[-1])  # number of children
    moving = self._stackwidgets[-child_count:]

    # How far are we moving?
    first = moving[0]
    if isinstance(first, TreeSegmentWidget):
        first_height = first.label().height()
    else:
        first_height = first.height()
    ydist = 15 + first_height

    # Start animating.
    frames = self._animate.get()
    step_y = ydist * 2.0 / frames
    self._animate_reduce_frame(frames / 2, moving, step_y)
794
+
795
def _animate_reduce_frame(self, frame, widgets, dy):
    """
    Run one frame of the reduce animation.

    :param frame: Number of frames left; at zero (or below) the moved
        widgets are regrouped under a new tree segment.
    :param widgets: The stack widgets being reduced.
    :param dy: Vertical distance to move per frame.
    :raise ValueError: If the top of the parser's stack is not a Tree
        when the animation finishes.
    """
    if frame > 0:
        # Hold the lock, move one step and schedule the next frame.
        self._animating_lock = 1
        for moving in widgets:
            moving.move(0, dy)
        self._top.after(10, self._animate_reduce_frame, frame - 1, widgets, dy)
        return

    # Detach the reduced widgets from the stack list and the frame so
    # that they can become children of the new tree segment.
    del self._stackwidgets[-len(widgets) :]
    for moving in widgets:
        self._cframe.remove_widget(moving)

    top_of_stack = self._parser.stack()[-1]
    if not isinstance(top_of_stack, Tree):
        raise ValueError()
    node_label = TextWidget(
        self._canvas,
        str(top_of_stack.label()),
        color="#006060",
        font=self._boldfont,
    )
    segment = TreeSegmentWidget(self._canvas, node_label, widgets, width=2)

    # Place the segment to the right of what is left on the stack.
    (_left, top, _right, bottom) = self._stacklabel.bbox()
    y = bottom - top + 10
    if self._stackwidgets:
        x = self._stackwidgets[-1].bbox()[2] + 10
    else:
        x = 5
    self._cframe.add_widget(segment, x, y)
    self._stackwidgets.append(segment)

    # Display the available productions.
    self._draw_stack_top(segment)
    self._highlight_productions()

    self._animating_lock = 0
852
+
853
+ #########################################
854
+ ## Hovering.
855
+ #########################################
856
+
857
def _highlight_hover(self, event):
    """
    Highlight the stack widgets that the hovered production would reduce.

    :param event: The ``<Motion>`` event; its ``y`` picks the list entry.
    """
    # What production are we hovering over?
    hovered = self._prodlist.nearest(event.y)
    if hovered == self._hover:
        return

    # Clear any previous hover highlighting.
    self._clear_hover()

    # If the production corresponds to an available reduction,
    # highlight the stack.
    selected = [int(entry) for entry in self._prodlist.curselection()]
    if hovered in selected:
        rhs_length = len(self._productions[hovered].rhs())
        for stackwidget in self._stackwidgets[-rhs_length:]:
            # Tree segments are coloured through their label widget.
            if isinstance(stackwidget, TreeSegmentWidget):
                target = stackwidget.label()
            else:
                target = stackwidget
            target["color"] = "#00a000"

    # Remember what production we're hovering over.
    self._hover = hovered
879
+
880
def _clear_hover(self, *event):
    """
    Remove hover highlighting from every stack widget.

    Does nothing when no production is being hovered over.

    :param event: Ignored; accepts the ``<Leave>`` event.
    """
    # Clear any previous hover highlighting.
    if self._hover != -1:
        self._hover = -1
        for stackwidget in self._stackwidgets:
            # Tree segments are coloured through their label widget.
            if isinstance(stackwidget, TreeSegmentWidget):
                target = stackwidget.label()
            else:
                target = stackwidget
            target["color"] = "black"
890
+
891
+
892
def app():
    """
    Create a shift reduce parser app, using a simple grammar and
    text.
    """

    from nltk.grammar import CFG, Nonterminal, Production

    symbols = [Nonterminal(name) for name in "S VP NP PP P N Name V Det".split()]
    (S, VP, NP, PP, P, N, Name, V, Det) = symbols

    rules = [
        # Syntactic Productions
        (S, [NP, VP]),
        (NP, [Det, N]),
        (NP, [NP, PP]),
        (VP, [VP, PP]),
        (VP, [V, NP, PP]),
        (VP, [V, NP]),
        (PP, [P, NP]),
        # Lexical Productions
        (NP, ["I"]),
        (Det, ["the"]),
        (Det, ["a"]),
        (N, ["man"]),
        (V, ["saw"]),
        (P, ["in"]),
        (P, ["with"]),
        (N, ["park"]),
        (N, ["dog"]),
        (N, ["statue"]),
        (Det, ["my"]),
    ]
    productions = tuple(Production(lhs, rhs) for lhs, rhs in rules)

    grammar = CFG(S, productions)

    # tokenize the sentence
    sent = "my dog saw a man in the park with a statue".split()

    ShiftReduceApp(grammar, sent).mainloop()
932
+
933
+
934
# Launch the demo only when this file is executed as a script.
if __name__ == "__main__":
    app()

# Names exported by ``from <module> import *``.
__all__ = ["app"]
pipeline/nltk/app/wordfreq_app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Wordfreq Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Sumukh Ghodke <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ from matplotlib import pylab
9
+
10
+ from nltk.corpus import gutenberg
11
+ from nltk.text import Text
12
+
13
+
14
def plot_word_freq_dist(text):
    """
    Plot the cumulative percentage of tokens covered by the 50 most
    common samples of ``text``.

    :param text: An object providing ``vocab()`` (a frequency
        distribution with ``most_common`` and ``N``) and ``name``.
    """
    # Local import: keeps this function self-contained.
    from itertools import accumulate

    fd = text.vocab()

    most_common = fd.most_common(50)
    samples = [item for item, _ in most_common]
    # Evaluate the total once and keep a running sum, instead of calling
    # fd.N() and re-summing a growing prefix for every sample.
    total = fd.N()
    values = [
        running * 100.0 / total
        for running in accumulate(count for _, count in most_common)
    ]
    pylab.title(text.name)
    pylab.xlabel("Samples")
    pylab.ylabel("Cumulative Percentage")
    pylab.plot(values)
    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
    pylab.show()
26
+
27
+
28
def app():
    """
    Plot the word frequency distribution of the Gutenberg corpus file
    ``melville-moby_dick.txt``.
    """
    moby_dick_words = gutenberg.words("melville-moby_dick.txt")
    plot_word_freq_dist(Text(moby_dick_words))
31
+
32
+
33
# Show the plot only when this file is executed as a script.
if __name__ == "__main__":
    app()

# Names exported by ``from <module> import *``.
__all__ = ["app"]
pipeline/nltk/app/wordnet_app.py ADDED
@@ -0,0 +1,1005 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: WordNet Browser Application
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Jussi Salmela <[email protected]>
5
+ # Paul Bone <[email protected]>
6
+ # URL: <https://www.nltk.org/>
7
+ # For license information, see LICENSE.TXT
8
+
9
+ """
10
+ A WordNet Browser application which launches the default browser
11
+ (if it is not already running) and opens a new tab with a connection
12
+ to http://localhost:port/ . It also starts an HTTP server on the
13
+ specified port and begins serving browser requests. The default
14
+ port is 8000. (For command-line help, run "python wordnet -h")
15
+ This application requires that the user's web browser supports
16
+ Javascript.
17
+
18
+ BrowServer is a server for browsing the NLTK Wordnet database. It first
19
+ launches a browser client to be used for browsing and then starts
20
+ serving the requests of that and maybe other clients
21
+
22
+ Usage::
23
+
24
+ browserver.py -h
25
+ browserver.py [-s] [-p <port>]
26
+
27
+ Options::
28
+
29
+ -h or --help
30
+ Display this help message.
31
+
32
+ -l <file> or --log-file <file>
33
+ Logs messages to the given file. If this option is not specified
34
+ messages are silently dropped.
35
+
36
+ -p <port> or --port <port>
37
+ Run the web server on this TCP port, defaults to 8000.
38
+
39
+ -s or --server-mode
40
+ Do not start a web browser, and do not allow a user to
41
+ shutdown the server through the web interface.
42
+ """
43
+ # TODO: throughout this package variable names and docstrings need
44
+ # modifying to be compliant with NLTK's coding standards. Tests also
45
+ # need to be developed to ensure this continues to work in the face of
46
+ # changes to other NLTK packages.
47
+
48
+ import base64
49
+ import copy
50
+ import getopt
51
+ import io
52
+ import os
53
+ import pickle
54
+ import sys
55
+ import threading
56
+ import time
57
+ import webbrowser
58
+ from collections import defaultdict
59
+ from http.server import BaseHTTPRequestHandler, HTTPServer
60
+
61
+ # Allow this program to run inside the NLTK source tree.
62
+ from sys import argv
63
+ from urllib.parse import unquote_plus
64
+
65
+ from nltk.corpus import wordnet as wn
66
+ from nltk.corpus.reader.wordnet import Lemma, Synset
67
+
68
+ firstClient = True
69
+
70
+ # True if we're not also running a web browser. The value of server_mode
71
+ # gets set by demo().
72
+ server_mode = None
73
+
74
+ # If set this is a file object for writing log messages.
75
+ logfile = None
76
+
77
+
78
class MyServerHandler(BaseHTTPRequestHandler):
    """
    HTTP request handler for the WordNet browser.

    GET requests are dispatched on the request path: the shutdown
    command, the index page, static ``.html`` pages, ``search`` queries,
    ``lookup_`` links and the start page.  Every response is sent with
    status 200.
    """

    def do_HEAD(self):
        """Answer a HEAD request with the status line and no content type."""
        self.send_head()

    def do_GET(self):
        """Build the page for ``self.path`` and write it to the client."""
        global firstClient
        sp = self.path[1:]
        if unquote_plus(sp) == "SHUTDOWN THE SERVER":
            if server_mode:
                page = "Server must be killed with SIGTERM."
                content_type = "text/plain"
            else:
                print("Server shutting down!")
                os._exit(0)

        elif sp == "":  # First request.
            content_type = "text/html"
            if not server_mode and firstClient:
                firstClient = False
                page = get_static_index_page(True)
            else:
                page = get_static_index_page(False)
            word = "green"

        elif sp.endswith(".html"):  # Trying to fetch a HTML file TODO:
            content_type = "text/html"
            usp = unquote_plus(sp)
            if usp == "NLTK Wordnet Browser Database Info.html":
                word = "* Database Info *"
                if os.path.isfile(usp):
                    with open(usp) as infile:
                        page = infile.read()
                else:
                    page = (
                        (html_header % word) + "<p>The database info file:"
                        "<p><b>"
                        + usp
                        + "</b>"
                        + "<p>was not found. Run this:"
                        + "<p><b>python dbinfo_html.py</b>"
                        + "<p>to produce it."
                        + html_trailer
                    )
            else:
                # Handle files here.
                word = sp
                try:
                    page = get_static_page_by_path(usp)
                except FileNotFoundError:
                    page = "Internal error: Path for static page '%s' is unknown" % usp
                    # Set type to plain to prevent XSS by printing the path as HTML
                    content_type = "text/plain"
        elif sp.startswith("search"):
            # This doesn't seem to work with MWEs.
            try:
                parts = (sp.split("?")[1]).split("&")
                word = [
                    p.split("=")[1].replace("+", " ")
                    for p in parts
                    if p.startswith("nextWord")
                ][0]
            except IndexError:
                # No query string, no ``nextWord`` parameter, or no value:
                # report it like any other unparseable request instead of
                # letting the exception escape from the handler.
                content_type = "text/plain"
                page = "Could not parse request: '%s'" % sp
            else:
                content_type = "text/html"
                page, word = page_from_word(word)
        elif sp.startswith("lookup_"):
            # TODO add a variation of this that takes a non encoded word or MWE.
            content_type = "text/html"
            sp = sp[len("lookup_") :]
            page, word = page_from_href(sp)
        elif sp == "start_page":
            # if this is the first request we should display help
            # information, and possibly set a default word.
            content_type = "text/html"
            page, word = page_from_word("wordnet")
        else:
            content_type = "text/plain"
            page = "Could not parse request: '%s'" % sp

        # Send result.
        self.send_head(content_type)
        self.wfile.write(page.encode("utf8"))

    def send_head(self, type=None):
        """
        Send a 200 status line and the response headers.

        :param type: The value for the ``Content-type`` header.  When it
            is None the header is omitted, rather than being sent with
            the literal text "None".
        """
        self.send_response(200)
        if type is not None:
            self.send_header("Content-type", type)
        self.end_headers()

    def log_message(self, format, *args):
        """Write a log line to ``logfile`` if one is set; otherwise drop it."""
        if logfile:
            logfile.write(
                "%s - - [%s] %s\n"
                % (self.address_string(), self.log_date_time_string(), format % args)
            )
171
+
172
+
173
def get_unique_counter_from_url(sp):
    """
    Return the integer that follows the last URL-encoded ``#`` (``%23``)
    in ``sp``, or None when ``sp`` contains no ``%23``.
    """
    _, marker, counter = sp.rpartition("%23")
    return int(counter) if marker else None
183
+
184
+
185
def wnb(port=8000, runBrowser=True, logfilename=None):
    """
    Run NLTK Wordnet Browser Server.

    :param port: The port number for the server to listen on, defaults to
                 8000
    :type port: int

    :param runBrowser: True to start a web browser and point it at the web
                       server.
    :type runBrowser: bool

    :param logfilename: Path of a file to append request log lines to;
                        nothing is logged when omitted.
    :type logfilename: str or None
    """
    # The webbrowser module is unpredictable, typically it blocks if it uses
    # a console web browser, and doesn't block if it uses a GUI webbrowser,
    # so we need to force it to have a clear correct behaviour.
    #
    # Normally the server should run for as long as the user wants. They
    # should ideally be able to control this from the UI by closing the
    # window or tab.  Second best would be clicking a button to say
    # 'Shutdown' that first shuts down the server and closes the window or
    # tab, or exits the text-mode browser. Both of these are unfeasible.
    #
    # The next best alternative is to start the server, have it close when
    # it receives SIGTERM (default), and run the browser as well. The user
    # may have to shutdown both programs.
    #
    # Since webbrowser may block, and the webserver will block, we must run
    # them in separate threads.
    #
    global server_mode, logfile
    server_mode = not runBrowser

    # Setup logging.
    if logfilename:
        try:
            logfile = open(logfilename, "a", 1)  # 1 means 'line buffering'
        except OSError as e:
            # write() takes a single string, so format the message first.
            sys.stderr.write(f"Couldn't open {logfilename} for writing: {e}\n")
            sys.exit(1)
    else:
        logfile = None

    # Compute URL
    url = "http://localhost:" + str(port)

    browser_thread = None
    try:
        # Bind the server before launching the browser thread: if the port
        # is unavailable this raises, and no thread is left waiting forever
        # on an event that would never be set.
        server = HTTPServer(("", port), MyServerHandler)
        try:
            if logfile:
                logfile.write("NLTK Wordnet browser server running serving: %s\n" % url)
            if runBrowser:
                server_ready = threading.Event()
                browser_thread = startBrowser(url, server_ready)
                server_ready.set()

            try:
                server.serve_forever()
            except KeyboardInterrupt:
                pass
        finally:
            # Release the listening socket however serving ended.
            server.server_close()

        if browser_thread is not None:
            browser_thread.join()
    finally:
        if logfile:
            logfile.close()
254
+
255
+
256
def startBrowser(url, server_ready):
    """
    Start a thread that opens ``url`` in a web browser once the
    ``server_ready`` event has been set, and return that thread.
    """

    def open_when_ready():
        server_ready.wait()
        # Wait a little bit more, there's still the chance of a race
        # condition.
        time.sleep(1)
        webbrowser.open(url, new=2, autoraise=1)

    opener = threading.Thread(target=open_when_ready)
    opener.start()
    return opener
266
+
267
+
268
+ #####################################################################
269
+ # Utilities
270
+ #####################################################################
271
+
272
+
273
+ """
274
+ WordNet Browser Utilities.
275
+
276
+ This provides a backend to both wxbrowse and browserver.py.
277
+ """
278
+
279
+ ################################################################################
280
+ #
281
+ # Main logic for wordnet browser.
282
+ #
283
+
284
+ # This is wrapped inside a function since wn is only available if the
285
+ # WordNet corpus is installed.
286
def _pos_tuples():
    """
    Return the (WordNet pos constant, one-letter tag, display name)
    triples for noun, verb, adjective and adverb, in that order.
    """
    constants = (wn.NOUN, wn.VERB, wn.ADJ, wn.ADV)
    letters = ("N", "V", "J", "R")
    names = ("noun", "verb", "adj", "adv")
    return list(zip(constants, letters, names))
293
+
294
+
295
def _pos_match(pos_tuple):
    """
    Return the complete pos tuple from ``_pos_tuples()`` that agrees with
    the partial ``pos_tuple`` on its first non-None component, or None if
    no complete tuple matches. A leading "s" is treated as "a".
    """
    if pos_tuple[0] == "s":
        pos_tuple = ("a", pos_tuple[1], pos_tuple[2])

    # Index of the first non-None component; the last index if all are None.
    index = next(
        (i for i, component in enumerate(pos_tuple) if component is not None),
        len(pos_tuple) - 1,
    )
    wanted = pos_tuple[index]
    for candidate in _pos_tuples():
        if candidate[index] == wanted:
            return candidate
    return None
310
+
311
+
312
# Integer identifiers for the relation types. get_relations_data() puts one
# of them first in each relation entry, and Reference stores them in the
# per-synset sets of relations to unfold.
HYPONYM = 0
HYPERNYM = 1
CLASS_REGIONAL = 2  # NOTE(review): reassigned to 23 below
PART_HOLONYM = 3
PART_MERONYM = 4
ATTRIBUTE = 5
SUBSTANCE_HOLONYM = 6
SUBSTANCE_MERONYM = 7
MEMBER_HOLONYM = 8
MEMBER_MERONYM = 9
VERB_GROUP = 10
INSTANCE_HYPONYM = 12
INSTANCE_HYPERNYM = 13
CAUSE = 14
ALSO_SEE = 15
SIMILAR = 16
ENTAILMENT = 17
ANTONYM = 18
FRAMES = 19
PERTAINYM = 20

# NOTE(review): the three CLASS_* names are each assigned more than once in
# this module; the last assignment wins, so after import CLASS_REGIONAL is
# 23, CLASS_USAGE is 24 and CLASS_CATEGORY is 11.
CLASS_CATEGORY = 21
CLASS_USAGE = 22
CLASS_REGIONAL = 23
CLASS_USAGE = 24
CLASS_CATEGORY = 11

DERIVATIONALLY_RELATED_FORM = 25

INDIRECT_HYPERNYMS = 26
342
+
343
+
344
def lemma_property(word, synset, func):
    """
    Collect ``func(lemma)`` for every lemma of ``synset`` named ``word``.

    :param word: The lemma name to select
    :type word: str
    :param synset: The synset whose lemmas are inspected
    :type synset: Synset
    :param func: A function mapping a lemma to a list of related items
    :type func: callable
    :return: The concatenation of the lists returned by ``func``
    :rtype: list
    """
    # ``name`` is a method (it is called as ``l.name()`` elsewhere in this
    # module); comparing the bound method itself to ``word`` never matched,
    # so the result used to be always empty.
    return [
        item
        for lemma in synset.lemmas()
        if lemma.name() == word
        for item in func(lemma)
    ]
352
+
353
+
354
def rebuild_tree(orig_tree):
    """
    Convert a nested ``[node, subtree, subtree, ...]`` sequence into a
    ``(node, [rebuilt subtree, ...])`` pair, recursively.
    """
    root = orig_tree[0]
    rebuilt_children = []
    for subtree in orig_tree[1:]:
        rebuilt_children.append(rebuild_tree(subtree))
    return (root, rebuilt_children)
358
+
359
+
360
def get_relations_data(word, synset):
    """
    Get synset relations data for a synset.  Note that this doesn't
    yet support things such as full hyponym vs direct hyponym.

    :param word: The current word; lemma level relations (antonyms,
        pertainyms, derivationally related forms) are looked up for the
        lemmas with this name.
    :type word: str
    :param synset: The synset to describe
    :type synset: Synset
    :return: A tuple of (relation id, display name, related items) entries
        that depends on the part of speech of the synset
    :rtype: tuple
    :raise TypeError: if the part of speech of the synset is not handled
    """
    pos = synset.pos()
    if pos == wn.NOUN:
        return (
            (HYPONYM, "Hyponyms", synset.hyponyms()),
            (INSTANCE_HYPONYM, "Instance hyponyms", synset.instance_hyponyms()),
            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
            (
                INDIRECT_HYPERNYMS,
                "Indirect hypernyms",
                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
            ),
            #  hypernyms', 'Sister terms',
            (INSTANCE_HYPERNYM, "Instance hypernyms", synset.instance_hypernyms()),
            #            (CLASS_REGIONAL, ['domain term region'], ),
            (PART_HOLONYM, "Part holonyms", synset.part_holonyms()),
            (PART_MERONYM, "Part meronyms", synset.part_meronyms()),
            (SUBSTANCE_HOLONYM, "Substance holonyms", synset.substance_holonyms()),
            (SUBSTANCE_MERONYM, "Substance meronyms", synset.substance_meronyms()),
            (MEMBER_HOLONYM, "Member holonyms", synset.member_holonyms()),
            (MEMBER_MERONYM, "Member meronyms", synset.member_meronyms()),
            (ATTRIBUTE, "Attributes", synset.attributes()),
            (ANTONYM, "Antonyms", lemma_property(word, synset, lambda l: l.antonyms())),
            (
                DERIVATIONALLY_RELATED_FORM,
                "Derivationally related form",
                lemma_property(
                    word, synset, lambda l: l.derivationally_related_forms()
                ),
            ),
        )
    elif pos == wn.VERB:
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
            (HYPONYM, "Hyponym", synset.hyponyms()),
            (HYPERNYM, "Direct hypernyms", synset.hypernyms()),
            (
                INDIRECT_HYPERNYMS,
                "Indirect hypernyms",
                rebuild_tree(synset.tree(lambda x: x.hypernyms()))[1],
            ),
            (ENTAILMENT, "Entailments", synset.entailments()),
            (CAUSE, "Causes", synset.causes()),
            (ALSO_SEE, "Also see", synset.also_sees()),
            (VERB_GROUP, "Verb Groups", synset.verb_groups()),
            (
                DERIVATIONALLY_RELATED_FORM,
                "Derivationally related form",
                lemma_property(
                    word, synset, lambda l: l.derivationally_related_forms()
                ),
            ),
        )
    # The satellite test used to compare the bound method ``synset.pos``
    # (without calling it) to wn.ADJ_SAT, which is never equal, so satellite
    # adjectives fell through to the TypeError below.
    elif pos in (wn.ADJ, wn.ADJ_SAT):
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
            (SIMILAR, "Similar to", synset.similar_tos()),
            # Participle of verb - not supported by corpus
            (
                PERTAINYM,
                "Pertainyms",
                lemma_property(word, synset, lambda l: l.pertainyms()),
            ),
            (ATTRIBUTE, "Attributes", synset.attributes()),
            (ALSO_SEE, "Also see", synset.also_sees()),
        )
    elif pos == wn.ADV:
        # This is weird. Adverbs such as 'quick' and 'fast' don't seem
        # to have antonyms returned by the corpus.
        return (
            (ANTONYM, "Antonym", lemma_property(word, synset, lambda l: l.antonyms())),
        )
        # Derived from adjective - not supported by corpus
    else:
        raise TypeError("Unhandles synset POS type: " + str(pos))
438
+
439
+
440
+ html_header = """
441
+ <!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
442
+ 'http://www.w3.org/TR/html4/strict.dtd'>
443
+ <html>
444
+ <head>
445
+ <meta name='generator' content=
446
+ 'HTML Tidy for Windows (vers 14 February 2006), see www.w3.org'>
447
+ <meta http-equiv='Content-Type' content=
448
+ 'text/html; charset=us-ascii'>
449
+ <title>NLTK Wordnet Browser display of: %s</title></head>
450
+ <body bgcolor='#F5F5F5' text='#000000'>
451
+ """
452
+ html_trailer = """
453
+ </body>
454
+ </html>
455
+ """
456
+
457
+ explanation = """
458
+ <h3>Search Help</h3>
459
+ <ul><li>The display below the line is an example of the output the browser
460
+ shows you when you enter a search word. The search word was <b>green</b>.</li>
461
+ <li>The search result shows for different parts of speech the <b>synsets</b>
462
+ i.e. different meanings for the word.</li>
463
+ <li>All underlined texts are hypertext links. There are two types of links:
464
+ word links and others. Clicking a word link carries out a search for the word
465
+ in the Wordnet database.</li>
466
+ <li>Clicking a link of the other type opens a display section of data attached
467
+ to that link. Clicking that link a second time closes the section again.</li>
468
+ <li>Clicking <u>S:</u> opens a section showing the relations for that synset.
469
+ </li>
470
+ <li>Clicking on a relation name opens a section that displays the associated
471
+ synsets.</li>
472
+ <li>Type a search word in the <b>Word</b> field and start the search by the
473
+ <b>Enter/Return</b> key or click the <b>Search</b> button.</li>
474
+ </ul>
475
+ <hr width='100%'>
476
+ """
477
+
478
+ # HTML oriented functions
479
+
480
+
481
+ def _bold(txt):
482
+ return "<b>%s</b>" % txt
483
+
484
+
485
+ def _center(txt):
486
+ return "<center>%s</center>" % txt
487
+
488
+
489
+ def _hlev(n, txt):
490
+ return "<h%d>%s</h%d>" % (n, txt, n)
491
+
492
+
493
+ def _italic(txt):
494
+ return "<i>%s</i>" % txt
495
+
496
+
497
+ def _li(txt):
498
+ return "<li>%s</li>" % txt
499
+
500
+
501
def pg(word, body):
    """
    Build a complete page in NLTK Browser format: the standard header
    with ``word`` in the title, then ``body``, then the standard trailer.

    :param word: The word that the body corresponds to
    :type word: str
    :param body: The HTML body corresponding to the word
    :type body: str
    :return: a HTML page for the word-body combination
    :rtype: str
    """
    header = html_header % word
    return header + body + html_trailer
514
+
515
+
516
+ def _ul(txt):
517
+ return "<ul>" + txt + "</ul>"
518
+
519
+
520
def _abbc(txt):
    """
    abbc = asterisks, breaks, bold, center

    Return ``txt`` framed by ten asterisks on each side, preceded by ten
    line breaks, in bold and centered.
    """
    breaks = "<br>" * 10
    stars = "*" * 10
    framed = breaks + stars + " " + txt + " " + stars
    return _center(_bold(framed))
525
+
526
+
527
+ full_hyponym_cont_text = _ul(_li(_italic("(has full hyponym continuation)"))) + "\n"
528
+
529
+
530
def _get_synset(synset_key):
    """
    Look up a synset by its key. The key is the unique name of the
    synset, as returned by synset.name().
    """
    synset = wn.synset(synset_key)
    return synset
536
+
537
+
538
def _collect_one_synset(word, synset, synset_relations):
    """
    Returns the HTML string for one synset or word

    :param word: the current word
    :type word: str
    :param synset: a synset
    :type synset: synset
    :param synset_relations: information about which synset relations
        to display.
    :type synset_relations: dict(synset_key, set(relation_id))
    :return: The HTML string built for this synset
    :rtype: str
    :raise NotImplementedError: if ``synset`` is a tuple (a word)
    """
    if isinstance(synset, tuple):  # It's a word
        raise NotImplementedError("word not supported by _collect_one_synset")

    typ = "S"
    pos_tuple = _pos_match((synset.pos(), None, None))
    assert pos_tuple is not None, "pos_tuple is null: synset.pos(): %s" % synset.pos()
    descr = pos_tuple[2]
    # Toggle on a deep copy so that the relations currently displayed
    # (``synset_relations``) are left untouched; the copy only describes
    # the page the label link leads to.
    ref = copy.deepcopy(Reference(word, synset_relations))
    ref.toggle_synset(synset)
    # The help pages describe this link as "S:"; it used to be rendered
    # with a semicolon.
    synset_label = typ + ":"
    if synset.name() in synset_relations:
        synset_label = _bold(synset_label)
    s = f"<li>{make_lookup_link(ref, synset_label)} ({descr}) "

    def format_lemma(w):
        # The current word is shown in bold; every other lemma becomes a
        # link that searches for that lemma.
        w = w.replace("_", " ")
        if w.lower() == word:
            return _bold(w)
        else:
            ref = Reference(w)
            return make_lookup_link(ref, w)

    s += ", ".join(format_lemma(l.name()) for l in synset.lemmas())

    gl = " ({}) <i>{}</i> ".format(
        synset.definition(),
        "; ".join('"%s"' % e for e in synset.examples()),
    )
    return s + gl + _synset_relations(word, synset, synset_relations) + "</li>\n"
581
+
582
+
583
def _collect_all_synsets(word, pos, synset_relations=None):
    """
    Return a HTML unordered list of synsets for the given word and
    part of speech.

    :param word: The word to look up
    :type word: str
    :param pos: The part of speech to restrict the lookup to
    :param synset_relations: information about which synset relations
        to display; nothing is unfolded when omitted.
    :type synset_relations: dict(synset_key, set(relation_id)) or None
    :return: The HTML list
    :rtype: str
    """
    # A fresh dict per call replaces the former shared ``dict()`` default.
    if synset_relations is None:
        synset_relations = {}
    items = "".join(
        _collect_one_synset(word, synset, synset_relations)
        for synset in wn.synsets(word, pos)
    )
    return "<ul>%s\n</ul>\n" % items
592
+
593
+
594
def _synset_relations(word, synset, synset_relations):
    """
    Render the relations of one synset as a HTML list.

    :param word: The current word
    :type word: str
    :param synset: The synset for which we're building the relations.
    :type synset: Synset
    :param synset_relations: synset keys and relation types for which to display relations.
    :type synset_relations: dict(synset_key, set(relation_type))
    :return: The HTML for a synset's relations; the empty string when the
        synset is not a key of ``synset_relations``
    :rtype: str
    """
    synset_key = synset.name()
    if synset_key not in synset_relations:
        return ""
    ref = Reference(word, synset_relations)

    def relation_html(r):
        # A synset is rendered as a search link for its first lemma name.
        if isinstance(r, Synset):
            first_name = r.lemma_names()[0]
            return make_lookup_link(Reference(first_name), first_name)
        if isinstance(r, Lemma):
            return relation_html(r.synset())
        if isinstance(r, tuple):
            # It's probably a tuple containing a Synset and a list of
            # similar tuples.  This forms a tree of synsets.
            head_html = relation_html(r[0])
            children_html = "".join(
                "<li>%s</li>\n" % relation_html(child) for child in r[1]
            )
            return "{}\n<ul>{}</ul>\n".format(head_html, children_html)
        raise TypeError(
            "r must be a synset, lemma or list, it was: type(r) = %s, r = %s"
            % (type(r), r)
        )

    def make_synset_html(db_name, disp_name, rels):
        # The relation name links to the page with this relation toggled;
        # the toggle is applied to a deep copy so ``ref`` stays as it is.
        toggled = copy.deepcopy(ref).toggle_synset_relation(synset, db_name)
        parts = ["<i>%s</i>\n" % make_lookup_link(toggled, disp_name)]

        if db_name in ref.synset_relations[synset_key]:
            entries = "".join("<li>%s</li>\n" % relation_html(r) for r in rels)
            parts.append("<ul>%s</ul>\n" % entries)

        return "".join(parts)

    # Relations without any related item are left out.
    rows = [
        "<li>%s</li>" % make_synset_html(*rel_data)
        for rel_data in get_relations_data(word, synset)
        if rel_data[2] != []
    ]
    return "<ul>" + "\n".join(rows) + "</ul>"
654
+
655
+
656
class RestrictedUnpickler(pickle.Unpickler):
    """
    Unpickler that refuses to resolve any global, so that no class or
    function can be used while loading.
    """

    def find_class(self, module, name):
        # Forbid every function
        message = "global '{}.{}' is forbidden".format(module, name)
        raise pickle.UnpicklingError(message)
664
+
665
+
666
class Reference:
    """
    A reference to a page that may be generated by page_word
    """

    def __init__(self, word, synset_relations=None):
        """
        Build a reference to a new page.

        word is the word or words (separated by commas) for which to
        search for synsets of

        synset_relations is a dictionary of synset keys to sets of
        synset relation identifiers to unfold a list of synset
        relations for. A dictionary that is passed in is stored as is
        (not copied); when omitted, a new empty dictionary is used.
        """
        self.word = word
        # A fresh dict per instance: the former ``dict()`` default was one
        # object shared by all references built without the argument, and
        # the toggle methods below mutate it.
        self.synset_relations = {} if synset_relations is None else synset_relations

    def encode(self):
        """
        Encode this reference into a string to be used in a URL.
        """
        # This uses a tuple rather than an object since the python
        # pickle representation is much smaller and there is no need
        # to represent the complete object.
        string = pickle.dumps((self.word, self.synset_relations), -1)
        return base64.urlsafe_b64encode(string).decode()

    @staticmethod
    def decode(string):
        """
        Decode a reference encoded with Reference.encode
        """
        string = base64.urlsafe_b64decode(string.encode())
        # The string comes from a request URL, so it is loaded with the
        # unpickler that refuses every global.
        word, synset_relations = RestrictedUnpickler(io.BytesIO(string)).load()
        return Reference(word, synset_relations)

    def toggle_synset_relation(self, synset, relation):
        """
        Toggle the display of the relations for the given synset and
        relation type.

        This function will throw a KeyError if the synset is currently
        not being displayed.

        Returns this reference.
        """
        relations = self.synset_relations[synset.name()]
        if relation in relations:
            relations.remove(relation)
        else:
            relations.add(relation)

        return self

    def toggle_synset(self, synset):
        """
        Toggle displaying of the relation types for the given synset

        Returns this reference.
        """
        key = synset.name()
        if key in self.synset_relations:
            del self.synset_relations[key]
        else:
            self.synset_relations[key] = set()

        return self
729
+
730
+
731
def make_lookup_link(ref, label):
    """
    Return a HTML anchor showing ``label`` whose target is the
    ``lookup_`` URL of the encoded reference ``ref``.
    """
    target = ref.encode()
    return '<a href="lookup_{}">{}</a>'.format(target, label)
733
+
734
+
735
def page_from_word(word):
    """
    Build the page for a word, with no synset relations unfolded.

    :type word: str
    :param word: The currently active word
    :return: A tuple (page,word), where page is the new current HTML page
        to be sent to the browser and
        word is the new current word
    :rtype: A tuple (str,str)
    """
    reference = Reference(word)
    return page_from_reference(reference)
747
+
748
+
749
def page_from_href(href):
    """
    Decode an encoded reference and build the page it refers to.

    :param href: The hypertext reference to be solved
    :type href: str
    :return: A tuple (page,word), where page is the new current HTML page
             to be sent to the browser and
             word is the new current word
    :rtype: A tuple (str,str)
    """
    reference = Reference.decode(href)
    return page_from_reference(reference)
761
+
762
+
763
def page_from_reference(href):
    """
    Returns a tuple of the HTML page built and the new current word

    :param href: The reference to be solved; its ``word`` attribute holds
        the word or comma separated words to search for and its
        ``synset_relations`` attribute the relations to unfold.
    :type href: Reference
    :return: A tuple (page,word), where page is the new current HTML page
             to be sent to the browser and
             word is the new current word
    :rtype: A tuple (str,str)
    """
    from html import escape

    word = href.word
    pos_forms = defaultdict(list)
    normalized = (w.strip().lower().replace(" ", "_") for w in word.split(","))
    words = [w for w in normalized if w != ""]
    if not words:
        # No words were found. The message is the page (first element);
        # it used to be returned in the word position next to an empty
        # page, so the browser was shown nothing.
        return "Please specify a word to search for.", word

    # This looks up multiple words at once.  This is probably not
    # necessary and may lead to problems.
    for w in words:
        for pos in [wn.NOUN, wn.VERB, wn.ADJ, wn.ADV]:
            form = wn.morphy(w, pos)
            if form and form not in pos_forms[pos]:
                pos_forms[pos].append(form)
    body = ""
    for pos, pos_str, name in _pos_tuples():
        if pos in pos_forms:
            body += _hlev(3, name) + "\n"
            for w in pos_forms[pos]:
                # Not all words of exc files are in the database, skip
                # to the next word if a KeyError is raised.
                try:
                    body += _collect_all_synsets(w, pos, href.synset_relations)
                except KeyError:
                    pass
    if not body:
        # The word comes from the request, so it is escaped before it is
        # placed into the HTML page.
        body = "The word or words '%s' were not found in the dictionary." % escape(
            word
        )
    return body, word
803
+
804
+
805
+ #####################################################################
806
+ # Static pages
807
+ #####################################################################
808
+
809
+
810
def get_static_page_by_path(path):
    """
    Return the static HTML page registered under ``path``.

    Raises FileNotFoundError when ``path`` is not one of the known
    static page names.
    """
    # Builders are only called for the matching entry.
    pages = (
        ("index_2.html", lambda: get_static_index_page(False)),
        ("index.html", lambda: get_static_index_page(True)),
        (
            "NLTK Wordnet Browser Database Info.html",
            lambda: "Display of Wordnet Database Statistics is not supported",
        ),
        ("upper_2.html", lambda: get_static_upper_page(False)),
        ("upper.html", lambda: get_static_upper_page(True)),
        ("web_help.html", lambda: get_static_web_help_page()),
        ("wx_help.html", lambda: get_static_wx_help_page()),
    )
    for name, build in pages:
        if path == name:
            return build()
    raise FileNotFoundError()
829
+
830
+
831
+ def get_static_web_help_page():
832
+ """
833
+ Return the static web help page.
834
+ """
835
+ return """
836
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
837
+ <html>
838
+ <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
839
+ Copyright (C) 2001-2023 NLTK Project
840
+ Author: Jussi Salmela <[email protected]>
841
+ URL: <https://www.nltk.org/>
842
+ For license information, see LICENSE.TXT -->
843
+ <head>
844
+ <meta http-equiv='Content-Type' content='text/html; charset=us-ascii'>
845
+ <title>NLTK Wordnet Browser display of: * Help *</title>
846
+ </head>
847
+ <body bgcolor='#F5F5F5' text='#000000'>
848
+ <h2>NLTK Wordnet Browser Help</h2>
849
+ <p>The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database.
850
+ <p><b>You are using the Javascript client part of the NLTK Wordnet BrowseServer.</b> We assume your browser is in tab sheets enabled mode.</p>
851
+ <p>For background information on Wordnet, see the Wordnet project home page: <a href="https://wordnet.princeton.edu/"><b> https://wordnet.princeton.edu/</b></a>. For more information on the NLTK project, see the project home:
852
+ <a href="https://www.nltk.org/"><b>https://www.nltk.org/</b></a>. To get an idea of what the Wordnet version used by this browser includes choose <b>Show Database Info</b> from the <b>View</b> submenu.</p>
853
+ <h3>Word search</h3>
854
+ <p>The word to be searched is typed into the <b>New Word</b> field and the search started with Enter or by clicking the <b>Search</b> button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.</p>
855
+ <p>In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing <b>fLIeS</b> as an obscure example gives one <a href="MfLIeS">this</a>. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination.</p>
856
+ <p>The result of a search is a display of one or more
857
+ <b>synsets</b> for every part of speech in which a form of the
858
+ search word was found to occur. A synset is a set of words
859
+ having the same sense or meaning. Each word in a synset that is
860
+ underlined is a hyperlink which can be clicked to trigger an
861
+ automatic search for that word.</p>
862
+ <p>Every synset has a hyperlink <b>S:</b> at the start of its
863
+ display line. Clicking that symbol shows you the name of every
864
+ <b>relation</b> that this synset is part of. Every relation name is a hyperlink that opens up a display for that relation. Clicking it another time closes the display again. Clicking another relation name on a line that has an opened relation closes the open relation and opens the clicked relation.</p>
865
+ <p>It is also possible to give two or more words or collocations to be searched at the same time separating them with a comma like this <a href="Mcheer up,clear up">cheer up,clear up</a>, for example. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination. As you could see the search result includes the synsets found in the same order than the forms were given in the search field.</p>
866
+ <p>
867
+ There are also word level (lexical) relations recorded in the Wordnet database. Opening this kind of relation displays lines with a hyperlink <b>W:</b> at their beginning. Clicking this link shows more info on the word in question.</p>
868
+ <h3>The Buttons</h3>
869
+ <p>The <b>Search</b> and <b>Help</b> buttons need no more explanation. </p>
870
+ <p>The <b>Show Database Info</b> button shows a collection of Wordnet database statistics.</p>
871
+ <p>The <b>Shutdown the Server</b> button is shown for the first client of the BrowServer program i.e. for the client that is automatically launched when the BrowServer is started but not for the succeeding clients in order to protect the server from accidental shutdowns.
872
+ </p></body>
873
+ </html>
874
+ """
875
+
876
+
877
+ def get_static_welcome_message():
878
+ """
879
+ Get the static welcome page.
880
+ """
881
+ return """
882
+ <h3>Search Help</h3>
883
+ <ul><li>The display below the line is an example of the output the browser
884
+ shows you when you enter a search word. The search word was <b>green</b>.</li>
885
+ <li>The search result shows for different parts of speech the <b>synsets</b>
886
+ i.e. different meanings for the word.</li>
887
+ <li>All underlined texts are hypertext links. There are two types of links:
888
+ word links and others. Clicking a word link carries out a search for the word
889
+ in the Wordnet database.</li>
890
+ <li>Clicking a link of the other type opens a display section of data attached
891
+ to that link. Clicking that link a second time closes the section again.</li>
892
+ <li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
893
+ <li>Clicking on a relation name opens a section that displays the associated
894
+ synsets.</li>
895
+ <li>Type a search word in the <b>Next Word</b> field and start the search by the
896
+ <b>Enter/Return</b> key or click the <b>Search</b> button.</li>
897
+ </ul>
898
+ """
899
+
900
+
901
+ def get_static_index_page(with_shutdown):
902
+ """
903
+ Get the static index page.
904
+ """
905
+ template = """
906
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
907
+ <HTML>
908
+ <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
909
+ Copyright (C) 2001-2023 NLTK Project
910
+ Author: Jussi Salmela <[email protected]>
911
+ URL: <https://www.nltk.org/>
912
+ For license information, see LICENSE.TXT -->
913
+ <HEAD>
914
+ <TITLE>NLTK Wordnet Browser</TITLE>
915
+ </HEAD>
916
+
917
+ <frameset rows="7%%,93%%">
918
+ <frame src="%s" name="header">
919
+ <frame src="start_page" name="body">
920
+ </frameset>
921
+ </HTML>
922
+ """
923
+ if with_shutdown:
924
+ upper_link = "upper.html"
925
+ else:
926
+ upper_link = "upper_2.html"
927
+
928
+ return template % upper_link
929
+
930
+
931
+ def get_static_upper_page(with_shutdown):
932
+ """
933
+ Return the upper frame page,
934
+
935
+ If with_shutdown is True then a 'shutdown' button is also provided
936
+ to shutdown the server.
937
+ """
938
+ template = """
939
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
940
+ <html>
941
+ <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
942
+ Copyright (C) 2001-2023 NLTK Project
943
+ Author: Jussi Salmela <[email protected]>
944
+ URL: <https://www.nltk.org/>
945
+ For license information, see LICENSE.TXT -->
946
+ <head>
947
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
948
+ <title>Untitled Document</title>
949
+ </head>
950
+ <body>
951
+ <form method="GET" action="search" target="body">
952
+ Current Word:&nbsp;<input type="text" id="currentWord" size="10" disabled>
953
+ Next Word:&nbsp;<input type="text" id="nextWord" name="nextWord" size="10">
954
+ <input name="searchButton" type="submit" value="Search">
955
+ </form>
956
+ <a target="body" href="web_help.html">Help</a>
957
+ %s
958
+
959
+ </body>
960
+ </html>
961
+ """
962
+ if with_shutdown:
963
+ shutdown_link = '<a href="SHUTDOWN THE SERVER">Shutdown</a>'
964
+ else:
965
+ shutdown_link = ""
966
+
967
+ return template % shutdown_link
968
+
969
+
970
def usage():
    """
    Print the module docstring, which serves as the command line help.
    """
    help_text = __doc__
    print(help_text)
975
+
976
+
977
def app():
    """
    Command line entry point: parse the options and either print the help
    message or run the browser server.

    Options: -l/--logfile FILE, -p/--port PORT, -s/--server-mode (do not
    start a web browser) and -h/--help. An unknown option or a port that
    is not an integer is reported on stderr and the program exits with
    status 2.
    """
    # Parse and interpret options.
    try:
        (opts, _) = getopt.getopt(
            argv[1:], "l:p:sh", ["logfile=", "port=", "server-mode", "help"]
        )
    except getopt.GetoptError as err:
        sys.stderr.write(f"{err}\nUse --help for usage information.\n")
        sys.exit(2)

    port = 8000
    server_only = False
    help_mode = False
    logfilename = None
    for (opt, value) in opts:
        if opt in ("-l", "--logfile"):
            logfilename = str(value)
        elif opt in ("-p", "--port"):
            try:
                port = int(value)
            except ValueError:
                sys.stderr.write(f"Invalid port number: {value!r}\n")
                sys.exit(2)
        elif opt in ("-s", "--server-mode"):
            server_only = True
        elif opt in ("-h", "--help"):
            help_mode = True

    if help_mode:
        usage()
    else:
        wnb(port, not server_only, logfilename)
1000
+
1001
+
1002
+ if __name__ == "__main__":
1003
+ app()
1004
+
1005
+ __all__ = ["app"]
pipeline/nltk/book.py ADDED
@@ -0,0 +1,213 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Some texts for exploration in chapter 1 of the book
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Steven Bird <[email protected]>
5
+ #
6
+ # URL: <https://www.nltk.org/>
7
+ # For license information, see LICENSE.TXT
8
+
9
+ from nltk.corpus import (
10
+ genesis,
11
+ gutenberg,
12
+ inaugural,
13
+ nps_chat,
14
+ treebank,
15
+ webtext,
16
+ wordnet,
17
+ )
18
+ from nltk.probability import FreqDist
19
+ from nltk.text import Text
20
+ from nltk.util import bigrams
21
+
22
# NOTE: everything below runs at import time.  A banner is printed first,
# then each Text is built and its name printed straight away.
print("*** Introductory Examples for the NLTK Book ***")
print("Loading text1, ..., text9 and sent1, ..., sent9")
print("Type the name of the text or sentence to view it.")
print("Type: 'texts()' or 'sents()' to list the materials.")

# text1, text2 and text9 are built without an explicit ``name=``; the
# value printed for them is whatever ``Text`` assigns to ``.name``.
text1 = Text(gutenberg.words("melville-moby_dick.txt"))
print("text1:", text1.name)

text2 = Text(gutenberg.words("austen-sense.txt"))
print("text2:", text2.name)

# The remaining texts (except text9) are given an explicit display name.
text3 = Text(genesis.words("english-kjv.txt"), name="The Book of Genesis")
print("text3:", text3.name)

text4 = Text(inaugural.words(), name="Inaugural Address Corpus")
print("text4:", text4.name)

text5 = Text(nps_chat.words(), name="Chat Corpus")
print("text5:", text5.name)

text6 = Text(webtext.words("grail.txt"), name="Monty Python and the Holy Grail")
print("text6:", text6.name)

text7 = Text(treebank.words(), name="Wall Street Journal")
print("text7:", text7.name)

text8 = Text(webtext.words("singles.txt"), name="Personals Corpus")
print("text8:", text8.name)

text9 = Text(gutenberg.words("chesterton-thursday.txt"))
print("text9:", text9.name)
53
+
54
+
55
def texts():
    """Print the label and name of each of text1 ... text9, one per line."""
    loaded = (text1, text2, text3, text4, text5, text6, text7, text8, text9)
    for position, text in enumerate(loaded, start=1):
        print(f"text{position}:", text.name)
65
+
66
+
67
# Example sentences, stored as lists of tokens.  Each is written as a
# space-separated string and split on whitespace, so every token
# (including punctuation) becomes its own list element.
sent1 = "Call me Ishmael .".split()
sent2 = "The family of Dashwood had long been settled in Sussex .".split()
sent3 = "In the beginning God created the heaven and the earth .".split()
sent4 = (
    "Fellow - Citizens of the Senate and of the House of Representatives :"
).split()
sent5 = "I have a problem with people PMing me to lol JOIN".split()
sent6 = (
    "SCENE 1 : [ wind ] [ clop clop clop ] KING ARTHUR : Whoa there !"
).split()
sent7 = (
    "Pierre Vinken , 61 years old , will join the board "
    "as a nonexecutive director Nov. 29 ."
).split()
sent8 = (
    "25 SEXY MALE , seeks attrac older single lady , for discreet encounters ."
).split()
sent9 = (
    "THE suburb of Saffron Park lay on the sunset side of London , "
    "as red and ragged as a cloud of sunset ."
).split()
202
+
203
+
204
def sents():
    """Print sent1 ... sent9, one per line, with tokens joined by spaces."""
    examples = (sent1, sent2, sent3, sent4, sent5, sent6, sent7, sent8, sent9)
    for position, tokens in enumerate(examples, start=1):
        print(f"sent{position}:", " ".join(tokens))
pipeline/nltk/ccg/__init__.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Natural Language Toolkit: Combinatory Categorial Grammar
2
+ #
3
+ # Copyright (C) 2001-2023 NLTK Project
4
+ # Author: Graeme Gange <[email protected]>
5
+ # URL: <https://www.nltk.org/>
6
+ # For license information, see LICENSE.TXT
7
+
8
+ """
9
+ Combinatory Categorial Grammar.
10
+
11
+ For more information see nltk/doc/contrib/ccg/ccg.pdf
12
+ """
13
+
14
+ from nltk.ccg.chart import CCGChart, CCGChartParser, CCGEdge, CCGLeafEdge
15
+ from nltk.ccg.combinator import (
16
+ BackwardApplication,
17
+ BackwardBx,
18
+ BackwardCombinator,
19
+ BackwardComposition,
20
+ BackwardSx,
21
+ BackwardT,
22
+ DirectedBinaryCombinator,
23
+ ForwardApplication,
24
+ ForwardCombinator,
25
+ ForwardComposition,
26
+ ForwardSubstitution,
27
+ ForwardT,
28
+ UndirectedBinaryCombinator,
29
+ UndirectedComposition,
30
+ UndirectedFunctionApplication,
31
+ UndirectedSubstitution,
32
+ UndirectedTypeRaise,
33
+ )
34
+ from nltk.ccg.lexicon import CCGLexicon
pipeline/nltk/ccg/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (963 Bytes). View file
 
pipeline/nltk/ccg/__pycache__/api.cpython-39.pyc ADDED
Binary file (11.9 kB). View file