Spaces:

sunnychenxiwang
/

EasyDetect

Sleeping

EasyDetect / pipeline /nltk /draw /dispersion.py

update nltk

d916065 almost 2 years ago

1.85 kB

	# Natural Language Toolkit: Dispersion Plots
	#
	# Copyright (C) 2001-2023 NLTK Project
	# Author: Steven Bird <[email protected]>
	# URL: <https://www.nltk.org/>
	# For license information, see LICENSE.TXT

	"""
	A utility for displaying lexical dispersion.
	"""


	def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
	    """
	    Generate a lexical dispersion plot.

	    Every occurrence of a target word in ``text`` is drawn as a vertical
	    tick at that token's offset. Each target word gets its own row, with
	    the first entry of ``words`` on the top row and the last on the bottom.

	    :param text: The source text
	    :type text: list(str) or iter(str)
	    :param words: The target words
	    :type words: list of str
	    :param ignore_case: flag to set if case should be ignored when searching text
	    :type ignore_case: bool
	    :param title: the title shown above the plot
	    :type title: str
	    :return: a matplotlib Axes object that may still be modified before plotting
	    :rtype: Axes
	    :raises ImportError: if matplotlib is not installed
	    """

	    # matplotlib is an optional dependency, so import it lazily and turn a
	    # missing install into an actionable error message.
	    try:
	        import matplotlib.pyplot as plt
	    except ImportError as e:
	        raise ImportError(
	            "The plot function requires matplotlib to be installed. "
	            "See https://matplotlib.org/"
	        ) from e

	    # Materialize once: the words are reversed, counted and used as labels.
	    words = list(words)

	    # Row 0 is the bottom of the y-axis, so number the rows over the
	    # reversed list to place words[0] on the top row.
	    rows_top_down = list(reversed(words))
	    word2y = {
	        word.casefold() if ignore_case else word: y
	        for y, word in enumerate(rows_top_down)
	    }

	    # Collect one (offset, row) point per matching token.
	    xs, ys = [], []
	    for x, token in enumerate(text):
	        token = token.casefold() if ignore_case else token
	        y = word2y.get(token)
	        if y is not None:
	            xs.append(x)
	            ys.append(y)

	    _, ax = plt.subplots()
	    # "|" is the vertical-line marker with no connecting line.
	    ax.plot(xs, ys, "|")
	    # Labels must follow the same reversed order used to number the rows,
	    # otherwise each row is labelled with the wrong word.
	    ax.set_yticks(list(range(len(words))), rows_top_down, color="C0")
	    ax.set_ylim(-1, len(words))
	    ax.set_title(title)
	    ax.set_xlabel("Word Offset")
	    return ax


	if __name__ == "__main__":
	    # Demo: show where the main characters of "Sense and Sensibility"
	    # occur across the novel, using the Gutenberg corpus shipped with NLTK.
	    import matplotlib.pyplot as plt

	    from nltk.corpus import gutenberg

	    words = [
	        "Elinor",
	        "Marianne",
	        "Edward",
	        "Willoughby",
	    ]
	    novel_tokens = gutenberg.words("austen-sense.txt")
	    dispersion_plot(novel_tokens, words)

	    # dispersion_plot only builds the Axes; the window is opened here.
	    plt.show()