Spaces:

sunnychenxiwang
/

EasyDetect

Sleeping

EasyDetect / pipeline /nltk /app /wordfreq_app.py

update nltk

d916065 12 months ago

957 Bytes

	# Natural Language Toolkit: Wordfreq Application
	#
	# Copyright (C) 2001-2023 NLTK Project
	# Author: Sumukh Ghodke <sghodke@csse.unimelb.edu.au>
	# URL: <https://www.nltk.org/>
	# For license information, see LICENSE.TXT

	from itertools import accumulate

	from matplotlib import pylab

	from nltk.corpus import gutenberg
	from nltk.text import Text


	def plot_word_freq_dist(text, num_samples=50):
	    """Plot the cumulative frequency of the most common samples in *text*.

	    The ``num_samples`` most common samples of ``text.vocab()`` are placed
	    along the x-axis in the order ``most_common`` returns them.  The y-value
	    at each sample is the running sum of the counts up to and including
	    that sample, expressed as a percentage of ``fd.N()``.  The figure is
	    titled with ``text.name`` and displayed with ``pylab.show()``.

	    :param text: object providing ``vocab()`` and a ``name`` attribute;
	        ``app`` passes an ``nltk.text.Text``.
	    :param num_samples: number of most common samples to plot.  Defaults
	        to 50, the value that used to be hard-coded.
	    """
	    fd = text.vocab()
	    total = fd.N()  # loop-invariant: look it up once instead of per point

	    samples = [item for item, _ in fd.most_common(num_samples)]
	    # One running sum replaces re-summing an ever-growing slice for every
	    # point.  Multiplying before dividing, as before, keeps the plotted
	    # floats unchanged.
	    values = [
	        running * 100.0 / total
	        for running in accumulate(fd[sample] for sample in samples)
	    ]

	    pylab.title(text.name)
	    pylab.xlabel("Samples")
	    pylab.ylabel("Cumulative Percentage")
	    pylab.plot(values)
	    # Label each tick with its sample, rotated so long tokens do not overlap.
	    pylab.xticks(range(len(samples)), [str(s) for s in samples], rotation=90)
	    pylab.show()


	def app():
	    """Plot the word frequency distribution of "melville-moby_dick.txt".

	    Reads the words of that Gutenberg corpus file, wraps them in a
	    ``Text`` and hands the result to ``plot_word_freq_dist``.
	    """
	    moby_dick_words = gutenberg.words("melville-moby_dick.txt")
	    plot_word_freq_dist(Text(moby_dick_words))


	# Names exported by ``from <module> import *``: only the demo entry point.
	__all__ = ["app"]

	# Run the demo when this file is executed as a script.
	if __name__ == "__main__":
	    app()