Upload 9 files
Browse files- nltk_data/corpora/cmudict.zip +3 -0
- nltk_data/corpora/cmudict/README +76 -0
- nltk_data/corpora/cmudict/cmudict +0 -0
- nltk_data/taggers/averaged_perceptron_tagger.zip +3 -0
- nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle +3 -0
- nltk_data/taggers/averaged_perceptron_tagger_eng.zip +3 -0
- nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.classes.json +1 -0
- nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.tagdict.json +1 -0
- nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.weights.json +0 -0
nltk_data/corpora/cmudict.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d07cca47fd72ad32ea9d8ad1219f85301eeaf4568f8b6b73747506a71fb5afd6
|
3 |
+
size 896069
|
nltk_data/corpora/cmudict/README
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
The Carnegie Mellon Pronouncing Dictionary [cmudict.0.7a]
|
2 |
+
|
3 |
+
ftp://ftp.cs.cmu.edu/project/speech/dict/
|
4 |
+
https://cmusphinx.svn.sourceforge.net/svnroot/cmusphinx/trunk/cmudict/cmudict.0.7a
|
5 |
+
|
6 |
+
Copyright (C) 1993-2008 Carnegie Mellon University. All rights reserved.
|
7 |
+
|
8 |
+
File Format: Each line consists of an uppercased word,
|
9 |
+
a counter (for alternative pronunciations), and a transcription.
|
10 |
+
Vowels are marked for stress (1=primary, 2=secondary, 0=no stress).
|
11 |
+
E.g.: NATURAL 1 N AE1 CH ER0 AH0 L
|
12 |
+
|
13 |
+
The dictionary contains 127069 entries. Of these, 119400 words are assigned
|
14 |
+
a unique pronunciation, 6830 words have two pronunciations, and 839 words have
|
15 |
+
three or more pronunciations. Many of these are fast-speech variants.
|
16 |
+
|
17 |
+
Phonemes: There are 39 phonemes, as shown below:
|
18 |
+
|
19 |
+
Phoneme Example Translation Phoneme Example Translation
|
20 |
+
------- ------- ----------- ------- ------- -----------
|
21 |
+
AA odd AA D AE at AE T
|
22 |
+
AH hut HH AH T AO ought AO T
|
23 |
+
AW cow K AW AY hide HH AY D
|
24 |
+
B be B IY CH cheese CH IY Z
|
25 |
+
D dee D IY DH thee DH IY
|
26 |
+
EH Ed EH D ER hurt HH ER T
|
27 |
+
EY ate EY T F fee F IY
|
28 |
+
G green G R IY N HH he HH IY
|
29 |
+
IH it IH T IY eat IY T
|
30 |
+
JH gee JH IY K key K IY
|
31 |
+
L lee L IY M me M IY
|
32 |
+
N knee N IY NG ping P IH NG
|
33 |
+
OW oat OW T OY toy T OY
|
34 |
+
P pee P IY R read R IY D
|
35 |
+
S sea S IY SH she SH IY
|
36 |
+
T tea T IY TH theta TH EY T AH
|
37 |
+
UH hood HH UH D UW two T UW
|
38 |
+
V vee V IY W we W IY
|
39 |
+
Y yield Y IY L D Z zee Z IY
|
40 |
+
ZH seizure S IY ZH ER
|
41 |
+
|
42 |
+
(For NLTK, entries have been sorted so that, e.g. FIRE 1 and FIRE 2
|
43 |
+
are contiguous, and not separated by FIRE'S 1.)
|
44 |
+
|
45 |
+
Redistribution and use in source and binary forms, with or without
|
46 |
+
modification, are permitted provided that the following conditions
|
47 |
+
are met:
|
48 |
+
|
49 |
+
1. Redistributions of source code must retain the above copyright
|
50 |
+
notice, this list of conditions and the following disclaimer.
|
51 |
+
The contents of this file are deemed to be source code.
|
52 |
+
|
53 |
+
2. Redistributions in binary form must reproduce the above copyright
|
54 |
+
notice, this list of conditions and the following disclaimer in
|
55 |
+
the documentation and/or other materials provided with the
|
56 |
+
distribution.
|
57 |
+
|
58 |
+
This work was supported in part by funding from the Defense Advanced
|
59 |
+
Research Projects Agency, the Office of Naval Research and the National
|
60 |
+
Science Foundation of the United States of America, and by member
|
61 |
+
companies of the Carnegie Mellon Sphinx Speech Consortium. We acknowledge
|
62 |
+
the contributions of many volunteers to the expansion and improvement of
|
63 |
+
this dictionary.
|
64 |
+
|
65 |
+
THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
|
66 |
+
ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
67 |
+
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
68 |
+
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
|
69 |
+
NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
70 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
71 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
72 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
73 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
74 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
75 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
76 |
+
|
nltk_data/corpora/cmudict/cmudict
ADDED
The diff for this file is too large to render.
See raw diff
|
|
nltk_data/taggers/averaged_perceptron_tagger.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1f13cf2532daadfd6f3bc481a49859f0b8ea6432ccdcd83e6a49a5f19008de9
|
3 |
+
size 2526731
|
nltk_data/taggers/averaged_perceptron_tagger/averaged_perceptron_tagger.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25a5a19c7ced7b2bac3831da5bc0afcc2c34e5dd01cd4f361bb799949a696238
|
3 |
+
size 6138625
|
nltk_data/taggers/averaged_perceptron_tagger_eng.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6025f530624335c67d6547d44757b357b4e79bae030a0383e9887a92c1718f0b
|
3 |
+
size 1539115
|
nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.classes.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[".", "(", ")", ":", "''", "EX", "JJS", "WRB", "VBG", "VBP", "NN", "SYM", "VB", "UH", "NNPS", "NNP", "``", "$", "NNS", "JJR", "MD", "RP", "VBD", "DT", "POS", "RBR", ",", "VBZ", "PDT", "VBN", "WP$", "WDT", "WP", "PRP$", "CD", "IN", "#", "CC", "RB", "FW", "RBS", "PRP", "LS", "JJ", "TO"]
|
nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.tagdict.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"four": "CD", "facilities": "NNS", "controversial": "JJ", "Until": "IN", "whose": "WP$", "under": "IN", "pact": "NN", "regional": "JJ", "GE": "NNP", "every": "DT", "GM": "NNP", "Moon": "NNP", "school": "NN", "companies": "NNS", "Nasdaq": "NNP", "Paul": "NNP", "Pinkerton": "NNP", "leaders": "NNS", "guidelines": "NNS", "Sales": "NNS", "machines": "NNS", "pace": "NN", "spokesman": "NN", "new": "JJ", "ever": "RB", "men": "NNS", "here": "RB", "protection": "NN", "studio": "NN", "active": "JJ", "100": "CD", "ventures": "NNS", "items": "NNS", "employees": "NNS", "credit": "NN", "analysts": "NNS", "criticism": "NN", "golden": "JJ", "Group": "NNP", "campaign": "NN", "St.": "NNP", "replace": "VB", "Also": "RB", "Health": "NNP", "costly": "JJ", "unit": "NN", "swings": "NNS", "would": "MD", "century": "NN", "June": "NNP", "music": "NN", "asset": "NN", "N.J.": "NNP", "until": "IN", "PaineWebber": "NNP", "Breeden": "NNP", "Ministry": "NNP", "successful": "JJ", "phone": "NN", "90": "CD", "circumstances": "NNS", "me": "PRP", "1990": "CD", "1993": "CD", "1992": "CD", "word": "NN", "1994": "CD", "rights": "NNS", "movies": "NNS", "already": "RB", "my": "PRP$", "example": "NN", "estate": "NN", "psyllium": "NN", "Hurricane": "NNP", "10,000": "CD", "Digital": "NNP", "totaled": "VBD", "recovery": "NN", "Journal": "NNP", "thousands": "NNS", "machine": "NN", "how": "WRB", "Jack": "NNP", "interview": "NN", "resignation": "NN", "minority": "NN", "L.": "NNP", "after": "IN", "modest": "JJ", "president": "NN", "law": "NN", "effective": "JJ", "Maxwell": "NNP", "Telerate": "NNP", "Another": "DT", "Trust": "NNP", "order": "NN", "operations": "NNS", "office": "NN", "expects": "VBZ", "presence": "NN", "His": "PRP$", "personal": "JJ", "expectations": "NNS", "Here": "RB", "production": "NN", "400": "CD", "Judge": "NNP", "weeks": "NNS", "Spain": "NNP", "eventually": "RB", "them": "PRP", "weakness": "NN", "Thomas": "NNP", "effects": "NNS", "they": "PRP", "schools": "NNS", "bank": "NN", "represents": "VBZ", "Indeed": "RB", "each": "DT", "went": "VBD", "bond": "NN", "financial": "JJ", "fairly": "RB", "series": "NN", "substantially": "RB", "lawyers": "NNS", "by": "IN", "network": "NN", "Chancellor": "NNP", "William": "NNP", "Icahn": "NNP", "size": "NN", "University": "NNP", "N.Y.": "NNP", "enormous": "JJ", "Monday": "NNP", "September": "NNP", "National": "NNP", "days": "NNS", "appeals": "NNS", "economists": "NNS", "another": "DT", "electronic": "JJ", "Congress": "NNP", "lawsuits": "NNS", "rates": "NNS", "too": "RB", "percentage": "NN", "ceiling": "NN", "took": "VBD", "budget": "NN", "acquisition": "NN", "fashion": "NN", "Chicago": "NNP", "Cray": "NNP", "talking": "VBG", "seed": "NN", "Instead": "RB", "dozen": "NN", "Then": "RB", "strength": "NN", "responsible": "JJ", "-": ":", "practices": "NNS", "Minister": "NNP", "They": "PRP", "Bank": "NNP", "unsecured": "JJ", "Jones": "NNP", "shall": "MD", "involving": "VBG", "letter": "NN", "Mobil": "NNP", "medical": "JJ", "competitors": "NNS", "consumer": "NN", "Its": "PRP$", "came": "VBD", "Union": "NNP", "meetings": "NNS", "ending": "VBG", "specialists": "NNS", "judges": "NNS", "Mixte": "NNP", "representing": "VBG", "exports": "NNS", "wide": "JJ", "13": "CD", "certificates": "NNS", "despite": "IN", "volatility": "NN", "countries": "NNS", "high-yield": "JJ", "Washington": "NNP", "bad": "JJ", "Qintex": "NNP", "movement": "NN", "secretary": "NN", "Gorbachev": "NNP", "discussions": "NNS", "John": "NNP", "said": "VBD", "capacity": "NN", "wage": "NN", "we": "PRP", "never": "RB", "terms": "NNS", "wo": "MD", "were": "VBD", "weak": "JJ", "however": "RB", "news": "NN", "debt": "NN", "Among": "IN", "country": "NN", "uncertainty": "NN", "against": "IN", "Thomson": "NNP", "players": "NNS", "Computer": "NNP", "games": "NNS", "faces": "VBZ", "tough": "JJ", "tons": "NNS", "Board": "NNP", "250": "CD", "line-item": "JJ", "conference": "NN", "C.": "NNP", "basis": "NN", "union": "NN", "three": "CD", "been": "VBN", "C$": "$", "commission": "NN", "beer": "NN", "interest": "NN", "life": "NN", "families": "NNS", "Conn.": "NNP", "Tokyo": "NNP", "drugs": "NNS", "Poland": "NNP", "Secretary": "NNP", "Co": "NNP", "publicly": "RB", "property": "NN", "Tuesday": "NNP", "seven": "CD", "On": "IN", "is": "VBZ", "it": "PRP", "expenses": "NNS", "player": "NN", "Bush": "NNP", "experts": "NNS", "in": "IN", "victims": "NNS", "if": "IN", "things": "NNS", "damages": "NNS", "big": "JJ", "President": "NNP", "several": "JJ", "independent": "JJ", "Institute": "NNP", "hand": "NN", "Angeles": "NNP", "Morris": "NNP", "ownership": "NN", "opportunity": "NN", "cycle": "NN", "RJR": "NNP", "programs": "NNS", "client": "NN", "the": "DT", "corporate": "JJ", "investments": "NNS", "agency": "NN", "just": "RB", "unemployment": "NN", "previous": "JJ", "adding": "VBG", "buyers": "NNS", "board": "NN", "Philip": "NNP", "has": "VBZ", "gave": "VBD", "Santa": "NNP", "James": "NNP", "possible": "JJ", "Chrysler": "NNP", "30": "CD", "highly": "RB", "55": "CD", "51": "CD", "50": "CD", "securities": "NNS", "offices": "NNS", "officer": "NN", "night": "NN", "security": "NN", "Pentagon": "NNP", "attorney": "NN", "old": "JJ", "people": "NNS", "Commission": "NNP", "election": "NN", "short-term": "JJ", "Lee": "NNP", "for": "IN", "comments": "NNS", "everything": "NN", "He": "PRP", "corn": "NN", "conventional": "JJ", "Georgia-Pacific": "NNP", "brokerage": "NN", "properties": "NNS", "dollars": "NNS", "months": "NNS", "magazine": "NN", "ensure": "VB", "afternoon": "NN", "efforts": "NNS", "Still": "RB", "slightly": "RB", "Fed": "NNP", "statements": "NNS", "facility": "NN", "civil": "JJ", "magazines": "NNS", "defendants": "NNS", "initial": "JJ", "legislation": "NN", "why": "WRB", "editor": "NN", "way": "NN", "NBC": "NNP", "was": "VBD", "war": "NN", "manufacturers": "NNS", "January": "NNP", "becoming": "VBG", "true": "JJ", "analyst": "NN", "counsel": "NN", "devices": "NNS", "County": "NNP", "Greenspan": "NNP", ".": ".", "Sir": "NNP", "evidence": "NN", "''": "''", "trip": "NN", "negotiations": "NNS", "LTV": "NNP", "Francisco": "NNP", "floor": "NN", "stake": "NN", "generally": "RB", "role": "NN", "models": "NNS", "Hunt": "NNP", "fell": "VBD", "authorities": "NNS", "'m": "VBP", "Mass.": "NNP", "weekend": "NN", "billion": "CD", "reorganization": "NN", "Estate": "NNP", "Charles": "NNP", "time": "NN", "serious": "JJ", "Moscow": "NNP", "profits": "NNS", "chain": "NN", "global": "JJ", "alternatives": "NNS", "manager": "NN", "battle": "NN", "certainly": "RB", "Sept.": "NNP", "Columbia": "NNP", "environment": "NN", "finally": "RB", "must": "MD", "1991": "CD", "choice": "NN", "liability": "NN", "trouble": "NN", "Jersey": "NNP", "room": "NN", "did": "VBD", "proposals": "NNS", "standards": "NNS", "speculation": "NN", "George": "NNP", "Rey": "NNP", "says": "VBZ", "trend": "NN", "M.": "NNP", "adds": "VBZ", "shares": "NNS", "Ford": "NNP", "current": "JJ", "goes": "VBZ", "international": "JJ", "falling": "VBG", "Nov.": "NNP", "transportation": "NN", "genes": "NNS", "water": "NN", "baseball": "NN", "groups": "NNS", "Ltd": "NNP", "appears": "VBZ", "Warner": "NNP", "healthy": "JJ", "guilty": "JJ", "trial": "NN", "usually": "RB", "Inc": "NNP", "studies": "NNS", "When": "WRB", "crisis": "NN", "market": "NN", "Australia": "NNP", "August": "NNP", "positive": "JJ", "sports": "NNS", "francs": "NNS", "today": "NN", "``": "``", "October": "NNP", "These": "DT", "downturn": "NN", "cases": "NNS", "effort": "NN", "currency": "NN", "car": "NN", "abortion": "NN", "Pacific": "NNP", "believes": "VBZ", "districts": "NNS", "can": "MD", "Our": "PRP$", "heart": "NN", "subsidies": "NNS", "1.2": "CD", "requirements": "NNS", "Akzo": "NNP", "1": "CD", "fourth": "JJ", "H.": "NNP", "Why": "WRB", "economy": "NN", "product": "NN", "information": "NN", "may": "MD", "membership": "NN", "date": "NN", "man": "NN", "natural": "JJ", "commodity": "NN", "futures": "NNS", "truck": "NN", "exclusive": "JJ", "indeed": "RB", "LIN": "NNP", "Hong": "NNP", "years": "NNS", "brain": "NN", "managers": "NNS", "White": "NNP", "still": "RB", "group": "NN", "Lehman": "NNP", "policy": "NN", "main": "JJ", "nation": "NN", "She": "PRP", "not": "RB", "R.": "NNP", "now": "RB", "provision": "NN", "nor": "CC", "term": "NN", "attorneys": "NNS", "Stanley": "NNP", "quarter": "NN", "significantly": "RB", "begun": "VBN", "year": "NN", "Kong": "NNP", "shown": "VBN", "space": "NN", "looking": "VBG", "investigation": "NN", "Bloomingdale": "NNP", "Commerce": "NNP", "cars": "NNS", "million": "CD", "possibility": "NN", "language": "NN", "7\\/8": "CD", "thing": "NN", "revenue": "NN", "There": "EX", "directly": "RB", "corporations": "NNS", "Hollywood": "NNP", "tomorrow": "NN", "millions": "NNS", "city": "NN", "given": "VBN", "district": "NN", "trillion": "CD", "Dow": "NNP", "anyone": "NN", "2": "CD", "SEC": "NNP", "white": "JJ", "gives": "VBZ", "a": "DT", "mostly": "RB", "season": "NN", "probably": "RB", "surged": "VBD", "than": "IN", "Inc.": "NNP", "11": "CD", "10": "CD", "television": "NN", "12": "CD", "15": "CD", "14": "CD", "17": "CD", "16": "CD", "19": "CD", "18": "CD", "spokeswoman": "NN", "officials": "NNS", "venture": "NN", "amid": "IN", "and": "CC", "Court": "NNP", "investors": "NNS", "Marcos": "NNP", "Philadelphia": "NNP", "sells": "VBZ", "any": "DT", "equipment": "NN", "intends": "VBZ", "performance": "NN", "Du": "NNP", "200": "CD", "normal": "JJ", "price": "NN", "remarks": "NNS", "D.": "NNP", "especially": "RB", "sale": "NN", "ways": "NNS", "senior": "JJ", "typically": "RB", "laws": "NNS", "rating": "NN", "commitments": "NNS", "aggressive": "JJ", "We": "PRP", "written": "VBN", "crime": "NN", "going": "VBG", "black": "JJ", "congressional": "JJ", "contracts": "NNS", "nearly": "RB", "morning": "NN", "miles": "NNS", "where": "WRB", "college": "NN", "Grand": "NNP", "concern": "NN", "mortgage": "NN", "farmers": "NNS", "federal": "JJ", "representatives": "NNS", "materials": "NNS", "weapons": "NNS", "between": "IN", "Mitsubishi": "NNP", "jobs": "NNS", "Johnson": "NNP", "U.S.": "NNP", "26": "CD", "Each": "DT", "article": "NN", "cities": "NNS", "acquiring": "VBG", "many": "JJ", "region": "NN", "according": "VBG", "contract": "NN", "holders": "NNS", "comes": "VBZ", "among": "IN", "cancer": "NN", "150": "CD", "period": "NN", ",": ",", "60": "CD", "considering": "VBG", "unusual": "JJ", "Calif.": "NNP", "Electric": "NNP", "But": "CC", "Lynch": "NNP", "500": "CD", "engine": "NN", "direction": "NN", "Analysts": "NNS", "former": "JJ", "those": "DT", "paying": "VBG", "To": "TO", "these": "DT", "consultant": "NN", "Reagan": "NNP", "cash": "NN", "n't": "RB", "policies": "NNS", "newspaper": "NN", "situation": "NN", "trader": "NN", "then": "RB", "metric": "JJ", "telephone": "NN", "Peters": "NNP", "technology": "NN", "Israel": "NNP", "media": "NNS", "same": "JJ", "events": "NNS", "status": "NN", "oil": "NN", "I": "PRP", "IRS": "NNP", "Toyota": "NNP", "Coors": "NNP", "director": "NN", "largely": "RB", "constitutional": "JJ", "roughly": "RB", "mortgages": "NNS", "Rep.": "NNP", "without": "IN", "In": "IN", "researchers": "NNS", "If": "IN", "summer": "NN", "United": "NNP", "Service": "NNP", "being": "VBG", "money": "NN", "actions": "NNS", "Daniel": "NNP", "announcement": "NN", "death": "NN", "rose": "VBD", "seems": "VBZ", "improvement": "NN", "4": "CD", "Although": "IN", "pill": "NN", "real": "JJ", "rules": "NNS", "Sachs": "NNP", "Ortega": "NNP", "inflation": "NN", "traffic": "NN", "using": "VBG", "'ve": "VBP", "annually": "RB", "audience": "NN", "London": "NNP", "retailers": "NNS", "fully": "RB", "Moreover": "RB", "Since": "IN", "competition": "NN", "Dr.": "NNP", "New": "NNP", "gross": "JJ", "legal": "JJ", "conservative": "JJ", "critical": "JJ", "deficit": "NN", "provides": "VBZ", "football": "NN", "scientific": "JJ", "power": "NN", "leadership": "NN", "manufacturer": "NN", "on": "IN", "central": "JJ", "S.A.": "NNP", "of": "IN", "industry": "NN", "Trade": "NNP", "airline": "NN", "or": "CC", "road": "NN", "outlook": "NN", "coupon": "NN", "instruments": "NNS", "image": "NN", "parties": "NNS", "your": "PRP$", "area": "NN", "Engelken": "NNP", "Bartlett": "NNP", "trying": "VBG", "with": "IN", "Guber": "NNP", "volume": "NN", "fraud": "NN", "House": "NNP", "pulp": "NN", "gone": "VBN", "ad": "NN", "certain": "JJ", "am": "VBP", "sales": "NNS", "Thursday": "NNP", "an": "DT", "at": "IN", "film": "NN", "USX": "NNP", "4.5": "CD", "again": "RB", "event": "NN", "field": "NN", "5": "CD", "you": "PRP", "Ross": "NNP", "Las": "NNP", "poor": "JJ", "Jaguar": "NNP", "students": "NNS", "includes": "VBZ", "important": "JJ", "coverage": "NN", "stocks": "NNS", "US$": "$", "assets": "NNS", "wife": "NN", "directors": "NNS", "Street": "NNP", "minister": "NN", "Canada": "NNP", "founder": "NN", "dollar": "NN", "5\\/8": "CD", "month": "NN", "settlement": "NN", "decisions": "NNS", "children": "NNS", "Brown": "NNP", "to": "TO", "program": "NN", "health": "NN", "lawmakers": "NNS", "activities": "NNS", "woman": "NN", "far": "RB", "difference": "NN", "`": "``", "cable": "NN", "--": ":", "large": "JJ", "small": "JJ", "rate": "NN", "lawyer": "NN", "investment": "NN", "HUD": "NNP", "Korea": "NNP", "consumers": "NNS", "Paribas": "NNP", "version": "NN", "scientists": "NNS", "Ogilvy": "NNP", "Bethlehem": "NNP", "full": "JJ", "hours": "NNS", "strong": "JJ", "thrift": "NN", "prosecutors": "NNS", "ahead": "RB", "houses": "NNS", "losses": "NNS", "social": "JJ", "action": "NN", "options": "NNS", "via": "IN", "family": "NN", "S.": "NNP", "establish": "VB", "Europe": "NNP", "shareholders": "NNS", "Dinkins": "NNP", "eye": "NN", "takes": "VBZ", "11\\/16": "CD", "Hewlett-Packard": "NNP", "two": "CD", "Corp": "NNP", "6": "CD", "taken": "VBN", "markets": "NNS", "Manville": "NNP", "Intel": "NNP", "division": "NN", "company": "NN", "producing": "VBG", "town": "NN", "keeping": "VBG", "hour": "NN", "nine": "CD", "history": "NN", "purchases": "NNS", "IBM": "NNP", "adviser": "NN", "share": "NN", "numbers": "NNS", "Thompson": "NNP", "sharp": "JJ", "!": ".", "huge": "JJ", "court": "NN", "goal": "NN", "rather": "RB", "Carpenter": "NNP", "earnings": "NNS", "plant": "NN", "different": "JJ", "response": "NN", "acquisitions": "NNS", "Mexico": "NNP", ")": ")", "banks": "NNS", "What": "WP", "soon": "RB", "paper": "NN", "committee": "NN", "signs": "NNS", "its": "PRP$", "Texas": "NNP", "24": "CD", "25": "CD", "style": "NN", "27": "CD", "20": "CD", "21": "CD", "22": "CD", "23": "CD", "28": "CD", "29": "CD", "actually": "RB", "systems": "NNS", "governments": "NNS", "might": "MD", "Moody": "NNP", "someone": "NN", "seeking": "VBG", "food": "NN", "Michael": "NNP", "bigger": "JJR", "easily": "RB", "always": "RB", "week": "NN", "everyone": "NN", "generation": "NN", "house": "NN", "energy": "NN", "reduce": "VB", "idea": "NN", "slowdown": "NN", "Joseph": "NNP", "advertisers": "NNS", "operation": "NN", "beyond": "IN", "insurance": "NN", "really": "RB", "E.": "NNP", "since": "IN", "temporary": "JJ", "research": "NN", "safety": "NN", "7": "CD", "According": "VBG", "EC": "NNP", "extraordinary": "JJ", "reason": "NN", "members": "NNS", "producers": "NNS", "owners": "NNS", "benefits": "NNS", "Boston": "NNP", "computers": "NNS", "threat": "NN", "pilots": "NNS", "major": "JJ", "Hugo": "NNP", "number": "NN", "feet": "NNS", "done": "VBN", "fees": "NNS", "story": "NN", "statement": "NN", "option": "NN", "relationship": "NN", "part": "NN", "kind": "NN", "grew": "VBD", "toward": "IN", "outstanding": "JJ", "Douglas": "NNP", "It": "PRP", "substantial": "JJ", "orders": "NNS", "ratings": "NNS", "majority": "NN", "internal": "JJ", "Drexel": "NNP", "chairman": "NN", "With": "IN", "75": "CD", "shareholder": "NN", "significant": "JJ", "70": "CD", "services": "NNS", "The": "DT", "extremely": "RB", "dealers": "NNS", "OTC": "NNP", "traditional": "JJ", "three-month": "JJ", "institutions": "NNS", "sector": "NN", "particularly": "RB", "session": "NN", "businesses": "NNS", "Poor": "NNP", "regulations": "NNS", "merger": "NN", "equity": "NN", "8": "CD", "Prime": "NNP", "his": "PRP$", "gains": "NNS", "While": "IN", "5,000": "CD", "closely": "RB", "During": "IN", "during": "IN", "him": "PRP", "merchandise": "NN", "six-month": "JJ", "J.": "NNP", "common": "JJ", "activity": "NN", "wrote": "VBD", "Chairman": "NNP", "For": "IN", "France": "NNP", "culture": "NN", "defense": "NN", "are": "VBP", "jury": "NN", "2.5": "CD", "#": "#", "movie": "NN", "currently": "RB", "case": "NN", "various": "JJ", "Sony": "NNP", "conditions": "NNS", "available": "JJ", "recently": "RB", "creating": "VBG", "dividends": "NNS", "attention": "NN", "Florida": "NNP", "succeed": "VB", "opposition": "NN", "dividend": "NN", "last": "JJ", "ANC": "NNP", "annual": "JJ", "foreign": "JJ", "connection": "NN", "became": "VBD", "long-term": "JJ", "Compaq": "NNP", "reasons": "NNS", "loan": "NN", "community": "NN", "simply": "RB", "throughout": "IN", "political": "JJ", "earthquake": "NN", "whom": "WP", "reduction": "NN", "California": "NNP", "treatment": "NN", "partly": "RB", "gas": "NN", "priced": "VBN", "brokers": "NNS", "prices": "NNS", "plants": "NNS", "bill": "NN", "elections": "NNS", "33": "CD", "31": "CD", "City": "NNP", "pound": "NN", "Italy": "NNP", "voters": "NNS", "cents": "NNS", "itself": "PRP", "seen": "VBN", "Co.": "NNP", "underwriters": "NNS", "virtually": "RB", "widely": "RB", "grand": "JJ", "9": "CD", "products": "NNS", "relatively": "RB", "development": "NN", "currencies": "NNS", "Allianz": "NNP", "affairs": "NNS", "yesterday": "NN", "moment": "NN", "levels": "NNS", "{": "(", "recent": "JJ", "Miller": "NNP", "person": "NN", "organization": "NN", "one-year": "JJ", "competitive": "JJ", "Boren": "NNP", "questions": "NNS", "world": "NN", "profitable": "JJ", "retirement": "NN", "$": "$", "over-the-counter": "JJ", "workers": "NNS", "source": "NN", "Germany": "NNP", "...": ":", "customers": "NNS", "Last": "JJ", "emergency": "NN", "Of": "IN", "Air": "NNP", "game": "NN", "necessary": "JJ", "projects": "NNS", "follows": "VBZ", "individuals": "NNS", "popular": "JJ", "often": "RB", "Gulf": "NNP", "some": "DT", "3\\/4": "CD", "economic": "JJ", "3\\/8": "CD", "Frank": "NNP", "decision": "NN", "transactions": "NNS", "quickly": "RB", "Massachusetts": "NNP", "be": "VB", "Brady": "NNP", "300": "CD", "agreement": "NN", "David": "NNP", "output": "NN", "abroad": "RB", "pipeline": "NN", "goods": "NNS", "anything": "NN", "Pont": "NNP", "Roy": "NNP", "ounce": "NN", "Committee": "NNP", "into": "IN", "within": "IN", "NEC": "NNP", "nothing": "NN", "primarily": "RB", "Quebecor": "NNP", "bankruptcy": "NN", ":": ":", "himself": "PRP", "vehicle": "NN", "Ms.": "NNP", "Ltd.": "NNP", "Switzerland": "NNP", "subsidiary": "NN", "line": "NN", "Bell": "NNP", "Africa": "NNP", "us": "PRP", "Thatcher": "NNP", "maturity": "NN", "'re": "VBP", "exploration": "NN", "Those": "DT", "similar": "JJ", "Perhaps": "RB", "Hampshire": "NNP", "Westinghouse": "NNP", "single": "JJ", "Edward": "NNP", "International": "NNP", "Manhattan": "NNP", "%": "NN", "May": "NNP", "politicians": "NNS", "Mae": "NNP", "income": "NN", "department": "NN", "AG": "NNP", "problems": "NNS", "helping": "VBG", "allowing": "VBG", "reinsurance": "NN", "sides": "NNS", "structure": "NN", "vice": "NN", "age": "NN", "vehicles": "NNS", "bankers": "NNS", "An": "DT", "At": "IN", "requires": "VBZ", "having": "VBG", "results": "NNS", "Department": "NNP", "issues": "NNS", "young": "JJ", "suits": "NNS", "citing": "VBG", "UAL": "NNP", "Not": "RB", "Now": "RB", "resources": "NNS", "P&G": "NNP", "automotive": "JJ", "continues": "VBZ", "Mrs.": "NNP", "putting": "VBG", "entire": "JJ", "positions": "NNS", "race": "NN", "smaller": "JJR", "crop": "NN", "Hutton": "NNP", "makers": "NNS", "index": "NN", "business": "NN", "giving": "VBG", "Alan": "NNP", "access": "NN", "volatile": "JJ", "firms": "NNS", "America": "NNP", "pushing": "VBG", "jointly": "RB", "others": "NNS", "great": "JJ", "38": "CD", "technical": "JJ", "Energy": "NNP", "larger": "JJR", "37": "CD", "35": "CD", "CBS": "NNP", "survey": "NN", "Motor": "NNP", "opinion": "NN", "residents": "NNS", "gene": "NN", "makes": "VBZ", "maker": "NN", "apple": "NN", "Robert": "NNP", "private": "JJ", "privately": "RB", "scandal": "NN", "from": "IN", "&": "CC", "few": "JJ", "Fe": "NNP", "year-ago": "JJ", "themselves": "PRP", "chip": "NN", "reflects": "VBZ", "Wednesday": "NNP", "sharply": "RB", "women": "NNS", "customer": "NN", "this": "DT", "clients": "NNS", "recession": "NN", "industrial": "JJ", "F.": "NNP", "Northern": "NNP", "tax": "NN", "Mr.": "NNP", "reserves": "NNS", "something": "NN", "Party": "NNP", "BellSouth": "NNP", "holds": "VBZ", "traders": "NNS", "instead": "RB", "stock": "NN", "ABC": "NNP", "Nissan": "NNP", "Terms": "NNS", "engineering": "NN", "lines": "NNS", "Community": "NNP", "Oct.": "NNP", "software": "NN", "six": "CD", "producer": "NN", "institutional": "JJ", "Smith": "NNP", "including": "VBG", "year-earlier": "JJ", "industries": "NNS", "Exchange": "NNP", "Brooks": "NNP", "labor": "NN", "willing": "JJ", "greater": "JJR", "auto": "NN", "practice": "NN", "investor": "NN", "day": "NN", "Supreme": "NNP", "San": "NNP", "bills": "NNS", "Corry": "NNP", "doing": "VBG", "books": "NNS", "Treasury": "NNP", "our": "PRP$", "80": "CD", "Unisys": "NNP", "entertainment": "NN", "critics": "NNS", "China": "NNP", "disclose": "VB", "This": "DT", "regulatory": "JJ", "could": "MD", "Lawson": "NNP", "succeeds": "VBZ", "powerful": "JJ", "strategic": "JJ", "owner": "NN", "management": "NN", "system": "NN", "relations": "NNS", "Coast": "NNP", "their": "PRP$", "Pilson": "NNP", "final": "JJ", "Association": "NNP", "interests": "NNS", "acquire": "VB", "environmental": "JJ", "chemicals": "NNS", "reflecting": "VBG", "steel": "NN", "colleagues": "NNS", "patients": "NNS", "Peter": "NNP", "creditors": "NNS", "1.4": "CD", "1.5": "CD", "1.6": "CD", "1.1": "CD", "Richard": "NNP", "1.3": "CD", "unchanged": "JJ", "partnership": "NN", "Other": "JJ", ";": ":", "apparently": "RB", "clearly": "RB", "Development": "NNP", "documents": "NNS", "Goldman": "NNP", "After": "IN", "able": "JJ", "instance": "NN", "which": "WDT", "unless": "IN", "who": "WP", "eight": "CD", "segment": "NN", "payment": "NN", "Reserve": "NNP", "so-called": "JJ", "Some": "DT", "MCA": "NNP", "1,000": "CD", "}": ")", "Saturday": "NNP", "fact": "NN", "Paris": "NNP", "Sansui": "NNP", "Chemical": "NNP", "Under": "IN", "portfolio": "NN", "economist": "NN", "decade": "NN", "staff": "NN", "partners": "NNS", "based": "VBN", "Meanwhile": "RB", "(": "(", "should": "MD", "candidates": "NNS", "York": "NNP", "employee": "NN", "local": "JJ", "bonds": "NNS", "familiar": "JJ", "120": "CD", "ones": "NNS", "words": "NNS", "exchanges": "NNS", "buyer": "NN", "chips": "NNS", "areas": "NNS", "Because": "IN", "trucks": "NNS", "course": "NN", "taxes": "NNS", "calling": "VBG", "Wall": "NNP", "she": "PRP", "Burnham": "NNP", "temporarily": "RB", "national": "JJ", "computer": "NN", "nuclear": "JJ", "state": "NN", "July": "NNP", "Sen.": "NNP", "ability": "NN", "agencies": "NNS", "job": "NN", "takeover": "NN", "approval": "NN", "problem": "NN", "declining": "VBG", "restrictions": "NNS", "drug": "NN", "1\\/2": "CD", "1\\/4": "CD", "1\\/8": "CD", "ca": "MD", "Los": "NNP", "addition": "NN", "genetic": "JJ", "agreements": "NNS", "proposal": "NN", "Toronto": "NNP", "Center": "NNP", "Navigation": "NNP", "And": "CC", "homes": "NNS", "unlike": "IN", "value": "NN", "will": "MD", "PLC": "NNP", "Delmed": "NNP", "owns": "VBZ", "almost": "RB", "thus": "RB", "site": "NN", "partner": "NN", "You": "PRP", "perhaps": "RB", "began": "VBD", "administration": "NN", "Bear": "NNP", "member": "NN", "when": "WRB", "parts": "NNS", "largest": "JJS", "units": "NNS", "party": "NN", "gets": "VBZ", "difficult": "JJ", "effect": "NN", "Mitchell": "NNP", "Houston": "NNP", "transaction": "NN", "Senate": "NNP", "wants": "VBZ", "350": "CD", "position": "NN", "Shearson": "NNP", "latest": "JJS", "stores": "NNS", "heavily": "RB", "increasingly": "RB", "domestic": "JJ", "obtain": "VB", "sources": "NNS", "Sunday": "NNP", "rooms": "NNS", "ads": "NNS", "Friday": "NNP", "book": "NN", "cosmetics": "NNS", "Despite": "IN", "By": "IN", "provisions": "NNS", "government": "NN", "five": "CD", "immediately": "RB", "loss": "NN", "England": "NNP", "Aug.": "NNP", "success": "NN", "B.": "NNP", "payments": "NNS", "amendment": "NN", "arbitrage": "NN", "Sun": "NNP", "Kidder": "NNP", "February": "NNP", "growth": "NN", "employment": "NN", "Singapore": "NNP", "broad": "JJ", "However": "RB", "does": "VBZ", "leader": "NN", "?": ".", "Baker": "NNP", "Rothschild": "NNP", "monetary": "JJ", "expansion": "NN", "Fujitsu": "NNP", "although": "IN", "loans": "NNS", "panel": "NN", "actual": "JJ", "debentures": "NNS", "December": "NNP", "Peabody": "NNP", "holdings": "NNS", "carries": "VBZ", "carrier": "NN", "Japan": "NNP", "executives": "NNS", "letters": "NNS", "previously": "RB", "warrants": "NNS", "Two": "CD", "getting": "VBG", "strategy": "NN", "utility": "NN", "1986": "CD", "1987": "CD", "1984": "CD", "1985": "CD", "1982": "CD", "additional": "JJ", "1980": "CD", "Lawrence": "NNP", "housing": "NN", "1988": "CD", "1989": "CD", "biggest": "JJS", "November": "NNP", "funds": "NNS", "brand": "NN", "but": "CC", "delivery": "NN", "construction": "NN", "highest": "JJS", "he": "PRP", "also": "RB", "Industrial": "NNP", "whether": "IN", "cells": "NNS", "Britain": "NNP", "distribution": "NN", "minutes": "NNS", "flight": "NN", "margins": "NNS", "mutual": "JJ", "compared": "VBN", "'ll": "MD", "48": "CD", "49": "CD", "46": "CD", "Jr.": "NNP", "44": "CD", "45": "CD", "42": "CD", "Yesterday": "NN", "40": "CD", "Volume": "NN", "other": "JJ", "details": "NNS", "Corp.": "NNP", "junk": "NN", "Like": "IN", "class": "NN", "March": "NNP", "April": "NNP", "chance": "NN", "Morgan": "NNP", "Act": "NNP", "factors": "NNS", "portion": "NN", "pension": "NN"}
|
nltk_data/taggers/averaged_perceptron_tagger_eng/averaged_perceptron_tagger_eng.weights.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|