diyclassics
committed on
Commit
•
d9b7e04
1
Parent(s):
bfddc14
Update spaCy pipeline
Browse files- .gitattributes +2 -0
- README.md +16 -57
- config.cfg +93 -0
- la_vectors_floret_md-any-py3-none-any.whl +3 -0
- meta.json +47 -0
- tokenizer +3 -0
- vocab/key2row +1 -0
- vocab/lookups.bin +3 -0
- vocab/strings.json +1032 -0
- vocab/vectors +3 -0
- vocab/vectors.cfg +10 -0
.gitattributes
CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
la_vectors_floret_md-3.5.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
36 |
la_vectors_floret_md-3.6.0-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
37 |
la_vectors_floret_md-3.7.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
35 |
la_vectors_floret_md-3.5.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
36 |
la_vectors_floret_md-3.6.0-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
37 |
la_vectors_floret_md-3.7.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
38 |
+
la_vectors_floret_md-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
|
39 |
+
vocab/vectors filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,61 +1,20 @@
|
|
1 |
---
|
2 |
-
|
|
|
3 |
language:
|
4 |
- la
|
5 |
-
|
6 |
-
- cltk
|
7 |
-
- latin
|
8 |
-
- floret
|
9 |
-
library_name: spacy
|
10 |
---
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
# Model Details
|
26 |
-
|
27 |
-
## Model Description
|
28 |
-
|
29 |
-
<!-- Provide a longer summary of what this model is/does. -->
|
30 |
-
md floret vectors for Latin on Wikipedia, Oscar, and UD data.
|
31 |
-
|
32 |
-
- **Developed by:** Patrick J. Burns
|
33 |
-
- **Model type:** spaCy model
|
34 |
-
- **Language(s) (NLP):** la
|
35 |
-
- **License:** mit
|
36 |
-
- **Resources for more information:**
|
37 |
-
- [GitHub Repo](https://github.com/diyclassics/la_core_cltk_md)
|
38 |
-
|
39 |
-
# Citation
|
40 |
-
|
41 |
-
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
42 |
-
|
43 |
-
**BibTeX:**
|
44 |
-
|
45 |
-
```
|
46 |
-
@misc{burns_la_vectors_floret_md_2023,
|
47 |
-
title = {la\_vectors\_floret\_md},
|
48 |
-
version = 3.7.2,
|
49 |
-
url = {https://huggingface.co/diyclassics/la_vectors_floret_md},
|
50 |
-
abstract = {md floret vectors model for Latin},
|
51 |
-
urldate = {2023-12-23},
|
52 |
-
author = {Burns, Patrick J.},
|
53 |
-
year = {2023},
|
54 |
-
}
|
55 |
-
```
|
56 |
-
|
57 |
-
# How to Get Started with the Model
|
58 |
-
|
59 |
-
- Install with...
|
60 |
-
- `pip install https://huggingface.co/latincy/la_vectors_floret_md/resolve/main/la_vectors_floret_md-3.7.2-py3-none-any.whl`
|
61 |
-
- Tested on python 3.10.8, spacy==3.7.2
|
|
|
1 |
---
|
2 |
+
tags:
|
3 |
+
- spacy
|
4 |
language:
|
5 |
- la
|
6 |
+
license: mit
|
|
|
|
|
|
|
|
|
7 |
---
|
8 |
+
Code required to train md floret embeddings for Latin on LatinCy Assets data. Based on spaCy project [Train floret vectors from Wikipedia and OSCAR](https://github.com/explosion/projects/tree/v3/pipelines/floret_wiki_oscar_vectors).
|
9 |
+
|
10 |
+
| Feature | Description |
|
11 |
+
| --- | --- |
|
12 |
+
| **Name** | `la_vectors_floret_md` |
|
13 |
+
| **Version** | `3.8.0` |
|
14 |
+
| **spaCy** | `>=3.8.3,<3.9.0` |
|
15 |
+
| **Default Pipeline** | |
|
16 |
+
| **Components** | |
|
17 |
+
| **Vectors** | -1 keys, 50000 unique vectors (300 dimensions) |
|
18 |
+
| **Sources** | UD_Latin-Perseus<br>UD_Latin-PROIEL<br>UD_Latin-ITTB<br>UD_Latin-LLCT<br>UD_Latin-UDante<br>Wikipedia<br>OSCAR<br>Corpus Thomisticum<br>The Latin Library<br>CLTK-Tesserae Latin<br>Patrologia Latina |
|
19 |
+
| **License** | `MIT` |
|
20 |
+
| **Author** | [Patrick J. Burns](https://diyclassics.github.io/) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.cfg
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[paths]
|
2 |
+
train = null
|
3 |
+
dev = null
|
4 |
+
vectors = null
|
5 |
+
init_tok2vec = null
|
6 |
+
|
7 |
+
[system]
|
8 |
+
seed = 0
|
9 |
+
gpu_allocator = null
|
10 |
+
|
11 |
+
[nlp]
|
12 |
+
lang = "la"
|
13 |
+
pipeline = []
|
14 |
+
disabled = []
|
15 |
+
before_creation = null
|
16 |
+
after_creation = null
|
17 |
+
after_pipeline_creation = null
|
18 |
+
batch_size = 1000
|
19 |
+
tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
|
20 |
+
vectors = {"@vectors":"spacy.Vectors.v1"}
|
21 |
+
|
22 |
+
[components]
|
23 |
+
|
24 |
+
[corpora]
|
25 |
+
|
26 |
+
[corpora.dev]
|
27 |
+
@readers = "spacy.Corpus.v1"
|
28 |
+
path = ${paths.dev}
|
29 |
+
gold_preproc = false
|
30 |
+
max_length = 0
|
31 |
+
limit = 0
|
32 |
+
augmenter = null
|
33 |
+
|
34 |
+
[corpora.train]
|
35 |
+
@readers = "spacy.Corpus.v1"
|
36 |
+
path = ${paths.train}
|
37 |
+
gold_preproc = false
|
38 |
+
max_length = 0
|
39 |
+
limit = 0
|
40 |
+
augmenter = null
|
41 |
+
|
42 |
+
[training]
|
43 |
+
seed = ${system.seed}
|
44 |
+
gpu_allocator = ${system.gpu_allocator}
|
45 |
+
dropout = 0.1
|
46 |
+
accumulate_gradient = 1
|
47 |
+
patience = 1600
|
48 |
+
max_epochs = 0
|
49 |
+
max_steps = 20000
|
50 |
+
eval_frequency = 200
|
51 |
+
frozen_components = []
|
52 |
+
annotating_components = []
|
53 |
+
dev_corpus = "corpora.dev"
|
54 |
+
train_corpus = "corpora.train"
|
55 |
+
before_to_disk = null
|
56 |
+
before_update = null
|
57 |
+
logger = {"@loggers":"spacy.ConsoleLogger.v1"}
|
58 |
+
|
59 |
+
[training.batcher]
|
60 |
+
@batchers = "spacy.batch_by_words.v1"
|
61 |
+
discard_oversize = false
|
62 |
+
tolerance = 0.2
|
63 |
+
|
64 |
+
[training.batcher.size]
|
65 |
+
@schedules = "compounding.v1"
|
66 |
+
start = 100
|
67 |
+
stop = 1000
|
68 |
+
compound = 1.001
|
69 |
+
|
70 |
+
[training.optimizer]
|
71 |
+
@optimizers = "Adam.v1"
|
72 |
+
beta1 = 0.9
|
73 |
+
beta2 = 0.999
|
74 |
+
L2_is_weight_decay = true
|
75 |
+
L2 = 0.01
|
76 |
+
grad_clip = 1.0
|
77 |
+
use_averages = false
|
78 |
+
eps = 0.00000001
|
79 |
+
learn_rate = 0.001
|
80 |
+
|
81 |
+
[training.score_weights]
|
82 |
+
|
83 |
+
[initialize]
|
84 |
+
vectors = ${paths.vectors}
|
85 |
+
init_tok2vec = ${paths.init_tok2vec}
|
86 |
+
vocab_data = null
|
87 |
+
lookups = null
|
88 |
+
before_init = null
|
89 |
+
after_init = null
|
90 |
+
|
91 |
+
[initialize.components]
|
92 |
+
|
93 |
+
[initialize.tokenizer]
|
la_vectors_floret_md-any-py3-none-any.whl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88d2b361d1f74e73916938618414a4e70b60135f15432f907b5f86f8f228814a
|
3 |
+
size 53485225
|
meta.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"lang":"la",
|
3 |
+
"name":"vectors_floret_md",
|
4 |
+
"version":"3.8.0",
|
5 |
+
"description":"Code required to train md floret embeddings for Latin on LatinCy Assets data. Based on spaCy project [Train floret vectors from Wikipedia and OSCAR](https://github.com/explosion/projects/tree/v3/pipelines/floret_wiki_oscar_vectors).",
|
6 |
+
"author":"Patrick J. Burns",
|
7 |
+
"email":"[email protected]",
|
8 |
+
"url":"https://diyclassics.github.io/",
|
9 |
+
"license":"MIT",
|
10 |
+
"spacy_version":">=3.8.3,<3.9.0",
|
11 |
+
"spacy_git_version":"be0fa81",
|
12 |
+
"vectors":{
|
13 |
+
"width":300,
|
14 |
+
"vectors":50000,
|
15 |
+
"keys":-1,
|
16 |
+
"name":"la_vectors_floret_md.vectors"
|
17 |
+
},
|
18 |
+
"labels":{
|
19 |
+
|
20 |
+
},
|
21 |
+
"pipeline":[
|
22 |
+
|
23 |
+
],
|
24 |
+
"components":[
|
25 |
+
|
26 |
+
],
|
27 |
+
"disabled":[
|
28 |
+
|
29 |
+
],
|
30 |
+
"title":"la_vectors_floret_md",
|
31 |
+
"sources":[
|
32 |
+
"UD_Latin-Perseus",
|
33 |
+
"UD_Latin-PROIEL",
|
34 |
+
"UD_Latin-ITTB",
|
35 |
+
"UD_Latin-LLCT",
|
36 |
+
"UD_Latin-UDante",
|
37 |
+
"Wikipedia",
|
38 |
+
"OSCAR",
|
39 |
+
"Corpus Thomisticum",
|
40 |
+
"The Latin Library",
|
41 |
+
"CLTK-Tesserae Latin",
|
42 |
+
"Patrologia Latina"
|
43 |
+
],
|
44 |
+
"requirements":[
|
45 |
+
"spacy>=3.8.3,<3.9.0"
|
46 |
+
]
|
47 |
+
}
|
tokenizer
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
��prefix_search��^§|^%|^=|^—|^–|^\+(?![0-9])|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^〈|^〉|^⟦|^⟧|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U00
01D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�2�…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|〈$|〉$|⟦$|⟧$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3
037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|'s$|'S$|’s$|’S$|—$|–$|(?<=[0-9])\+$|(?<=°[FfCcKk])\.$|(?<=[0-9])(?:\$|£|€|¥|฿|US\$|C\$|A\$|₽|﷼|₴|₠|₡|₢|₣|₤|₥|₦|₧|₨|₩|₪|₫|€|₭|₮|₯|₰|₱|₲|₳|₴|₵|₶|₷|₸|₹|₺|₻|₼|₽|₾|₿)$|(?<=[0-9])(?:km|km²|km³|m|m²|m³|dm|dm²|dm³|cm|cm²|cm³|mm|mm²|mm³|ha|µm|nm|yd|in|ft|kg|g|mg|µg|t|lb|oz|m/s|km/h|kmh|mph|hPa|Pa|mbar|mb|MB|kb|KB|gb|GB|tb|TB|T|G|M|K|%|км|км²|км³|м|м²|м³|дм|дм²|дм³|см|см²|см³|мм|мм²|мм³|нм|кг|г|мг|м/с|км/ч|кПа|Па|мбар|Кб|КБ|кб|Мб|МБ|мб|Гб|ГБ|гб|Тб|ТБ|тбكم|كم²|كم³|م|م²|م³|سم|سم²|سم³|مم|مم²|مم³|كم|غرام|جرام|جم|كغ|ملغ|كوب|اكواب)$|(?<=[0-9a-z\uFF
41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\
u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F%²\-\+…|……|,|:|;|\!|\?|¿|؟|¡|\(|\)|\[|\]|\{|\}|<|>|_|#|\*|&|。|?|!|,|、|;|:|~|·|।|،|۔|؛|٪(?:\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧)])\.$|(?<=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u020
2\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U
0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F][A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\
u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])\.$�infix_finditer�>�\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8
-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146
\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9
\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA73
6\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8
-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-
\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-
\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щю
яіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])�token_match��url_match�
|
2 |
+
��A�
|
3 |
+
� ��A� �'��A�'�''��A�''�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�:’(��A�:’(�:’)��A�:’)�:’-(��A�:’-(�:’-)��A�:’-)�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�A.��A�A.�A.D.��A�A.D.�A.U.C.��A�A.U.C.�AA.��A�AA.�AAA.��A�AAA.�ACC.��A�ACC.�AGR.��A�AGR.�AP.��A�AP.�APR.��A�APR.�APRIL.��A�APRIL.�AUG.��A�AUG.�Aa.��A�Aa.�Aaa.��A�Aaa.�Acc.��A�Acc.�Agr.��A�Agr.�Ap.��A�Ap.�Apr.��A�Apr.�April.��A�April.�Aug.��A�Aug.�C++��A�C++�C.��A�C.�CAES.��A�CAES.�CAESS.��A�CAESS.�CC.��A�CC.�CN.��A�CN.�COLL.��A�COLL.�CONS.��A�CONS.�CONSS.��A�CONSS.�COS.��A�COS.�COSS.��A�COSS.�Caes.��A�Caes.�Caess.��A�Caess.�Cc.��A�Cc.�Cn.��A�Cn.�Coll.��A�Coll.�Cons.��A�Cons.�Conss.��A�Conss.�Cos.��A�Cos.�Coss.��A�Coss.�D.��A�D.�D.N.��A�D.N.�DAT.��A�DAT.�DD.��A�DD.�DEC.��A�DEC.�DECEMB.��A�DECEMB.�DECEMBR.��A�DECEMBR.�Dat.��A�Dat.�Dd.��A�Dd.�Dec.��A�Dec.�Decemb.��A�Decemb.�Decembr.��A�Decembr.�F.��A�F.�FEB.��A�FEB.�FEBR.��A�FEBR.�FEBRUAR.��A�FEBRUAR.�Feb.��A�Feb.�Febr.��A�Febr.�Febru
ar.��A�Februar.�IAN.��A�IAN.�ID.��A�ID.�IMP.��A�IMP.�IMPP.��A�IMPP.�IMPPP.��A�IMPPP.�IUL.��A�IUL.�IUN.��A�IUN.�Ian.��A�Ian.�Id.��A�Id.�Imp.��A�Imp.�Impp.��A�Impp.�Imppp.��A�Imppp.�Iul.��A�Iul.�Iun.��A�Iun.�K.��A�K.�KAL.��A�KAL.�Kal.��A�Kal.�L.��A�L.�M'.��A�M'.�M.��A�M.�MAI.��A�MAI.�MAM.��A�MAM.�MAR.��A�MAR.�MART.��A�MART.�MED.��A�MED.�Mai.��A�Mai.�Mam.��A�Mam.�Mar.��A�Mar.�Mart.��A�Mart.�Med.��A�Med.�M’.��A�M’.�N.��A�N.�NN.��A�NN.�NOB.��A�NOB.�NON.��A�NON.�NOU.��A�NOU.�NOUEMB.��A�NOUEMB.�NOV.��A�NOV.�NOVEMB.��A�NOVEMB.�Nn.��A�Nn.�Nob.��A�Nob.�Non.��A�Non.�Nou.��A�Nou.�Nouemb.��A�Nouemb.�Nov.��A�Nov.�Novemb.��A�Novemb.�O.O��A�O.O�O.o��A�O.o�OCT.��A�OCT.�OCTOB.��A�OCTOB.�OPET.��A�OPET.�ORD.��A�ORD.�O_O��A�O_O�O_o��A�O_o�Oct.��A�Oct.�Octob.��A�Octob.�Opet.��A�Opet.�Ord.��A�Ord.�P.��A�P.�PAUL.��A�PAUL.�PF.��A�PF.�PL.��A�PL.�PLUR.��A�PLUR.�POST.��A�POST.�PP.��A�PP.�PRID.��A�PRID.�PRO.��A�PRO.�PROCOS.��A�PROCOS.�Paul.��A�Paul.�Pf.��A�Pf.�Pl.��A�Pl.�Plur.��A�Plur.�Post.��A�Post.�Pp.��A�Pp.�Prid.��A�Prid.�Pro.��A�Pro.�Procos.��A�Procos.�Q.��A�Q.�QUINT.��A�QUINT.�Quint.��A�Quint.�S.��A�S.�S.C.��A�S.C.�SCR.��A�SCR.�SEPT.��A�SEPT.�SEPTEMB.��A�SEPTEMB.�SER.��A�SER.�SERT.��A�SERT.�SEX.��A�SEX.�SEXT.��A�SEXT.�ST.��A�ST.�STA.��A�STA.�SUFF.��A�SUFF.�Scr.��A�Scr.�Sept.��A�Sept.�Septemb.��A�Septemb.�Ser.��A�Ser.�Sert.��A�Sert.�Sex.��A�Sex.�Sext.��A�Sext.�St.��A�St.�Sta.��A�Sta.�Suff.��A�Suff.�T.��A�T.�TI.��A�TI.�TRIB.��A�TRIB.�Ti.��A�Ti.�Trib.��A�Trib.�U.��A�U.�UOL.��A�UOL.�UOP.��A�UOP.�UU.��A�UU.�Uol.��A�Uol.�Uop.��A�Uop.�Uu.��A�Uu.�V.��A�V.�V.V��A�V.V�VOL.��A�VOL.�VOP.��A�VOP.�VV.��A�VV.�V_V��A�V_V�Vol.��A�Vol.�Vop.��A�Vop.�Vv.��A�Vv.�XD��A�XD�XDD��A�XDD�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�a.d.��A�a.d.�a.u.c.��A�a.u.c.�aa.��A�aa.�aaa.��A�aaa.�acc.��A�acc.�agr.��A�agr.�ap.��A�ap.�apr.��A�apr.�april.��A�april.�aug.��A�aug.�b.��A�b.�c.��A�c.�caes.��A�caes.�caess.��A�caess.�cc.��A�cc.�cn.��A�cn.�coll.��A�col
l.�cons.��A�cons.�conss.��A�conss.�cos.��A�cos.�coss.��A�coss.�d.��A�d.�d.N.��A�d.N.�d.n.��A�d.n.�dat.��A�dat.�dd.��A�dd.�dec.��A�dec.�decemb.��A�decemb.�decembr.��A�decembr.�e.��A�e.�f.��A�f.�feb.��A�feb.�febr.��A�febr.�februar.��A�februar.�g.��A�g.�h.��A�h.�i.��A�i.�ian.��A�ian.�id.��A�id.�imp.��A�imp.�impp.��A�impp.�imppp.��A�imppp.�iul.��A�iul.�iun.��A�iun.�j.��A�j.�k.��A�k.�kal.��A�kal.�l.��A�l.�m'.��A�m'.�m.��A�m.�mai.��A�mai.�mam.��A�mam.�mar.��A�mar.�mart.��A�mart.�mecum��A�me�A�cum�med.��A�med.�m’.��A�m’.�n.��A�n.�nn.��A�nn.�nob.��A�nob.�nobiscum��A�nobis�A�cum�non.��A�non.�nou.��A�nou.�nouemb.��A�nouemb.�nov.��A�nov.�novemb.��A�novemb.�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�oct.��A�oct.�octob.��A�octob.�opet.��A�opet.�ord.��A�ord.�p.��A�p.�paul.��A�paul.�pf.��A�pf.�pl.��A�pl.�plur.��A�plur.�post.��A�post.�pp.��A�pp.�prid.��A�prid.�pro.��A�pro.�procos.��A�procos.�q.��A�q.�quint.��A�quint.�r.��A�r.�s.��A�s.�s.c.��A�s.c.�scr.��A�scr.�sept.��A�sept.�septemb.��A�septemb.�ser.��A�ser.�sert.��A�sert.�sex.��A�sex.�sext.��A�sext.�st.��A�st.�sta.��A�sta.�suff.��A�suff.�t.��A�t.�tecum��A�te�A�cum�ti.��A�ti.�trib.��A�trib.�u.��A�u.�uobiscum��A�uobis�A�cum�uol.��A�uol.�uop.��A�uop.�uu.��A�uu.�v.��A�v.�v.v��A�v.v�v_v��A�v_v�vobiscum��A�vobis�A�cum�vol.��A�vol.�vop.��A�vop.�vv.��A�vv.�w.��A�w.�x.��A�x.�xD��A�xD�xDD��A�xDD�y.��A�y.�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�’��A�’�’’��A�’’�faster_heuristics�
|
vocab/key2row
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
�
|
vocab/lookups.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
|
3 |
+
size 1
|
vocab/strings.json
ADDED
@@ -0,0 +1,1032 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
"\t",
|
3 |
+
"\n",
|
4 |
+
" ",
|
5 |
+
" ",
|
6 |
+
"\"",
|
7 |
+
"'",
|
8 |
+
"''",
|
9 |
+
"'-(",
|
10 |
+
"'-)",
|
11 |
+
"(",
|
12 |
+
"(((",
|
13 |
+
"(*>",
|
14 |
+
"(*_*)",
|
15 |
+
"(-8",
|
16 |
+
"(-:",
|
17 |
+
"(-;",
|
18 |
+
"(-_-)",
|
19 |
+
"(-d",
|
20 |
+
"(._.)",
|
21 |
+
"(:",
|
22 |
+
"(;",
|
23 |
+
"(=",
|
24 |
+
"(>_<)",
|
25 |
+
"(^_^)",
|
26 |
+
"(o:",
|
27 |
+
"(x:",
|
28 |
+
"(x_x)",
|
29 |
+
"(\u00ac_\u00ac)",
|
30 |
+
"(\u0ca0_\u0ca0)",
|
31 |
+
"(\u256f\u00b0\u25a1\u00b0\uff09\u256f\ufe35\u253b\u2501\u253b",
|
32 |
+
")",
|
33 |
+
")))",
|
34 |
+
")-:",
|
35 |
+
")/\u00af",
|
36 |
+
"):",
|
37 |
+
"*",
|
38 |
+
"-",
|
39 |
+
"-((",
|
40 |
+
"-))",
|
41 |
+
"-/",
|
42 |
+
"-0",
|
43 |
+
"-3",
|
44 |
+
"-8",
|
45 |
+
"-D",
|
46 |
+
"-O",
|
47 |
+
"-P",
|
48 |
+
"-X",
|
49 |
+
"-_-",
|
50 |
+
"-__-",
|
51 |
+
"-d",
|
52 |
+
"-o",
|
53 |
+
"-p",
|
54 |
+
"-x",
|
55 |
+
"-|",
|
56 |
+
".",
|
57 |
+
".C.",
|
58 |
+
".D.",
|
59 |
+
".N.",
|
60 |
+
"._.",
|
61 |
+
".c.",
|
62 |
+
".d.",
|
63 |
+
".n.",
|
64 |
+
"/",
|
65 |
+
"/3",
|
66 |
+
"/d",
|
67 |
+
"0",
|
68 |
+
"0.0",
|
69 |
+
"0.o",
|
70 |
+
"0_0",
|
71 |
+
"0_o",
|
72 |
+
"1",
|
73 |
+
"3",
|
74 |
+
"33",
|
75 |
+
"333",
|
76 |
+
"8",
|
77 |
+
"8)",
|
78 |
+
"8-",
|
79 |
+
"8-)",
|
80 |
+
"8-D",
|
81 |
+
"8-d",
|
82 |
+
"8D",
|
83 |
+
"8d",
|
84 |
+
":",
|
85 |
+
":'(",
|
86 |
+
":')",
|
87 |
+
":'-(",
|
88 |
+
":'-)",
|
89 |
+
":(",
|
90 |
+
":((",
|
91 |
+
":(((",
|
92 |
+
":()",
|
93 |
+
":)",
|
94 |
+
":))",
|
95 |
+
":)))",
|
96 |
+
":*",
|
97 |
+
":-(",
|
98 |
+
":-((",
|
99 |
+
":-(((",
|
100 |
+
":-)",
|
101 |
+
":-))",
|
102 |
+
":-)))",
|
103 |
+
":-*",
|
104 |
+
":-/",
|
105 |
+
":-0",
|
106 |
+
":-3",
|
107 |
+
":->",
|
108 |
+
":-D",
|
109 |
+
":-O",
|
110 |
+
":-P",
|
111 |
+
":-X",
|
112 |
+
":-]",
|
113 |
+
":-d",
|
114 |
+
":-o",
|
115 |
+
":-p",
|
116 |
+
":-x",
|
117 |
+
":-|",
|
118 |
+
":-}",
|
119 |
+
":/",
|
120 |
+
":0",
|
121 |
+
":1",
|
122 |
+
":3",
|
123 |
+
":>",
|
124 |
+
":D",
|
125 |
+
":O",
|
126 |
+
":P",
|
127 |
+
":X",
|
128 |
+
":]",
|
129 |
+
":d",
|
130 |
+
":o",
|
131 |
+
":o)",
|
132 |
+
":p",
|
133 |
+
":x",
|
134 |
+
":x)",
|
135 |
+
":|",
|
136 |
+
":}",
|
137 |
+
":\u2019(",
|
138 |
+
":\u2019)",
|
139 |
+
":\u2019-(",
|
140 |
+
":\u2019-)",
|
141 |
+
";",
|
142 |
+
";)",
|
143 |
+
";-)",
|
144 |
+
";-D",
|
145 |
+
";-X",
|
146 |
+
";-d",
|
147 |
+
";D",
|
148 |
+
";X",
|
149 |
+
";_;",
|
150 |
+
";d",
|
151 |
+
"<",
|
152 |
+
"<.<",
|
153 |
+
"</3",
|
154 |
+
"</d",
|
155 |
+
"<3",
|
156 |
+
"<33",
|
157 |
+
"<333",
|
158 |
+
"<d",
|
159 |
+
"<dd",
|
160 |
+
"<ddd",
|
161 |
+
"<space>",
|
162 |
+
"<xxxx>",
|
163 |
+
"=",
|
164 |
+
"=(",
|
165 |
+
"=)",
|
166 |
+
"=/",
|
167 |
+
"=3",
|
168 |
+
"=D",
|
169 |
+
"=X",
|
170 |
+
"=[",
|
171 |
+
"=]",
|
172 |
+
"=d",
|
173 |
+
"=|",
|
174 |
+
">",
|
175 |
+
">.<",
|
176 |
+
">.>",
|
177 |
+
">:(",
|
178 |
+
">:o",
|
179 |
+
">:x",
|
180 |
+
"><(((*>",
|
181 |
+
"@",
|
182 |
+
"@_@",
|
183 |
+
"A",
|
184 |
+
"A.",
|
185 |
+
"A.D.",
|
186 |
+
"A.U.C.",
|
187 |
+
"AA",
|
188 |
+
"AA.",
|
189 |
+
"AAA",
|
190 |
+
"AAA.",
|
191 |
+
"ACC",
|
192 |
+
"ACC.",
|
193 |
+
"AES",
|
194 |
+
"AGR",
|
195 |
+
"AGR.",
|
196 |
+
"AI.",
|
197 |
+
"AL.",
|
198 |
+
"AM.",
|
199 |
+
"AN.",
|
200 |
+
"AP",
|
201 |
+
"AP.",
|
202 |
+
"APR",
|
203 |
+
"APR.",
|
204 |
+
"APRIL",
|
205 |
+
"APRIL.",
|
206 |
+
"AR.",
|
207 |
+
"ART",
|
208 |
+
"AT.",
|
209 |
+
"AUG",
|
210 |
+
"AUG.",
|
211 |
+
"AUL",
|
212 |
+
"Aa",
|
213 |
+
"Aa.",
|
214 |
+
"Aaa",
|
215 |
+
"Aaa.",
|
216 |
+
"Acc",
|
217 |
+
"Acc.",
|
218 |
+
"Agr",
|
219 |
+
"Agr.",
|
220 |
+
"Ap",
|
221 |
+
"Ap.",
|
222 |
+
"Apr",
|
223 |
+
"Apr.",
|
224 |
+
"April",
|
225 |
+
"April.",
|
226 |
+
"Aug",
|
227 |
+
"Aug.",
|
228 |
+
"BR.",
|
229 |
+
"C",
|
230 |
+
"C++",
|
231 |
+
"C.",
|
232 |
+
"CAES",
|
233 |
+
"CAES.",
|
234 |
+
"CAESS",
|
235 |
+
"CAESS.",
|
236 |
+
"CC",
|
237 |
+
"CC.",
|
238 |
+
"CN",
|
239 |
+
"CN.",
|
240 |
+
"COLL",
|
241 |
+
"COLL.",
|
242 |
+
"CONS",
|
243 |
+
"CONS.",
|
244 |
+
"CONSS",
|
245 |
+
"CONSS.",
|
246 |
+
"COS",
|
247 |
+
"COS.",
|
248 |
+
"COSS",
|
249 |
+
"COSS.",
|
250 |
+
"CR.",
|
251 |
+
"CT.",
|
252 |
+
"Caes",
|
253 |
+
"Caes.",
|
254 |
+
"Caess",
|
255 |
+
"Caess.",
|
256 |
+
"Cc",
|
257 |
+
"Cc.",
|
258 |
+
"Cn",
|
259 |
+
"Cn.",
|
260 |
+
"Coll",
|
261 |
+
"Coll.",
|
262 |
+
"Cons",
|
263 |
+
"Cons.",
|
264 |
+
"Conss",
|
265 |
+
"Conss.",
|
266 |
+
"Cos",
|
267 |
+
"Cos.",
|
268 |
+
"Coss",
|
269 |
+
"Coss.",
|
270 |
+
"D",
|
271 |
+
"D.",
|
272 |
+
"D.N.",
|
273 |
+
"DAT",
|
274 |
+
"DAT.",
|
275 |
+
"DD",
|
276 |
+
"DD.",
|
277 |
+
"DEC",
|
278 |
+
"DEC.",
|
279 |
+
"DECEMB",
|
280 |
+
"DECEMB.",
|
281 |
+
"DECEMBR",
|
282 |
+
"DECEMBR.",
|
283 |
+
"Dat",
|
284 |
+
"Dat.",
|
285 |
+
"Dd",
|
286 |
+
"Dd.",
|
287 |
+
"Dec",
|
288 |
+
"Dec.",
|
289 |
+
"Decemb",
|
290 |
+
"Decemb.",
|
291 |
+
"Decembr",
|
292 |
+
"Decembr.",
|
293 |
+
"EB.",
|
294 |
+
"EBR",
|
295 |
+
"EC.",
|
296 |
+
"ED.",
|
297 |
+
"EMB",
|
298 |
+
"EPT",
|
299 |
+
"ER.",
|
300 |
+
"ERT",
|
301 |
+
"ES.",
|
302 |
+
"ESS",
|
303 |
+
"ET.",
|
304 |
+
"EX.",
|
305 |
+
"EXT",
|
306 |
+
"F",
|
307 |
+
"F.",
|
308 |
+
"FEB",
|
309 |
+
"FEB.",
|
310 |
+
"FEBR",
|
311 |
+
"FEBR.",
|
312 |
+
"FEBRUAR",
|
313 |
+
"FEBRUAR.",
|
314 |
+
"FF.",
|
315 |
+
"Feb",
|
316 |
+
"Feb.",
|
317 |
+
"Febr",
|
318 |
+
"Febr.",
|
319 |
+
"Februar",
|
320 |
+
"Februar.",
|
321 |
+
"GR.",
|
322 |
+
"I",
|
323 |
+
"IAN",
|
324 |
+
"IAN.",
|
325 |
+
"IB.",
|
326 |
+
"ID.",
|
327 |
+
"IL.",
|
328 |
+
"IMP",
|
329 |
+
"IMP.",
|
330 |
+
"IMPP",
|
331 |
+
"IMPP.",
|
332 |
+
"IMPPP",
|
333 |
+
"IMPPP.",
|
334 |
+
"INT",
|
335 |
+
"IUL",
|
336 |
+
"IUL.",
|
337 |
+
"IUN",
|
338 |
+
"IUN.",
|
339 |
+
"Ian",
|
340 |
+
"Ian.",
|
341 |
+
"Id",
|
342 |
+
"Id.",
|
343 |
+
"Imp",
|
344 |
+
"Imp.",
|
345 |
+
"Impp",
|
346 |
+
"Impp.",
|
347 |
+
"Imppp",
|
348 |
+
"Imppp.",
|
349 |
+
"Iul",
|
350 |
+
"Iul.",
|
351 |
+
"Iun",
|
352 |
+
"Iun.",
|
353 |
+
"K",
|
354 |
+
"K.",
|
355 |
+
"KAL",
|
356 |
+
"KAL.",
|
357 |
+
"Kal",
|
358 |
+
"Kal.",
|
359 |
+
"L",
|
360 |
+
"L.",
|
361 |
+
"LL.",
|
362 |
+
"LUR",
|
363 |
+
"M",
|
364 |
+
"M'.",
|
365 |
+
"M.",
|
366 |
+
"MAI",
|
367 |
+
"MAI.",
|
368 |
+
"MAM",
|
369 |
+
"MAM.",
|
370 |
+
"MAR",
|
371 |
+
"MAR.",
|
372 |
+
"MART",
|
373 |
+
"MART.",
|
374 |
+
"MB.",
|
375 |
+
"MBR",
|
376 |
+
"MED",
|
377 |
+
"MED.",
|
378 |
+
"MP.",
|
379 |
+
"MPP",
|
380 |
+
"Mai",
|
381 |
+
"Mai.",
|
382 |
+
"Mam",
|
383 |
+
"Mam.",
|
384 |
+
"Mar",
|
385 |
+
"Mar.",
|
386 |
+
"Mart",
|
387 |
+
"Mart.",
|
388 |
+
"Med",
|
389 |
+
"Med.",
|
390 |
+
"M\u2019.",
|
391 |
+
"N",
|
392 |
+
"N.",
|
393 |
+
"NN",
|
394 |
+
"NN.",
|
395 |
+
"NOB",
|
396 |
+
"NOB.",
|
397 |
+
"NON",
|
398 |
+
"NON.",
|
399 |
+
"NOU",
|
400 |
+
"NOU.",
|
401 |
+
"NOUEMB",
|
402 |
+
"NOUEMB.",
|
403 |
+
"NOV",
|
404 |
+
"NOV.",
|
405 |
+
"NOVEMB",
|
406 |
+
"NOVEMB.",
|
407 |
+
"NS.",
|
408 |
+
"NSS",
|
409 |
+
"NT.",
|
410 |
+
"Nn",
|
411 |
+
"Nn.",
|
412 |
+
"Nob",
|
413 |
+
"Nob.",
|
414 |
+
"Non",
|
415 |
+
"Non.",
|
416 |
+
"Nou",
|
417 |
+
"Nou.",
|
418 |
+
"Nouemb",
|
419 |
+
"Nouemb.",
|
420 |
+
"Nov",
|
421 |
+
"Nov.",
|
422 |
+
"Novemb",
|
423 |
+
"Novemb.",
|
424 |
+
"O",
|
425 |
+
"O.O",
|
426 |
+
"O.o",
|
427 |
+
"OB.",
|
428 |
+
"OCT",
|
429 |
+
"OCT.",
|
430 |
+
"OCTOB",
|
431 |
+
"OCTOB.",
|
432 |
+
"OL.",
|
433 |
+
"OLL",
|
434 |
+
"ON.",
|
435 |
+
"ONS",
|
436 |
+
"OP.",
|
437 |
+
"OPET",
|
438 |
+
"OPET.",
|
439 |
+
"ORD",
|
440 |
+
"ORD.",
|
441 |
+
"OS.",
|
442 |
+
"OSS",
|
443 |
+
"OST",
|
444 |
+
"OU.",
|
445 |
+
"OV.",
|
446 |
+
"O_O",
|
447 |
+
"O_o",
|
448 |
+
"Oct",
|
449 |
+
"Oct.",
|
450 |
+
"Octob",
|
451 |
+
"Octob.",
|
452 |
+
"Opet",
|
453 |
+
"Opet.",
|
454 |
+
"Ord",
|
455 |
+
"Ord.",
|
456 |
+
"P",
|
457 |
+
"P.",
|
458 |
+
"PAUL",
|
459 |
+
"PAUL.",
|
460 |
+
"PET",
|
461 |
+
"PF",
|
462 |
+
"PF.",
|
463 |
+
"PL",
|
464 |
+
"PL.",
|
465 |
+
"PLUR",
|
466 |
+
"PLUR.",
|
467 |
+
"POST",
|
468 |
+
"POST.",
|
469 |
+
"PP",
|
470 |
+
"PP.",
|
471 |
+
"PPP",
|
472 |
+
"PR.",
|
473 |
+
"PRID",
|
474 |
+
"PRID.",
|
475 |
+
"PRO",
|
476 |
+
"PRO.",
|
477 |
+
"PROCOS",
|
478 |
+
"PROCOS.",
|
479 |
+
"PT.",
|
480 |
+
"Paul",
|
481 |
+
"Paul.",
|
482 |
+
"Pf",
|
483 |
+
"Pf.",
|
484 |
+
"Pl",
|
485 |
+
"Pl.",
|
486 |
+
"Plur",
|
487 |
+
"Plur.",
|
488 |
+
"Post",
|
489 |
+
"Post.",
|
490 |
+
"Pp",
|
491 |
+
"Pp.",
|
492 |
+
"Prid",
|
493 |
+
"Prid.",
|
494 |
+
"Pro",
|
495 |
+
"Pro.",
|
496 |
+
"Procos",
|
497 |
+
"Procos.",
|
498 |
+
"Q",
|
499 |
+
"Q.",
|
500 |
+
"QUINT",
|
501 |
+
"QUINT.",
|
502 |
+
"Quint",
|
503 |
+
"Quint.",
|
504 |
+
"RD.",
|
505 |
+
"RIB",
|
506 |
+
"RID",
|
507 |
+
"RIL",
|
508 |
+
"RO.",
|
509 |
+
"RT.",
|
510 |
+
"S",
|
511 |
+
"S.",
|
512 |
+
"S.C.",
|
513 |
+
"SCR",
|
514 |
+
"SCR.",
|
515 |
+
"SEPT",
|
516 |
+
"SEPT.",
|
517 |
+
"SEPTEMB",
|
518 |
+
"SEPTEMB.",
|
519 |
+
"SER",
|
520 |
+
"SER.",
|
521 |
+
"SERT",
|
522 |
+
"SERT.",
|
523 |
+
"SEX",
|
524 |
+
"SEX.",
|
525 |
+
"SEXT",
|
526 |
+
"SEXT.",
|
527 |
+
"SS.",
|
528 |
+
"ST",
|
529 |
+
"ST.",
|
530 |
+
"STA",
|
531 |
+
"STA.",
|
532 |
+
"SUFF",
|
533 |
+
"SUFF.",
|
534 |
+
"Scr",
|
535 |
+
"Scr.",
|
536 |
+
"Sept",
|
537 |
+
"Sept.",
|
538 |
+
"Septemb",
|
539 |
+
"Septemb.",
|
540 |
+
"Ser",
|
541 |
+
"Ser.",
|
542 |
+
"Sert",
|
543 |
+
"Sert.",
|
544 |
+
"Sex",
|
545 |
+
"Sex.",
|
546 |
+
"Sext",
|
547 |
+
"Sext.",
|
548 |
+
"St",
|
549 |
+
"St.",
|
550 |
+
"Sta",
|
551 |
+
"Sta.",
|
552 |
+
"Suff",
|
553 |
+
"Suff.",
|
554 |
+
"T",
|
555 |
+
"T.",
|
556 |
+
"TA.",
|
557 |
+
"TI",
|
558 |
+
"TI.",
|
559 |
+
"TOB",
|
560 |
+
"TRIB",
|
561 |
+
"TRIB.",
|
562 |
+
"Ti",
|
563 |
+
"Ti.",
|
564 |
+
"Trib",
|
565 |
+
"Trib.",
|
566 |
+
"U",
|
567 |
+
"U.",
|
568 |
+
"UAR",
|
569 |
+
"UFF",
|
570 |
+
"UG.",
|
571 |
+
"UL.",
|
572 |
+
"UN.",
|
573 |
+
"UOL",
|
574 |
+
"UOL.",
|
575 |
+
"UOP",
|
576 |
+
"UOP.",
|
577 |
+
"UR.",
|
578 |
+
"UU",
|
579 |
+
"UU.",
|
580 |
+
"Uol",
|
581 |
+
"Uol.",
|
582 |
+
"Uop",
|
583 |
+
"Uop.",
|
584 |
+
"Uu",
|
585 |
+
"Uu.",
|
586 |
+
"V",
|
587 |
+
"V.",
|
588 |
+
"V.V",
|
589 |
+
"VOL",
|
590 |
+
"VOL.",
|
591 |
+
"VOP",
|
592 |
+
"VOP.",
|
593 |
+
"VV",
|
594 |
+
"VV.",
|
595 |
+
"V_V",
|
596 |
+
"Vol",
|
597 |
+
"Vol.",
|
598 |
+
"Vop",
|
599 |
+
"Vop.",
|
600 |
+
"Vv",
|
601 |
+
"Vv.",
|
602 |
+
"X'.",
|
603 |
+
"X++",
|
604 |
+
"X.",
|
605 |
+
"X.X",
|
606 |
+
"X.X.",
|
607 |
+
"X.X.X.",
|
608 |
+
"X.x",
|
609 |
+
"XD",
|
610 |
+
"XDD",
|
611 |
+
"XT.",
|
612 |
+
"XX",
|
613 |
+
"XX.",
|
614 |
+
"XXX",
|
615 |
+
"XXX.",
|
616 |
+
"XXXX",
|
617 |
+
"XXXX.",
|
618 |
+
"X_X",
|
619 |
+
"X_x",
|
620 |
+
"Xx",
|
621 |
+
"Xx.",
|
622 |
+
"Xxx",
|
623 |
+
"Xxx.",
|
624 |
+
"Xxxx",
|
625 |
+
"Xxxx.",
|
626 |
+
"Xxxxx",
|
627 |
+
"Xxxxx.",
|
628 |
+
"X\u2019.",
|
629 |
+
"[",
|
630 |
+
"[-:",
|
631 |
+
"[:",
|
632 |
+
"[=",
|
633 |
+
"\\",
|
634 |
+
"\\\")",
|
635 |
+
"\\n",
|
636 |
+
"\\t",
|
637 |
+
"\\x",
|
638 |
+
"]",
|
639 |
+
"]=",
|
640 |
+
"^",
|
641 |
+
"^_^",
|
642 |
+
"^__^",
|
643 |
+
"^___^",
|
644 |
+
"_*)",
|
645 |
+
"_-)",
|
646 |
+
"_.)",
|
647 |
+
"_<)",
|
648 |
+
"_^)",
|
649 |
+
"__-",
|
650 |
+
"__^",
|
651 |
+
"_\u00ac)",
|
652 |
+
"_\u0ca0)",
|
653 |
+
"a",
|
654 |
+
"a.",
|
655 |
+
"a.d",
|
656 |
+
"a.d.",
|
657 |
+
"a.u.c",
|
658 |
+
"a.u.c.",
|
659 |
+
"aa",
|
660 |
+
"aa.",
|
661 |
+
"aaa",
|
662 |
+
"aaa.",
|
663 |
+
"acc",
|
664 |
+
"acc.",
|
665 |
+
"ace",
|
666 |
+
"aes",
|
667 |
+
"agr",
|
668 |
+
"agr.",
|
669 |
+
"ai.",
|
670 |
+
"al.",
|
671 |
+
"am.",
|
672 |
+
"an.",
|
673 |
+
"ap",
|
674 |
+
"ap.",
|
675 |
+
"apr",
|
676 |
+
"apr.",
|
677 |
+
"april",
|
678 |
+
"april.",
|
679 |
+
"ar.",
|
680 |
+
"art",
|
681 |
+
"at.",
|
682 |
+
"aug",
|
683 |
+
"aug.",
|
684 |
+
"aul",
|
685 |
+
"b",
|
686 |
+
"b.",
|
687 |
+
"bis",
|
688 |
+
"br.",
|
689 |
+
"c",
|
690 |
+
"c++",
|
691 |
+
"c.",
|
692 |
+
"caes",
|
693 |
+
"caes.",
|
694 |
+
"caess",
|
695 |
+
"caess.",
|
696 |
+
"cc.",
|
697 |
+
"ce>",
|
698 |
+
"cn",
|
699 |
+
"cn.",
|
700 |
+
"coll",
|
701 |
+
"coll.",
|
702 |
+
"cons",
|
703 |
+
"cons.",
|
704 |
+
"conss",
|
705 |
+
"conss.",
|
706 |
+
"cos",
|
707 |
+
"cos.",
|
708 |
+
"coss",
|
709 |
+
"coss.",
|
710 |
+
"cr.",
|
711 |
+
"ct.",
|
712 |
+
"cum",
|
713 |
+
"d",
|
714 |
+
"d)",
|
715 |
+
"d-",
|
716 |
+
"d-)",
|
717 |
+
"d-X",
|
718 |
+
"d.",
|
719 |
+
"d.N.",
|
720 |
+
"d.d",
|
721 |
+
"d.n",
|
722 |
+
"d.n.",
|
723 |
+
"d.x",
|
724 |
+
"dX",
|
725 |
+
"d_d",
|
726 |
+
"d_x",
|
727 |
+
"dat",
|
728 |
+
"dat.",
|
729 |
+
"dd",
|
730 |
+
"dd.",
|
731 |
+
"ddd",
|
732 |
+
"dec",
|
733 |
+
"dec.",
|
734 |
+
"decemb",
|
735 |
+
"decemb.",
|
736 |
+
"decembr",
|
737 |
+
"decembr.",
|
738 |
+
"e",
|
739 |
+
"e.",
|
740 |
+
"eb.",
|
741 |
+
"ebr",
|
742 |
+
"ec.",
|
743 |
+
"ed.",
|
744 |
+
"emb",
|
745 |
+
"ept",
|
746 |
+
"er.",
|
747 |
+
"ert",
|
748 |
+
"es.",
|
749 |
+
"ess",
|
750 |
+
"et.",
|
751 |
+
"ex.",
|
752 |
+
"ext",
|
753 |
+
"f",
|
754 |
+
"f.",
|
755 |
+
"feb",
|
756 |
+
"feb.",
|
757 |
+
"febr",
|
758 |
+
"febr.",
|
759 |
+
"februar",
|
760 |
+
"februar.",
|
761 |
+
"ff.",
|
762 |
+
"g",
|
763 |
+
"g.",
|
764 |
+
"gr.",
|
765 |
+
"h",
|
766 |
+
"h.",
|
767 |
+
"i",
|
768 |
+
"i.",
|
769 |
+
"ian",
|
770 |
+
"ian.",
|
771 |
+
"ib.",
|
772 |
+
"id",
|
773 |
+
"id.",
|
774 |
+
"il.",
|
775 |
+
"imp",
|
776 |
+
"imp.",
|
777 |
+
"impp",
|
778 |
+
"impp.",
|
779 |
+
"imppp",
|
780 |
+
"imppp.",
|
781 |
+
"int",
|
782 |
+
"iul",
|
783 |
+
"iul.",
|
784 |
+
"iun",
|
785 |
+
"iun.",
|
786 |
+
"j",
|
787 |
+
"j.",
|
788 |
+
"k",
|
789 |
+
"k.",
|
790 |
+
"kal",
|
791 |
+
"kal.",
|
792 |
+
"l",
|
793 |
+
"l.",
|
794 |
+
"la",
|
795 |
+
"ll.",
|
796 |
+
"lur",
|
797 |
+
"m",
|
798 |
+
"m'.",
|
799 |
+
"m.",
|
800 |
+
"mai",
|
801 |
+
"mai.",
|
802 |
+
"mam",
|
803 |
+
"mam.",
|
804 |
+
"mar",
|
805 |
+
"mar.",
|
806 |
+
"mart",
|
807 |
+
"mart.",
|
808 |
+
"mb.",
|
809 |
+
"mbr",
|
810 |
+
"me",
|
811 |
+
"med",
|
812 |
+
"med.",
|
813 |
+
"mp.",
|
814 |
+
"mpp",
|
815 |
+
"m\u2019.",
|
816 |
+
"n",
|
817 |
+
"n.",
|
818 |
+
"nn.",
|
819 |
+
"nob",
|
820 |
+
"nob.",
|
821 |
+
"nobis",
|
822 |
+
"non",
|
823 |
+
"non.",
|
824 |
+
"nou",
|
825 |
+
"nou.",
|
826 |
+
"nouemb",
|
827 |
+
"nouemb.",
|
828 |
+
"nov",
|
829 |
+
"nov.",
|
830 |
+
"novemb",
|
831 |
+
"novemb.",
|
832 |
+
"ns.",
|
833 |
+
"nss",
|
834 |
+
"nt.",
|
835 |
+
"o",
|
836 |
+
"o.",
|
837 |
+
"o.0",
|
838 |
+
"o.O",
|
839 |
+
"o.o",
|
840 |
+
"o_0",
|
841 |
+
"o_O",
|
842 |
+
"o_o",
|
843 |
+
"ob.",
|
844 |
+
"oct",
|
845 |
+
"oct.",
|
846 |
+
"octob",
|
847 |
+
"octob.",
|
848 |
+
"ol.",
|
849 |
+
"oll",
|
850 |
+
"on.",
|
851 |
+
"ons",
|
852 |
+
"op.",
|
853 |
+
"opet",
|
854 |
+
"opet.",
|
855 |
+
"ord",
|
856 |
+
"ord.",
|
857 |
+
"os.",
|
858 |
+
"oss",
|
859 |
+
"ost",
|
860 |
+
"ou.",
|
861 |
+
"ov.",
|
862 |
+
"p",
|
863 |
+
"p.",
|
864 |
+
"paul",
|
865 |
+
"paul.",
|
866 |
+
"pet",
|
867 |
+
"pf",
|
868 |
+
"pf.",
|
869 |
+
"pl",
|
870 |
+
"pl.",
|
871 |
+
"plur",
|
872 |
+
"plur.",
|
873 |
+
"post",
|
874 |
+
"post.",
|
875 |
+
"pp",
|
876 |
+
"pp.",
|
877 |
+
"ppp",
|
878 |
+
"pr.",
|
879 |
+
"prid",
|
880 |
+
"prid.",
|
881 |
+
"pro",
|
882 |
+
"pro.",
|
883 |
+
"procos",
|
884 |
+
"procos.",
|
885 |
+
"pt.",
|
886 |
+
"q",
|
887 |
+
"q.",
|
888 |
+
"quint",
|
889 |
+
"quint.",
|
890 |
+
"r",
|
891 |
+
"r.",
|
892 |
+
"rd.",
|
893 |
+
"rib",
|
894 |
+
"rid",
|
895 |
+
"ril",
|
896 |
+
"ro.",
|
897 |
+
"rt.",
|
898 |
+
"s",
|
899 |
+
"s.",
|
900 |
+
"s.c",
|
901 |
+
"s.c.",
|
902 |
+
"scr",
|
903 |
+
"scr.",
|
904 |
+
"sept",
|
905 |
+
"sept.",
|
906 |
+
"septemb",
|
907 |
+
"septemb.",
|
908 |
+
"ser",
|
909 |
+
"ser.",
|
910 |
+
"sert",
|
911 |
+
"sert.",
|
912 |
+
"sex",
|
913 |
+
"sex.",
|
914 |
+
"sext",
|
915 |
+
"sext.",
|
916 |
+
"space",
|
917 |
+
"ss.",
|
918 |
+
"st",
|
919 |
+
"st.",
|
920 |
+
"sta",
|
921 |
+
"sta.",
|
922 |
+
"suff",
|
923 |
+
"suff.",
|
924 |
+
"t",
|
925 |
+
"t.",
|
926 |
+
"ta.",
|
927 |
+
"te",
|
928 |
+
"ti",
|
929 |
+
"ti.",
|
930 |
+
"tob",
|
931 |
+
"trib",
|
932 |
+
"trib.",
|
933 |
+
"u",
|
934 |
+
"u.",
|
935 |
+
"u.c",
|
936 |
+
"uar",
|
937 |
+
"uff",
|
938 |
+
"ug.",
|
939 |
+
"ul.",
|
940 |
+
"un.",
|
941 |
+
"uobis",
|
942 |
+
"uol",
|
943 |
+
"uol.",
|
944 |
+
"uop",
|
945 |
+
"uop.",
|
946 |
+
"ur.",
|
947 |
+
"uu",
|
948 |
+
"uu.",
|
949 |
+
"v",
|
950 |
+
"v.",
|
951 |
+
"v.v",
|
952 |
+
"v_v",
|
953 |
+
"vobis",
|
954 |
+
"vol",
|
955 |
+
"vol.",
|
956 |
+
"vop",
|
957 |
+
"vop.",
|
958 |
+
"vv",
|
959 |
+
"vv.",
|
960 |
+
"w",
|
961 |
+
"w.",
|
962 |
+
"x",
|
963 |
+
"x'.",
|
964 |
+
"x.",
|
965 |
+
"x.X",
|
966 |
+
"x.X.",
|
967 |
+
"x.d",
|
968 |
+
"x.x",
|
969 |
+
"x.x.",
|
970 |
+
"x.x.x",
|
971 |
+
"x.x.x.",
|
972 |
+
"xD",
|
973 |
+
"xDD",
|
974 |
+
"xX",
|
975 |
+
"xXX",
|
976 |
+
"x_X",
|
977 |
+
"x_d",
|
978 |
+
"x_x",
|
979 |
+
"xd",
|
980 |
+
"xdd",
|
981 |
+
"xt.",
|
982 |
+
"xx",
|
983 |
+
"xx.",
|
984 |
+
"xxx",
|
985 |
+
"xxx.",
|
986 |
+
"xxxx",
|
987 |
+
"xxxx.",
|
988 |
+
"x\u2019.",
|
989 |
+
"x\ufe35x",
|
990 |
+
"y",
|
991 |
+
"y.",
|
992 |
+
"z",
|
993 |
+
"z.",
|
994 |
+
"|",
|
995 |
+
"}",
|
996 |
+
"\u00a0",
|
997 |
+
"\u00ac",
|
998 |
+
"\u00ac_\u00ac",
|
999 |
+
"\u00af",
|
1000 |
+
"\u00af\\(x)/\u00af",
|
1001 |
+
"\u00af\\(\u30c4)/\u00af",
|
1002 |
+
"\u00b0",
|
1003 |
+
"\u00b0C.",
|
1004 |
+
"\u00b0F.",
|
1005 |
+
"\u00b0K.",
|
1006 |
+
"\u00b0X.",
|
1007 |
+
"\u00b0c.",
|
1008 |
+
"\u00b0f.",
|
1009 |
+
"\u00b0k.",
|
1010 |
+
"\u00b0x.",
|
1011 |
+
"\u00e4",
|
1012 |
+
"\u00e4.",
|
1013 |
+
"\u00f6",
|
1014 |
+
"\u00f6.",
|
1015 |
+
"\u00fc",
|
1016 |
+
"\u00fc.",
|
1017 |
+
"\u0ca0",
|
1018 |
+
"\u0ca0_\u0ca0",
|
1019 |
+
"\u0ca0\ufe35\u0ca0",
|
1020 |
+
"\u2014",
|
1021 |
+
"\u2019",
|
1022 |
+
"\u2019-(",
|
1023 |
+
"\u2019-)",
|
1024 |
+
"\u2019\u2019",
|
1025 |
+
"\u2501",
|
1026 |
+
"\u253b",
|
1027 |
+
"\u253b\u2501\u253b",
|
1028 |
+
"\u256f",
|
1029 |
+
"\u25a1",
|
1030 |
+
"\ufe35",
|
1031 |
+
"\uff09"
|
1032 |
+
]
|
vocab/vectors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:00b9e7d75775781430de3b558b9ec4263bd6b45f9b77dc45f16aea5541866b65
|
3 |
+
size 60000128
|
vocab/vectors.cfg
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"mode":"floret",
|
3 |
+
"minn":3,
|
4 |
+
"maxn":6,
|
5 |
+
"hash_count":2,
|
6 |
+
"hash_seed":2166136261,
|
7 |
+
"bow":"<",
|
8 |
+
"eow":">",
|
9 |
+
"attr":65
|
10 |
+
}
|