diyclassics committed on
Commit
d9b7e04
1 Parent(s): bfddc14

Update spaCy pipeline

Browse files
.gitattributes CHANGED
@@ -35,3 +35,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  la_vectors_floret_md-3.5.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
  la_vectors_floret_md-3.6.0-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
  la_vectors_floret_md-3.7.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
 
 
 
35
  la_vectors_floret_md-3.5.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
36
  la_vectors_floret_md-3.6.0-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
37
  la_vectors_floret_md-3.7.2-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
38
+ la_vectors_floret_md-any-py3-none-any.whl filter=lfs diff=lfs merge=lfs -text
39
+ vocab/vectors filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,61 +1,20 @@
1
  ---
2
- license: mit
 
3
  language:
4
  - la
5
- tags:
6
- - cltk
7
- - latin
8
- - floret
9
- library_name: spacy
10
  ---
11
-
12
- # Model Card for la_vectors_floret_md
13
-
14
- Floret vectors for Latin
15
-
16
-
17
- # Table of Contents
18
-
19
- - [Model Details](#model-details)
20
- - [Model Description](#model-description)
21
- - [Citation](#citation)
22
- - [How to Get Started with the Model](#how-to-get-started-with-the-model)
23
-
24
-
25
- # Model Details
26
-
27
- ## Model Description
28
-
29
- <!-- Provide a longer summary of what this model is/does. -->
30
- md floret vectors for Latin on Wikipedia, Oscar, and UD data.
31
-
32
- - **Developed by:** Patrick J. Burns
33
- - **Model type:** spaCy model
34
- - **Language(s) (NLP):** la
35
- - **License:** mit
36
- - **Resources for more information:**
37
- - [GitHub Repo](https://github.com/diyclassics/la_core_cltk_md)
38
-
39
- # Citation
40
-
41
- <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
42
-
43
- **BibTeX:**
44
-
45
- ```
46
- @misc{burns_la_vectors_floret_md_2023,
47
- title = {la\_vectors\_floret\_md},
48
- version = 3.7.2,
49
- url = {https://huggingface.co/diyclassics/la_vectors_floret_md},
50
- abstract = {md floret vectors model for Latin},
51
- urldate = {2023-12-23},
52
- author = {Burns, Patrick J.},
53
- year = {2023},
54
- }
55
- ```
56
-
57
- # How to Get Started with the Model
58
-
59
- - Install with...
60
- - `pip install https://huggingface.co/latincy/la_vectors_floret_md/resolve/main/la_vectors_floret_md-3.7.2-py3-none-any.whl
61
- - Tested on python 3.10.8, spacy==3.7.2
 
1
  ---
2
+ tags:
3
+ - spacy
4
  language:
5
  - la
6
+ license: mit
 
 
 
 
7
  ---
8
+ Code required to train md floret embeddings for Latin on LatinCy Assets data. Based on spaCy project [Train floret vectors from Wikipedia and OSCAR](https://github.com/explosion/projects/tree/v3/pipelines/floret_wiki_oscar_vectors).
9
+
10
+ | Feature | Description |
11
+ | --- | --- |
12
+ | **Name** | `la_vectors_floret_md` |
13
+ | **Version** | `3.8.0` |
14
+ | **spaCy** | `>=3.8.3,<3.9.0` |
15
+ | **Default Pipeline** | |
16
+ | **Components** | |
17
+ | **Vectors** | -1 keys, 50000 unique vectors (300 dimensions) |
18
+ | **Sources** | UD_Latin-Perseus<br>UD_Latin-PROIEL<br>UD_Latin-ITTB<br>UD_Latin-LLCT<br>UD_Latin-UDante<br>Wikipedia<br>OSCAR<br>Corpus Thomisticum<br>The Latin Library<br>CLTK-Tesserae Latin<br>Patrologia Latina |
19
+ | **License** | `MIT` |
20
+ | **Author** | [Patrick J. Burns](https://diyclassics.github.io/) |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.cfg ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [paths]
2
+ train = null
3
+ dev = null
4
+ vectors = null
5
+ init_tok2vec = null
6
+
7
+ [system]
8
+ seed = 0
9
+ gpu_allocator = null
10
+
11
+ [nlp]
12
+ lang = "la"
13
+ pipeline = []
14
+ disabled = []
15
+ before_creation = null
16
+ after_creation = null
17
+ after_pipeline_creation = null
18
+ batch_size = 1000
19
+ tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"}
20
+ vectors = {"@vectors":"spacy.Vectors.v1"}
21
+
22
+ [components]
23
+
24
+ [corpora]
25
+
26
+ [corpora.dev]
27
+ @readers = "spacy.Corpus.v1"
28
+ path = ${paths.dev}
29
+ gold_preproc = false
30
+ max_length = 0
31
+ limit = 0
32
+ augmenter = null
33
+
34
+ [corpora.train]
35
+ @readers = "spacy.Corpus.v1"
36
+ path = ${paths.train}
37
+ gold_preproc = false
38
+ max_length = 0
39
+ limit = 0
40
+ augmenter = null
41
+
42
+ [training]
43
+ seed = ${system.seed}
44
+ gpu_allocator = ${system.gpu_allocator}
45
+ dropout = 0.1
46
+ accumulate_gradient = 1
47
+ patience = 1600
48
+ max_epochs = 0
49
+ max_steps = 20000
50
+ eval_frequency = 200
51
+ frozen_components = []
52
+ annotating_components = []
53
+ dev_corpus = "corpora.dev"
54
+ train_corpus = "corpora.train"
55
+ before_to_disk = null
56
+ before_update = null
57
+ logger = {"@loggers":"spacy.ConsoleLogger.v1"}
58
+
59
+ [training.batcher]
60
+ @batchers = "spacy.batch_by_words.v1"
61
+ discard_oversize = false
62
+ tolerance = 0.2
63
+
64
+ [training.batcher.size]
65
+ @schedules = "compounding.v1"
66
+ start = 100
67
+ stop = 1000
68
+ compound = 1.001
69
+
70
+ [training.optimizer]
71
+ @optimizers = "Adam.v1"
72
+ beta1 = 0.9
73
+ beta2 = 0.999
74
+ L2_is_weight_decay = true
75
+ L2 = 0.01
76
+ grad_clip = 1.0
77
+ use_averages = false
78
+ eps = 0.00000001
79
+ learn_rate = 0.001
80
+
81
+ [training.score_weights]
82
+
83
+ [initialize]
84
+ vectors = ${paths.vectors}
85
+ init_tok2vec = ${paths.init_tok2vec}
86
+ vocab_data = null
87
+ lookups = null
88
+ before_init = null
89
+ after_init = null
90
+
91
+ [initialize.components]
92
+
93
+ [initialize.tokenizer]
la_vectors_floret_md-any-py3-none-any.whl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88d2b361d1f74e73916938618414a4e70b60135f15432f907b5f86f8f228814a
3
+ size 53485225
meta.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "lang":"la",
3
+ "name":"vectors_floret_md",
4
+ "version":"3.8.0",
5
+ "description":"Code required to train md floret embeddings for Latin on LatinCy Assets data. Based on spaCy project [Train floret vectors from Wikipedia and OSCAR](https://github.com/explosion/projects/tree/v3/pipelines/floret_wiki_oscar_vectors).",
6
+ "author":"Patrick J. Burns",
7
+ "email":"[email protected]",
8
+ "url":"https://diyclassics.github.io/",
9
+ "license":"MIT",
10
+ "spacy_version":">=3.8.3,<3.9.0",
11
+ "spacy_git_version":"be0fa81",
12
+ "vectors":{
13
+ "width":300,
14
+ "vectors":50000,
15
+ "keys":-1,
16
+ "name":"la_vectors_floret_md.vectors"
17
+ },
18
+ "labels":{
19
+
20
+ },
21
+ "pipeline":[
22
+
23
+ ],
24
+ "components":[
25
+
26
+ ],
27
+ "disabled":[
28
+
29
+ ],
30
+ "title":"la_vectors_floret_md",
31
+ "sources":[
32
+ "UD_Latin-Perseus",
33
+ "UD_Latin-PROIEL",
34
+ "UD_Latin-ITTB",
35
+ "UD_Latin-LLCT",
36
+ "UD_Latin-UDante",
37
+ "Wikipedia",
38
+ "OSCAR",
39
+ "Corpus Thomisticum",
40
+ "The Latin Library",
41
+ "CLTK-Tesserae Latin",
42
+ "Patrologia Latina"
43
+ ],
44
+ "requirements":[
45
+ "spacy>=3.8.3,<3.9.0"
46
+ ]
47
+ }
tokenizer ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ��prefix_search� �^§|^%|^=|^—|^–|^\+(?![0-9])|^…|^……|^,|^:|^;|^\!|^\?|^¿|^؟|^¡|^\(|^\)|^\[|^\]|^\{|^\}|^<|^>|^_|^#|^\*|^&|^。|^?|^!|^,|^、|^;|^:|^~|^·|^।|^،|^۔|^؛|^٪|^\.\.+|^…|^\'|^"|^”|^“|^`|^‘|^´|^’|^‚|^,|^„|^»|^«|^「|^」|^『|^』|^(|^)|^〔|^〕|^【|^】|^《|^》|^〈|^〉|^〈|^〉|^⟦|^⟧|^\$|^£|^€|^¥|^฿|^US\$|^C\$|^A\$|^₽|^﷼|^₴|^₠|^₡|^₢|^₣|^₤|^₥|^₦|^₧|^₨|^₩|^₪|^₫|^€|^₭|^₮|^₯|^₰|^₱|^₲|^₳|^₴|^₵|^₶|^₷|^₸|^₹|^₺|^₻|^₼|^₽|^₾|^₿|^[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\
U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]�suffix_search�2�…$|……$|,$|:$|;$|\!$|\?$|¿$|؟$|¡$|\($|\)$|\[$|\]$|\{$|\}$|<$|>$|_$|#$|\*$|&$|。$|?$|!$|,$|、$|;$|:$|~$|·$|।$|،$|۔$|؛$|٪$|\.\.+$|…$|\'$|"$|”$|“$|`$|‘$|´$|’$|‚$|,$|„$|»$|«$|「$|」$|『$|』$|($|)$|〔$|〕$|【$|】$|《$|》$|〈$|〉$|〈$|〉$|⟦$|⟧$|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u25B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036
\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]$|'s$|'S$|’s$|’S$|—$|–$|(?<=[0-9])\+$|(?<=°[FfCcKk])\.$|(?<=[0-9])(?:\$|£|€|¥|฿|US\$|C\$|A\$|₽|﷼|₴|₠|₡|₢|₣|₤|₥|₦|₧|₨|₩|₪|₫|€|₭|₮|₯|₰|₱|₲|₳|₴|₵|₶|₷|₸|₹|₺|₻|₼|₽|₾|₿)$|(?<=[0-9])(?:km|km²|km³|m|m²|m³|dm|dm²|dm³|cm|cm²|cm³|mm|mm²|mm³|ha|µm|nm|yd|in|ft|kg|g|mg|µg|t|lb|oz|m/s|km/h|kmh|mph|hPa|Pa|mbar|mb|MB|kb|KB|gb|GB|tb|TB|T|G|M|K|%|км|км²|км³|м|м²|м³|дм|дм²|дм³|см|см²|см³|мм|мм²|мм³|нм|кг|г|мг|м/с|км/ч|кПа|Па|мбар|Кб|КБ|кб|Мб|МБ|мб|Гб|ГБ|гб|Тб|ТБ|тбكم|كم²|كم³|م|م²|م³|سم|سم²|سم³|مم|مم²|مم³|كم|غرام|جرام|جم|كغ|ملغ|كوب|اكواب)$|(?<=[0-9a-z\
uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1E
A5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F%²\-\+…|……|,|:|;|\!|\?|¿|؟|¡|\(|\)|\[|\]|\{|\}|<|>|_|#|\*|&|。|?|!|,|、|;|:|~|·|।|،|۔|؛|٪(?:\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧)])\.$|(?<=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u
0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6D
F\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F][A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\uA736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E
5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])\.$�infix_finditer�>�\.\.+|…|[\u00A6\u00A9\u00AE\u00B0\u0482\u058D\u058E\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09FA\u0B70\u0BF3-\u0BF8\u0BFA\u0C7F\u0D4F\u0D79\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116\u2117\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u214A\u214C\u214D\u214F\u218A\u218B\u2195-\u2199\u219C-\u219F\u21A1\u21A2\u21A4\u21A5\u21A7-\u21AD\u21AF-\u21CD\u21D0\u21D1\u21D3\u21D5-\u21F3\u2300-\u2307\u230C-\u231F\u2322-\u2328\u232B-\u237B\u237D-\u239A\u23B4-\u23DB\u23E2-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u25B6\u2
5B8-\u25C0\u25C2-\u25F7\u2600-\u266E\u2670-\u2767\u2794-\u27BF\u2800-\u28FF\u2B00-\u2B2F\u2B45\u2B46\u2B4D-\u2B73\u2B76-\u2B95\u2B98-\u2BC8\u2BCA-\u2BFE\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA828-\uA82B\uA836\uA837\uA839\uAA77-\uAA79\uFDFD\uFFE4\uFFE8\uFFED\uFFEE\uFFFC\uFFFD\U00010137-\U0001013F\U00010179-\U00010189\U0001018C-\U0001018E\U00010190-\U0001019B\U000101A0\U000101D0-\U000101FC\U00010877\U00010878\U00010AC8\U0001173F\U00016B3C-\U00016B3F\U00016B45\U0001BC9C\U0001D000-\U0001D0F5\U0001D100-\U0001D126\U0001D129-\U0001D164\U0001D16A-\U0001D16C\U0001D183\U0001D184\U0001D18C-\U0001D1A9\U0001D1AE-\U0001D1E8\U0001D200-\U0001D241\U0001D245\U0001D300-\U0001D356\U0001D800-\U0001D9FF\U0001DA37-\U0001DA3A\U0001DA6D-\U0001DA74\U0001DA76-\U0001DA83\U0001DA85\U0001DA86\U0001ECAC\U0001F000-\U0001F02B\U0001F030-\U0001F093\U0001F0A0-\U0001F0AE\U0001F0B1-\U0001F0BF\U0001F0C1-\U0001F0CF\U0001F0D1-\U0001F0F5\U0001F110-\U0001F16B\U0001F170-\U0001F1AC\U0001F1E6-\U0001F202\U0001F210-\U0001F23B\U0001F240-\U0001F248\U0001F250\U0001F251\U0001F260-\U0001F265\U0001F300-\U0001F3FA\U0001F400-\U0001F6D4\U0001F6E0-\U0001F6EC\U0001F6F0-\U0001F6F9\U0001F700-\U0001F773\U0001F780-\U0001F7D8\U0001F800-\U0001F80B\U0001F810-\U0001F847\U0001F850-\U0001F859\U0001F860-\U0001F887\U0001F890-\U0001F8AD\U0001F900-\U0001F90B\U0001F910-\U0001F93E\U0001F940-\U0001F970\U0001F973-\U0001F976\U0001F97A\U0001F97C-\U0001F9A2\U0001F9B0-\U0001F9B9\U0001F9C0-\U0001F9C2\U0001F9D0-\U0001F9FF\U0001FA60-\U0001FA6D]|(?<=[0-9])[+\-\*^](?=[0-9-])|(?<=[a-z\uFF41-\uFF5A\u00DF-\u00F6\u00F8-\u00FF\u0101\u0103\u0105\u0107\u0109\u010B\u010D\u010F\u0111\u0113\u0115\u0117\u0119\u011B\u011D\u011F\u0121\u0123\u0125\u0127\u0129\u012B\u012D\u012F\u0131\u0133\u0135\u0137\u0138\u013A\u013C\u013E\u0140\u0142\u0144\u0
146\u0148\u0149\u014B\u014D\u014F\u0151\u0153\u0155\u0157\u0159\u015B\u015D\u015F\u0161\u0163\u0165\u0167\u0169\u016B\u016D\u016F\u0171\u0173\u0175\u0177\u017A\u017C\u017E\u017F\u0180\u0183\u0185\u0188\u018C\u018D\u0192\u0195\u0199-\u019B\u019E\u01A1\u01A3\u01A5\u01A8\u01AA\u01AB\u01AD\u01B0\u01B4\u01B6\u01B9\u01BA\u01BD-\u01BF\u01C6\u01C9\u01CC\u01CE\u01D0\u01D2\u01D4\u01D6\u01D8\u01DA\u01DC\u01DD\u01DF\u01E1\u01E3\u01E5\u01E7\u01E9\u01EB\u01ED\u01EF\u01F0\u01F3\u01F5\u01F9\u01FB\u01FD\u01FF\u0201\u0203\u0205\u0207\u0209\u020B\u020D\u020F\u0211\u0213\u0215\u0217\u0219\u021B\u021D\u021F\u0221\u0223\u0225\u0227\u0229\u022B\u022D\u022F\u0231\u0233-\u0239\u023C\u023F\u0240\u0242\u0247\u0249\u024B\u024D\u024F\u2C61\u2C65\u2C66\u2C68\u2C6A\u2C6C\u2C71\u2C73\u2C74\u2C76-\u2C7B\uA723\uA725\uA727\uA729\uA72B\uA72D\uA72F-\uA731\uA733\uA735\uA737\uA739\uA73B\uA73D\uA73F\uA741\uA743\uA745\uA747\uA749\uA74B\uA74D\uA74F\uA751\uA753\uA755\uA757\uA759\uA75B\uA75D\uA75F\uA761\uA763\uA765\uA767\uA769\uA76B\uA76D\uA76F\uA771-\uA778\uA77A\uA77C\uA77F\uA781\uA783\uA785\uA787\uA78C\uA78E\uA791\uA793-\uA795\uA797\uA799\uA79B\uA79D\uA79F\uA7A1\uA7A3\uA7A5\uA7A7\uA7A9\uA7AF\uA7B5\uA7B7\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E01\u1E03\u1E05\u1E07\u1E09\u1E0B\u1E0D\u1E0F\u1E11\u1E13\u1E15\u1E17\u1E19\u1E1B\u1E1D\u1E1F\u1E21\u1E23\u1E25\u1E27\u1E29\u1E2B\u1E2D\u1E2F\u1E31\u1E33\u1E35\u1E37\u1E39\u1E3B\u1E3D\u1E3F\u1E41\u1E43\u1E45\u1E47\u1E49\u1E4B\u1E4D\u1E4F\u1E51\u1E53\u1E55\u1E57\u1E59\u1E5B\u1E5D\u1E5F\u1E61\u1E63\u1E65\u1E67\u1E69\u1E6B\u1E6D\u1E6F\u1E71\u1E73\u1E75\u1E77\u1E79\u1E7B\u1E7D\u1E7F\u1E81\u1E83\u1E85\u1E87\u1E89\u1E8B\u1E8D\u1E8F\u1E91\u1E93\u1E95-\u1E9D\u1E9F\u1EA1\u1EA3\u1EA5\u1EA7\u1EA9\u1EAB\u1EAD\u1EAF\u1EB1\u1EB3\u1EB5\u1EB7\u1EB9\u1EBB\u1EBD\u1EBF\u1EC1\u1EC3\u1EC5\u1EC7\u1EC9\u1ECB\u1ECD\u1ECF\u1ED1\u1ED3\u1ED5\u1ED7\u1ED9\u1EDB\u1EDD\u1EDF\u1EE1\u1EE3\u1EE5\u1EE7\u1EE9\u1EEB\u1EED\u1EEF\u1EF1\u1EF3\u1EF5\u1EF7\u1
EF9\u1EFB\u1EFD\u1EFFёа-яәөүҗңһα-ωάέίόώήύа-щюяіїєґѓѕјљњќѐѝ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])\.(?=[A-Z\uFF21-\uFF3A\u00C0-\u00D6\u00D8-\u00DE\u0100\u0102\u0104\u0106\u0108\u010A\u010C\u010E\u0110\u0112\u0114\u0116\u0118\u011A\u011C\u011E\u0120\u0122\u0124\u0126\u0128\u012A\u012C\u012E\u0130\u0132\u0134\u0136\u0139\u013B\u013D\u013F\u0141\u0143\u0145\u0147\u014A\u014C\u014E\u0150\u0152\u0154\u0156\u0158\u015A\u015C\u015E\u0160\u0162\u0164\u0166\u0168\u016A\u016C\u016E\u0170\u0172\u0174\u0176\u0178\u0179\u017B\u017D\u0181\u0182\u0184\u0186\u0187\u0189-\u018B\u018E-\u0191\u0193\u0194\u0196-\u0198\u019C\u019D\u019F\u01A0\u01A2\u01A4\u01A6\u01A7\u01A9\u01AC\u01AE\u01AF\u01B1-\u01B3\u01B5\u01B7\u01B8\u01BC\u01C4\u01C7\u01CA\u01CD\u01CF\u01D1\u01D3\u01D5\u01D7\u01D9\u01DB\u01DE\u01E0\u01E2\u01E4\u01E6\u01E8\u01EA\u01EC\u01EE\u01F1\u01F4\u01F6-\u01F8\u01FA\u01FC\u01FE\u0200\u0202\u0204\u0206\u0208\u020A\u020C\u020E\u0210\u0212\u0214\u0216\u0218\u021A\u021C\u021E\u0220\u0222\u0224\u0226\u0228\u022A\u022C\u022E\u0230\u0232\u023A\u023B\u023D\u023E\u0241\u0243-\u0246\u0248\u024A\u024C\u024E\u2C60\u2C62-\u2C64\u2C67\u2C69\u2C6B\u2C6D-\u2C70\u2C72\u2C75\u2C7E\u2C7F\uA722\uA724\uA726\uA728\uA72A\uA72C\uA72E\uA732\uA734\u
A736\uA738\uA73A\uA73C\uA73E\uA740\uA742\uA744\uA746\uA748\uA74A\uA74C\uA74E\uA750\uA752\uA754\uA756\uA758\uA75A\uA75C\uA75E\uA760\uA762\uA764\uA766\uA768\uA76A\uA76C\uA76E\uA779\uA77B\uA77D\uA77E\uA780\uA782\uA784\uA786\uA78B\uA78D\uA790\uA792\uA796\uA798\uA79A\uA79C\uA79E\uA7A0\uA7A2\uA7A4\uA7A6\uA7A8\uA7AA-\uA7AE\uA7B0-\uA7B4\uA7B6\uA7B8\u1E00\u1E02\u1E04\u1E06\u1E08\u1E0A\u1E0C\u1E0E\u1E10\u1E12\u1E14\u1E16\u1E18\u1E1A\u1E1C\u1E1E\u1E20\u1E22\u1E24\u1E26\u1E28\u1E2A\u1E2C\u1E2E\u1E30\u1E32\u1E34\u1E36\u1E38\u1E3A\u1E3C\u1E3E\u1E40\u1E42\u1E44\u1E46\u1E48\u1E4A\u1E4C\u1E4E\u1E50\u1E52\u1E54\u1E56\u1E58\u1E5A\u1E5C\u1E5E\u1E60\u1E62\u1E64\u1E66\u1E68\u1E6A\u1E6C\u1E6E\u1E70\u1E72\u1E74\u1E76\u1E78\u1E7A\u1E7C\u1E7E\u1E80\u1E82\u1E84\u1E86\u1E88\u1E8A\u1E8C\u1E8E\u1E90\u1E92\u1E94\u1E9E\u1EA0\u1EA2\u1EA4\u1EA6\u1EA8\u1EAA\u1EAC\u1EAE\u1EB0\u1EB2\u1EB4\u1EB6\u1EB8\u1EBA\u1EBC\u1EBE\u1EC0\u1EC2\u1EC4\u1EC6\u1EC8\u1ECA\u1ECC\u1ECE\u1ED0\u1ED2\u1ED4\u1ED6\u1ED8\u1EDA\u1EDC\u1EDE\u1EE0\u1EE2\u1EE4\u1EE6\u1EE8\u1EEA\u1EEC\u1EEE\u1EF0\u1EF2\u1EF4\u1EF6\u1EF8\u1EFA\u1EFC\u1EFEЁА-ЯӘӨҮҖҢҺΑ-ΩΆΈΊΌΏΉΎА-ЩЮЯІЇЄҐЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F\'"”“`‘´’‚,„»«「」『』()〔〕【】《》〈〉〈〉⟦⟧])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u0
0F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F]),(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B7
40-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])(?:-|–|—|--|---|——|~)(?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C
00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])|(?<=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F0-9])[:<>=/](?=[A-Za-z\uFF21-\uFF3A\uFF41-\uFF5A\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF\u0100-\u017F\u0180-\u01BF\u01C4-\u024F\u2C60-\u2C7B\u2C7E\u2C7F\uA722-\uA76F\uA771-\uA787\uA78B-\uA78E\uA790-\uA7B9\uA7FA\uAB30-\uAB5A\uAB60-\uAB64\u0250-\u02AF\u1D00-\u1D25\u1D6B-\u1D77\u1D79-\u1D9A\u1E00-\u1EFFёа-яЁА-ЯәөүҗңһӘӨҮҖҢҺα-ωάέίόώήύΑ-ΩΆΈΊΌΏΉΎа
-щюяіїєґА-ЩЮЯІЇЄҐѓѕјљњќѐѝЃЅЈЉЊЌЀЍ\u1200-\u137F\u0980-\u09FF\u0591-\u05F4\uFB1D-\uFB4F\u0620-\u064A\u066E-\u06D5\u06E5-\u06FF\u0750-\u077F\u08A0-\u08BD\uFB50-\uFBB1\uFBD3-\uFD3D\uFD50-\uFDC7\uFDF0-\uFDFB\uFE70-\uFEFC\U0001EE00-\U0001EEBB\u0D80-\u0DFF\u0900-\u097F\u0C80-\u0CFF\u0B80-\u0BFF\u0C00-\u0C7F\uAC00-\uD7AF\u1100-\u11FF\u3040-\u309F\u30A0-\u30FFー\u4E00-\u62FF\u6300-\u77FF\u7800-\u8CFF\u8D00-\u9FFF\u3400-\u4DBF\U00020000-\U000215FF\U00021600-\U000230FF\U00023100-\U000245FF\U00024600-\U000260FF\U00026100-\U000275FF\U00027600-\U000290FF\U00029100-\U0002A6DF\U0002A700-\U0002B73F\U0002B740-\U0002B81F\U0002B820-\U0002CEAF\U0002CEB0-\U0002EBEF\u2E80-\u2EFF\u2F00-\u2FDF\u2FF0-\u2FFF\u3000-\u303F\u31C0-\u31EF\u3200-\u32FF\u3300-\u33FF\uF900-\uFAFF\uFE30-\uFE4F\U0001F200-\U0001F2FF\U0002F800-\U0002FA1F])�token_match��url_match�
2
+ ��A�
3
+ � ��A� �'��A�'�''��A�''�(*_*)��A�(*_*)�(-8��A�(-8�(-:��A�(-:�(-;��A�(-;�(-_-)��A�(-_-)�(._.)��A�(._.)�(:��A�(:�(;��A�(;�(=��A�(=�(>_<)��A�(>_<)�(^_^)��A�(^_^)�(o:��A�(o:�(¬_¬)��A�(¬_¬)�(ಠ_ಠ)��A�(ಠ_ಠ)�(╯°□°)╯︵┻━┻��A�(╯°□°)╯︵┻━┻�)-:��A�)-:�):��A�):�-_-��A�-_-�-__-��A�-__-�._.��A�._.�0.0��A�0.0�0.o��A�0.o�0_0��A�0_0�0_o��A�0_o�8)��A�8)�8-)��A�8-)�8-D��A�8-D�8D��A�8D�:'(��A�:'(�:')��A�:')�:'-(��A�:'-(�:'-)��A�:'-)�:(��A�:(�:((��A�:((�:(((��A�:(((�:()��A�:()�:)��A�:)�:))��A�:))�:)))��A�:)))�:*��A�:*�:-(��A�:-(�:-((��A�:-((�:-(((��A�:-(((�:-)��A�:-)�:-))��A�:-))�:-)))��A�:-)))�:-*��A�:-*�:-/��A�:-/�:-0��A�:-0�:-3��A�:-3�:->��A�:->�:-D��A�:-D�:-O��A�:-O�:-P��A�:-P�:-X��A�:-X�:-]��A�:-]�:-o��A�:-o�:-p��A�:-p�:-x��A�:-x�:-|��A�:-|�:-}��A�:-}�:/��A�:/�:0��A�:0�:1��A�:1�:3��A�:3�:>��A�:>�:D��A�:D�:O��A�:O�:P��A�:P�:X��A�:X�:]��A�:]�:o��A�:o�:o)��A�:o)�:p��A�:p�:x��A�:x�:|��A�:|�:}��A�:}�:’(��A�:’(�:’)��A�:’)�:’-(��A�:’-(�:’-)��A�:’-)�;)��A�;)�;-)��A�;-)�;-D��A�;-D�;D��A�;D�;_;��A�;_;�<.<��A�<.<�</3��A�</3�<3��A�<3�<33��A�<33�<333��A�<333�<space>��A�<space>�=(��A�=(�=)��A�=)�=/��A�=/�=3��A�=3�=D��A�=D�=[��A�=[�=]��A�=]�=|��A�=|�>.<��A�>.<�>.>��A�>.>�>:(��A�>:(�>:o��A�>:o�><(((*>��A�><(((*>�@_@��A�@_@�A.��A�A.�A.D.��A�A.D.�A.U.C.��A�A.U.C.�AA.��A�AA.�AAA.��A�AAA.�ACC.��A�ACC.�AGR.��A�AGR.�AP.��A�AP.�APR.��A�APR.�APRIL.��A�APRIL.�AUG.��A�AUG.�Aa.��A�Aa.�Aaa.��A�Aaa.�Acc.��A�Acc.�Agr.��A�Agr.�Ap.��A�Ap.�Apr.��A�Apr.�April.��A�April.�Aug.��A�Aug.�C++��A�C++�C.��A�C.�CAES.��A�CAES.�CAESS.��A�CAESS.�CC.��A�CC.�CN.��A�CN.�COLL.��A�COLL.�CONS.��A�CONS.�CONSS.��A�CONSS.�COS.��A�COS.�COSS.��A�COSS.�Caes.��A�Caes.�Caess.��A�Caess.�Cc.��A�Cc.�Cn.��A�Cn.�Coll.��A�Coll.�Cons.��A�Cons.�Conss.��A�Conss.�Cos.��A�Cos.�Coss.��A�Coss.�D.��A�D.�D.N.��A�D.N.�DAT.��A�DAT.�DD.��A�DD.�DEC.��A�DEC.�DECEMB.��A�DECEMB.�DECEMBR.��A�DECEMBR.�Dat.��A�Dat.�Dd.��A�Dd.�Dec.��A�Dec.�Decemb.��A�Decemb.�Decembr.��A�Decembr.�F.��A�F.�FEB.��A�FEB.�FEBR.��A�FEBR.�FEBRUAR.��A�FEBRUAR.�Feb.��A�Feb.�Febr.��A�Febr.�Feb
ruar.��A�Februar.�IAN.��A�IAN.�ID.��A�ID.�IMP.��A�IMP.�IMPP.��A�IMPP.�IMPPP.��A�IMPPP.�IUL.��A�IUL.�IUN.��A�IUN.�Ian.��A�Ian.�Id.��A�Id.�Imp.��A�Imp.�Impp.��A�Impp.�Imppp.��A�Imppp.�Iul.��A�Iul.�Iun.��A�Iun.�K.��A�K.�KAL.��A�KAL.�Kal.��A�Kal.�L.��A�L.�M'.��A�M'.�M.��A�M.�MAI.��A�MAI.�MAM.��A�MAM.�MAR.��A�MAR.�MART.��A�MART.�MED.��A�MED.�Mai.��A�Mai.�Mam.��A�Mam.�Mar.��A�Mar.�Mart.��A�Mart.�Med.��A�Med.�M’.��A�M’.�N.��A�N.�NN.��A�NN.�NOB.��A�NOB.�NON.��A�NON.�NOU.��A�NOU.�NOUEMB.��A�NOUEMB.�NOV.��A�NOV.�NOVEMB.��A�NOVEMB.�Nn.��A�Nn.�Nob.��A�Nob.�Non.��A�Non.�Nou.��A�Nou.�Nouemb.��A�Nouemb.�Nov.��A�Nov.�Novemb.��A�Novemb.�O.O��A�O.O�O.o��A�O.o�OCT.��A�OCT.�OCTOB.��A�OCTOB.�OPET.��A�OPET.�ORD.��A�ORD.�O_O��A�O_O�O_o��A�O_o�Oct.��A�Oct.�Octob.��A�Octob.�Opet.��A�Opet.�Ord.��A�Ord.�P.��A�P.�PAUL.��A�PAUL.�PF.��A�PF.�PL.��A�PL.�PLUR.��A�PLUR.�POST.��A�POST.�PP.��A�PP.�PRID.��A�PRID.�PRO.��A�PRO.�PROCOS.��A�PROCOS.�Paul.��A�Paul.�Pf.��A�Pf.�Pl.��A�Pl.�Plur.��A�Plur.�Post.��A�Post.�Pp.��A�Pp.�Prid.��A�Prid.�Pro.��A�Pro.�Procos.��A�Procos.�Q.��A�Q.�QUINT.��A�QUINT.�Quint.��A�Quint.�S.��A�S.�S.C.��A�S.C.�SCR.��A�SCR.�SEPT.��A�SEPT.�SEPTEMB.��A�SEPTEMB.�SER.��A�SER.�SERT.��A�SERT.�SEX.��A�SEX.�SEXT.��A�SEXT.�ST.��A�ST.�STA.��A�STA.�SUFF.��A�SUFF.�Scr.��A�Scr.�Sept.��A�Sept.�Septemb.��A�Septemb.�Ser.��A�Ser.�Sert.��A�Sert.�Sex.��A�Sex.�Sext.��A�Sext.�St.��A�St.�Sta.��A�Sta.�Suff.��A�Suff.�T.��A�T.�TI.��A�TI.�TRIB.��A�TRIB.�Ti.��A�Ti.�Trib.��A�Trib.�U.��A�U.�UOL.��A�UOL.�UOP.��A�UOP.�UU.��A�UU.�Uol.��A�Uol.�Uop.��A�Uop.�Uu.��A�Uu.�V.��A�V.�V.V��A�V.V�VOL.��A�VOL.�VOP.��A�VOP.�VV.��A�VV.�V_V��A�V_V�Vol.��A�Vol.�Vop.��A�Vop.�Vv.��A�Vv.�XD��A�XD�XDD��A�XDD�[-:��A�[-:�[:��A�[:�[=��A�[=�\")��A�\")�\n��A�\n�\t��A�\t�]=��A�]=�^_^��A�^_^�^__^��A�^__^�^___^��A�^___^�a.��A�a.�a.d.��A�a.d.�a.u.c.��A�a.u.c.�aa.��A�aa.�aaa.��A�aaa.�acc.��A�acc.�agr.��A�agr.�ap.��A�ap.�apr.��A�apr.�april.��A�april.�aug.��A�aug.�b.��A�b.�c.��A�c.�caes.��A�caes.�caess.��A�caess.�cc.��A�cc.�cn.��A�cn.�coll.��A�c
oll.�cons.��A�cons.�conss.��A�conss.�cos.��A�cos.�coss.��A�coss.�d.��A�d.�d.N.��A�d.N.�d.n.��A�d.n.�dat.��A�dat.�dd.��A�dd.�dec.��A�dec.�decemb.��A�decemb.�decembr.��A�decembr.�e.��A�e.�f.��A�f.�feb.��A�feb.�febr.��A�febr.�februar.��A�februar.�g.��A�g.�h.��A�h.�i.��A�i.�ian.��A�ian.�id.��A�id.�imp.��A�imp.�impp.��A�impp.�imppp.��A�imppp.�iul.��A�iul.�iun.��A�iun.�j.��A�j.�k.��A�k.�kal.��A�kal.�l.��A�l.�m'.��A�m'.�m.��A�m.�mai.��A�mai.�mam.��A�mam.�mar.��A�mar.�mart.��A�mart.�mecum��A�me�A�cum�med.��A�med.�m’.��A�m’.�n.��A�n.�nn.��A�nn.�nob.��A�nob.�nobiscum��A�nobis�A�cum�non.��A�non.�nou.��A�nou.�nouemb.��A�nouemb.�nov.��A�nov.�novemb.��A�novemb.�o.��A�o.�o.0��A�o.0�o.O��A�o.O�o.o��A�o.o�o_0��A�o_0�o_O��A�o_O�o_o��A�o_o�oct.��A�oct.�octob.��A�octob.�opet.��A�opet.�ord.��A�ord.�p.��A�p.�paul.��A�paul.�pf.��A�pf.�pl.��A�pl.�plur.��A�plur.�post.��A�post.�pp.��A�pp.�prid.��A�prid.�pro.��A�pro.�procos.��A�procos.�q.��A�q.�quint.��A�quint.�r.��A�r.�s.��A�s.�s.c.��A�s.c.�scr.��A�scr.�sept.��A�sept.�septemb.��A�septemb.�ser.��A�ser.�sert.��A�sert.�sex.��A�sex.�sext.��A�sext.�st.��A�st.�sta.��A�sta.�suff.��A�suff.�t.��A�t.�tecum��A�te�A�cum�ti.��A�ti.�trib.��A�trib.�u.��A�u.�uobiscum��A�uobis�A�cum�uol.��A�uol.�uop.��A�uop.�uu.��A�uu.�v.��A�v.�v.v��A�v.v�v_v��A�v_v�vobiscum��A�vobis�A�cum�vol.��A�vol.�vop.��A�vop.�vv.��A�vv.�w.��A�w.�x.��A�x.�xD��A�xD�xDD��A�xDD�y.��A�y.�z.��A�z.� ��A� C� �¯\(ツ)/¯��A�¯\(ツ)/¯�°C.��A�°�A�C�A�.�°F.��A�°�A�F�A�.�°K.��A�°�A�K�A�.�°c.��A�°�A�c�A�.�°f.��A�°�A�f�A�.�°k.��A�°�A�k�A�.�ä.��A�ä.�ö.��A�ö.�ü.��A�ü.�ಠ_ಠ��A�ಠ_ಠ�ಠ︵ಠ��A�ಠ︵ಠ�—��A�—�’��A�’�’’��A�’’�faster_heuristics�
vocab/key2row ADDED
@@ -0,0 +1 @@
 
 
1
+
vocab/lookups.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76be8b528d0075f7aae98d6fa57a6d3c83ae480a8469e668d7b0af968995ac71
3
+ size 1
vocab/strings.json ADDED
@@ -0,0 +1,1032 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "\t",
3
+ "\n",
4
+ " ",
5
+ " ",
6
+ "\"",
7
+ "'",
8
+ "''",
9
+ "'-(",
10
+ "'-)",
11
+ "(",
12
+ "(((",
13
+ "(*>",
14
+ "(*_*)",
15
+ "(-8",
16
+ "(-:",
17
+ "(-;",
18
+ "(-_-)",
19
+ "(-d",
20
+ "(._.)",
21
+ "(:",
22
+ "(;",
23
+ "(=",
24
+ "(>_<)",
25
+ "(^_^)",
26
+ "(o:",
27
+ "(x:",
28
+ "(x_x)",
29
+ "(\u00ac_\u00ac)",
30
+ "(\u0ca0_\u0ca0)",
31
+ "(\u256f\u00b0\u25a1\u00b0\uff09\u256f\ufe35\u253b\u2501\u253b",
32
+ ")",
33
+ ")))",
34
+ ")-:",
35
+ ")/\u00af",
36
+ "):",
37
+ "*",
38
+ "-",
39
+ "-((",
40
+ "-))",
41
+ "-/",
42
+ "-0",
43
+ "-3",
44
+ "-8",
45
+ "-D",
46
+ "-O",
47
+ "-P",
48
+ "-X",
49
+ "-_-",
50
+ "-__-",
51
+ "-d",
52
+ "-o",
53
+ "-p",
54
+ "-x",
55
+ "-|",
56
+ ".",
57
+ ".C.",
58
+ ".D.",
59
+ ".N.",
60
+ "._.",
61
+ ".c.",
62
+ ".d.",
63
+ ".n.",
64
+ "/",
65
+ "/3",
66
+ "/d",
67
+ "0",
68
+ "0.0",
69
+ "0.o",
70
+ "0_0",
71
+ "0_o",
72
+ "1",
73
+ "3",
74
+ "33",
75
+ "333",
76
+ "8",
77
+ "8)",
78
+ "8-",
79
+ "8-)",
80
+ "8-D",
81
+ "8-d",
82
+ "8D",
83
+ "8d",
84
+ ":",
85
+ ":'(",
86
+ ":')",
87
+ ":'-(",
88
+ ":'-)",
89
+ ":(",
90
+ ":((",
91
+ ":(((",
92
+ ":()",
93
+ ":)",
94
+ ":))",
95
+ ":)))",
96
+ ":*",
97
+ ":-(",
98
+ ":-((",
99
+ ":-(((",
100
+ ":-)",
101
+ ":-))",
102
+ ":-)))",
103
+ ":-*",
104
+ ":-/",
105
+ ":-0",
106
+ ":-3",
107
+ ":->",
108
+ ":-D",
109
+ ":-O",
110
+ ":-P",
111
+ ":-X",
112
+ ":-]",
113
+ ":-d",
114
+ ":-o",
115
+ ":-p",
116
+ ":-x",
117
+ ":-|",
118
+ ":-}",
119
+ ":/",
120
+ ":0",
121
+ ":1",
122
+ ":3",
123
+ ":>",
124
+ ":D",
125
+ ":O",
126
+ ":P",
127
+ ":X",
128
+ ":]",
129
+ ":d",
130
+ ":o",
131
+ ":o)",
132
+ ":p",
133
+ ":x",
134
+ ":x)",
135
+ ":|",
136
+ ":}",
137
+ ":\u2019(",
138
+ ":\u2019)",
139
+ ":\u2019-(",
140
+ ":\u2019-)",
141
+ ";",
142
+ ";)",
143
+ ";-)",
144
+ ";-D",
145
+ ";-X",
146
+ ";-d",
147
+ ";D",
148
+ ";X",
149
+ ";_;",
150
+ ";d",
151
+ "<",
152
+ "<.<",
153
+ "</3",
154
+ "</d",
155
+ "<3",
156
+ "<33",
157
+ "<333",
158
+ "<d",
159
+ "<dd",
160
+ "<ddd",
161
+ "<space>",
162
+ "<xxxx>",
163
+ "=",
164
+ "=(",
165
+ "=)",
166
+ "=/",
167
+ "=3",
168
+ "=D",
169
+ "=X",
170
+ "=[",
171
+ "=]",
172
+ "=d",
173
+ "=|",
174
+ ">",
175
+ ">.<",
176
+ ">.>",
177
+ ">:(",
178
+ ">:o",
179
+ ">:x",
180
+ "><(((*>",
181
+ "@",
182
+ "@_@",
183
+ "A",
184
+ "A.",
185
+ "A.D.",
186
+ "A.U.C.",
187
+ "AA",
188
+ "AA.",
189
+ "AAA",
190
+ "AAA.",
191
+ "ACC",
192
+ "ACC.",
193
+ "AES",
194
+ "AGR",
195
+ "AGR.",
196
+ "AI.",
197
+ "AL.",
198
+ "AM.",
199
+ "AN.",
200
+ "AP",
201
+ "AP.",
202
+ "APR",
203
+ "APR.",
204
+ "APRIL",
205
+ "APRIL.",
206
+ "AR.",
207
+ "ART",
208
+ "AT.",
209
+ "AUG",
210
+ "AUG.",
211
+ "AUL",
212
+ "Aa",
213
+ "Aa.",
214
+ "Aaa",
215
+ "Aaa.",
216
+ "Acc",
217
+ "Acc.",
218
+ "Agr",
219
+ "Agr.",
220
+ "Ap",
221
+ "Ap.",
222
+ "Apr",
223
+ "Apr.",
224
+ "April",
225
+ "April.",
226
+ "Aug",
227
+ "Aug.",
228
+ "BR.",
229
+ "C",
230
+ "C++",
231
+ "C.",
232
+ "CAES",
233
+ "CAES.",
234
+ "CAESS",
235
+ "CAESS.",
236
+ "CC",
237
+ "CC.",
238
+ "CN",
239
+ "CN.",
240
+ "COLL",
241
+ "COLL.",
242
+ "CONS",
243
+ "CONS.",
244
+ "CONSS",
245
+ "CONSS.",
246
+ "COS",
247
+ "COS.",
248
+ "COSS",
249
+ "COSS.",
250
+ "CR.",
251
+ "CT.",
252
+ "Caes",
253
+ "Caes.",
254
+ "Caess",
255
+ "Caess.",
256
+ "Cc",
257
+ "Cc.",
258
+ "Cn",
259
+ "Cn.",
260
+ "Coll",
261
+ "Coll.",
262
+ "Cons",
263
+ "Cons.",
264
+ "Conss",
265
+ "Conss.",
266
+ "Cos",
267
+ "Cos.",
268
+ "Coss",
269
+ "Coss.",
270
+ "D",
271
+ "D.",
272
+ "D.N.",
273
+ "DAT",
274
+ "DAT.",
275
+ "DD",
276
+ "DD.",
277
+ "DEC",
278
+ "DEC.",
279
+ "DECEMB",
280
+ "DECEMB.",
281
+ "DECEMBR",
282
+ "DECEMBR.",
283
+ "Dat",
284
+ "Dat.",
285
+ "Dd",
286
+ "Dd.",
287
+ "Dec",
288
+ "Dec.",
289
+ "Decemb",
290
+ "Decemb.",
291
+ "Decembr",
292
+ "Decembr.",
293
+ "EB.",
294
+ "EBR",
295
+ "EC.",
296
+ "ED.",
297
+ "EMB",
298
+ "EPT",
299
+ "ER.",
300
+ "ERT",
301
+ "ES.",
302
+ "ESS",
303
+ "ET.",
304
+ "EX.",
305
+ "EXT",
306
+ "F",
307
+ "F.",
308
+ "FEB",
309
+ "FEB.",
310
+ "FEBR",
311
+ "FEBR.",
312
+ "FEBRUAR",
313
+ "FEBRUAR.",
314
+ "FF.",
315
+ "Feb",
316
+ "Feb.",
317
+ "Febr",
318
+ "Febr.",
319
+ "Februar",
320
+ "Februar.",
321
+ "GR.",
322
+ "I",
323
+ "IAN",
324
+ "IAN.",
325
+ "IB.",
326
+ "ID.",
327
+ "IL.",
328
+ "IMP",
329
+ "IMP.",
330
+ "IMPP",
331
+ "IMPP.",
332
+ "IMPPP",
333
+ "IMPPP.",
334
+ "INT",
335
+ "IUL",
336
+ "IUL.",
337
+ "IUN",
338
+ "IUN.",
339
+ "Ian",
340
+ "Ian.",
341
+ "Id",
342
+ "Id.",
343
+ "Imp",
344
+ "Imp.",
345
+ "Impp",
346
+ "Impp.",
347
+ "Imppp",
348
+ "Imppp.",
349
+ "Iul",
350
+ "Iul.",
351
+ "Iun",
352
+ "Iun.",
353
+ "K",
354
+ "K.",
355
+ "KAL",
356
+ "KAL.",
357
+ "Kal",
358
+ "Kal.",
359
+ "L",
360
+ "L.",
361
+ "LL.",
362
+ "LUR",
363
+ "M",
364
+ "M'.",
365
+ "M.",
366
+ "MAI",
367
+ "MAI.",
368
+ "MAM",
369
+ "MAM.",
370
+ "MAR",
371
+ "MAR.",
372
+ "MART",
373
+ "MART.",
374
+ "MB.",
375
+ "MBR",
376
+ "MED",
377
+ "MED.",
378
+ "MP.",
379
+ "MPP",
380
+ "Mai",
381
+ "Mai.",
382
+ "Mam",
383
+ "Mam.",
384
+ "Mar",
385
+ "Mar.",
386
+ "Mart",
387
+ "Mart.",
388
+ "Med",
389
+ "Med.",
390
+ "M\u2019.",
391
+ "N",
392
+ "N.",
393
+ "NN",
394
+ "NN.",
395
+ "NOB",
396
+ "NOB.",
397
+ "NON",
398
+ "NON.",
399
+ "NOU",
400
+ "NOU.",
401
+ "NOUEMB",
402
+ "NOUEMB.",
403
+ "NOV",
404
+ "NOV.",
405
+ "NOVEMB",
406
+ "NOVEMB.",
407
+ "NS.",
408
+ "NSS",
409
+ "NT.",
410
+ "Nn",
411
+ "Nn.",
412
+ "Nob",
413
+ "Nob.",
414
+ "Non",
415
+ "Non.",
416
+ "Nou",
417
+ "Nou.",
418
+ "Nouemb",
419
+ "Nouemb.",
420
+ "Nov",
421
+ "Nov.",
422
+ "Novemb",
423
+ "Novemb.",
424
+ "O",
425
+ "O.O",
426
+ "O.o",
427
+ "OB.",
428
+ "OCT",
429
+ "OCT.",
430
+ "OCTOB",
431
+ "OCTOB.",
432
+ "OL.",
433
+ "OLL",
434
+ "ON.",
435
+ "ONS",
436
+ "OP.",
437
+ "OPET",
438
+ "OPET.",
439
+ "ORD",
440
+ "ORD.",
441
+ "OS.",
442
+ "OSS",
443
+ "OST",
444
+ "OU.",
445
+ "OV.",
446
+ "O_O",
447
+ "O_o",
448
+ "Oct",
449
+ "Oct.",
450
+ "Octob",
451
+ "Octob.",
452
+ "Opet",
453
+ "Opet.",
454
+ "Ord",
455
+ "Ord.",
456
+ "P",
457
+ "P.",
458
+ "PAUL",
459
+ "PAUL.",
460
+ "PET",
461
+ "PF",
462
+ "PF.",
463
+ "PL",
464
+ "PL.",
465
+ "PLUR",
466
+ "PLUR.",
467
+ "POST",
468
+ "POST.",
469
+ "PP",
470
+ "PP.",
471
+ "PPP",
472
+ "PR.",
473
+ "PRID",
474
+ "PRID.",
475
+ "PRO",
476
+ "PRO.",
477
+ "PROCOS",
478
+ "PROCOS.",
479
+ "PT.",
480
+ "Paul",
481
+ "Paul.",
482
+ "Pf",
483
+ "Pf.",
484
+ "Pl",
485
+ "Pl.",
486
+ "Plur",
487
+ "Plur.",
488
+ "Post",
489
+ "Post.",
490
+ "Pp",
491
+ "Pp.",
492
+ "Prid",
493
+ "Prid.",
494
+ "Pro",
495
+ "Pro.",
496
+ "Procos",
497
+ "Procos.",
498
+ "Q",
499
+ "Q.",
500
+ "QUINT",
501
+ "QUINT.",
502
+ "Quint",
503
+ "Quint.",
504
+ "RD.",
505
+ "RIB",
506
+ "RID",
507
+ "RIL",
508
+ "RO.",
509
+ "RT.",
510
+ "S",
511
+ "S.",
512
+ "S.C.",
513
+ "SCR",
514
+ "SCR.",
515
+ "SEPT",
516
+ "SEPT.",
517
+ "SEPTEMB",
518
+ "SEPTEMB.",
519
+ "SER",
520
+ "SER.",
521
+ "SERT",
522
+ "SERT.",
523
+ "SEX",
524
+ "SEX.",
525
+ "SEXT",
526
+ "SEXT.",
527
+ "SS.",
528
+ "ST",
529
+ "ST.",
530
+ "STA",
531
+ "STA.",
532
+ "SUFF",
533
+ "SUFF.",
534
+ "Scr",
535
+ "Scr.",
536
+ "Sept",
537
+ "Sept.",
538
+ "Septemb",
539
+ "Septemb.",
540
+ "Ser",
541
+ "Ser.",
542
+ "Sert",
543
+ "Sert.",
544
+ "Sex",
545
+ "Sex.",
546
+ "Sext",
547
+ "Sext.",
548
+ "St",
549
+ "St.",
550
+ "Sta",
551
+ "Sta.",
552
+ "Suff",
553
+ "Suff.",
554
+ "T",
555
+ "T.",
556
+ "TA.",
557
+ "TI",
558
+ "TI.",
559
+ "TOB",
560
+ "TRIB",
561
+ "TRIB.",
562
+ "Ti",
563
+ "Ti.",
564
+ "Trib",
565
+ "Trib.",
566
+ "U",
567
+ "U.",
568
+ "UAR",
569
+ "UFF",
570
+ "UG.",
571
+ "UL.",
572
+ "UN.",
573
+ "UOL",
574
+ "UOL.",
575
+ "UOP",
576
+ "UOP.",
577
+ "UR.",
578
+ "UU",
579
+ "UU.",
580
+ "Uol",
581
+ "Uol.",
582
+ "Uop",
583
+ "Uop.",
584
+ "Uu",
585
+ "Uu.",
586
+ "V",
587
+ "V.",
588
+ "V.V",
589
+ "VOL",
590
+ "VOL.",
591
+ "VOP",
592
+ "VOP.",
593
+ "VV",
594
+ "VV.",
595
+ "V_V",
596
+ "Vol",
597
+ "Vol.",
598
+ "Vop",
599
+ "Vop.",
600
+ "Vv",
601
+ "Vv.",
602
+ "X'.",
603
+ "X++",
604
+ "X.",
605
+ "X.X",
606
+ "X.X.",
607
+ "X.X.X.",
608
+ "X.x",
609
+ "XD",
610
+ "XDD",
611
+ "XT.",
612
+ "XX",
613
+ "XX.",
614
+ "XXX",
615
+ "XXX.",
616
+ "XXXX",
617
+ "XXXX.",
618
+ "X_X",
619
+ "X_x",
620
+ "Xx",
621
+ "Xx.",
622
+ "Xxx",
623
+ "Xxx.",
624
+ "Xxxx",
625
+ "Xxxx.",
626
+ "Xxxxx",
627
+ "Xxxxx.",
628
+ "X\u2019.",
629
+ "[",
630
+ "[-:",
631
+ "[:",
632
+ "[=",
633
+ "\\",
634
+ "\\\")",
635
+ "\\n",
636
+ "\\t",
637
+ "\\x",
638
+ "]",
639
+ "]=",
640
+ "^",
641
+ "^_^",
642
+ "^__^",
643
+ "^___^",
644
+ "_*)",
645
+ "_-)",
646
+ "_.)",
647
+ "_<)",
648
+ "_^)",
649
+ "__-",
650
+ "__^",
651
+ "_\u00ac)",
652
+ "_\u0ca0)",
653
+ "a",
654
+ "a.",
655
+ "a.d",
656
+ "a.d.",
657
+ "a.u.c",
658
+ "a.u.c.",
659
+ "aa",
660
+ "aa.",
661
+ "aaa",
662
+ "aaa.",
663
+ "acc",
664
+ "acc.",
665
+ "ace",
666
+ "aes",
667
+ "agr",
668
+ "agr.",
669
+ "ai.",
670
+ "al.",
671
+ "am.",
672
+ "an.",
673
+ "ap",
674
+ "ap.",
675
+ "apr",
676
+ "apr.",
677
+ "april",
678
+ "april.",
679
+ "ar.",
680
+ "art",
681
+ "at.",
682
+ "aug",
683
+ "aug.",
684
+ "aul",
685
+ "b",
686
+ "b.",
687
+ "bis",
688
+ "br.",
689
+ "c",
690
+ "c++",
691
+ "c.",
692
+ "caes",
693
+ "caes.",
694
+ "caess",
695
+ "caess.",
696
+ "cc.",
697
+ "ce>",
698
+ "cn",
699
+ "cn.",
700
+ "coll",
701
+ "coll.",
702
+ "cons",
703
+ "cons.",
704
+ "conss",
705
+ "conss.",
706
+ "cos",
707
+ "cos.",
708
+ "coss",
709
+ "coss.",
710
+ "cr.",
711
+ "ct.",
712
+ "cum",
713
+ "d",
714
+ "d)",
715
+ "d-",
716
+ "d-)",
717
+ "d-X",
718
+ "d.",
719
+ "d.N.",
720
+ "d.d",
721
+ "d.n",
722
+ "d.n.",
723
+ "d.x",
724
+ "dX",
725
+ "d_d",
726
+ "d_x",
727
+ "dat",
728
+ "dat.",
729
+ "dd",
730
+ "dd.",
731
+ "ddd",
732
+ "dec",
733
+ "dec.",
734
+ "decemb",
735
+ "decemb.",
736
+ "decembr",
737
+ "decembr.",
738
+ "e",
739
+ "e.",
740
+ "eb.",
741
+ "ebr",
742
+ "ec.",
743
+ "ed.",
744
+ "emb",
745
+ "ept",
746
+ "er.",
747
+ "ert",
748
+ "es.",
749
+ "ess",
750
+ "et.",
751
+ "ex.",
752
+ "ext",
753
+ "f",
754
+ "f.",
755
+ "feb",
756
+ "feb.",
757
+ "febr",
758
+ "febr.",
759
+ "februar",
760
+ "februar.",
761
+ "ff.",
762
+ "g",
763
+ "g.",
764
+ "gr.",
765
+ "h",
766
+ "h.",
767
+ "i",
768
+ "i.",
769
+ "ian",
770
+ "ian.",
771
+ "ib.",
772
+ "id",
773
+ "id.",
774
+ "il.",
775
+ "imp",
776
+ "imp.",
777
+ "impp",
778
+ "impp.",
779
+ "imppp",
780
+ "imppp.",
781
+ "int",
782
+ "iul",
783
+ "iul.",
784
+ "iun",
785
+ "iun.",
786
+ "j",
787
+ "j.",
788
+ "k",
789
+ "k.",
790
+ "kal",
791
+ "kal.",
792
+ "l",
793
+ "l.",
794
+ "la",
795
+ "ll.",
796
+ "lur",
797
+ "m",
798
+ "m'.",
799
+ "m.",
800
+ "mai",
801
+ "mai.",
802
+ "mam",
803
+ "mam.",
804
+ "mar",
805
+ "mar.",
806
+ "mart",
807
+ "mart.",
808
+ "mb.",
809
+ "mbr",
810
+ "me",
811
+ "med",
812
+ "med.",
813
+ "mp.",
814
+ "mpp",
815
+ "m\u2019.",
816
+ "n",
817
+ "n.",
818
+ "nn.",
819
+ "nob",
820
+ "nob.",
821
+ "nobis",
822
+ "non",
823
+ "non.",
824
+ "nou",
825
+ "nou.",
826
+ "nouemb",
827
+ "nouemb.",
828
+ "nov",
829
+ "nov.",
830
+ "novemb",
831
+ "novemb.",
832
+ "ns.",
833
+ "nss",
834
+ "nt.",
835
+ "o",
836
+ "o.",
837
+ "o.0",
838
+ "o.O",
839
+ "o.o",
840
+ "o_0",
841
+ "o_O",
842
+ "o_o",
843
+ "ob.",
844
+ "oct",
845
+ "oct.",
846
+ "octob",
847
+ "octob.",
848
+ "ol.",
849
+ "oll",
850
+ "on.",
851
+ "ons",
852
+ "op.",
853
+ "opet",
854
+ "opet.",
855
+ "ord",
856
+ "ord.",
857
+ "os.",
858
+ "oss",
859
+ "ost",
860
+ "ou.",
861
+ "ov.",
862
+ "p",
863
+ "p.",
864
+ "paul",
865
+ "paul.",
866
+ "pet",
867
+ "pf",
868
+ "pf.",
869
+ "pl",
870
+ "pl.",
871
+ "plur",
872
+ "plur.",
873
+ "post",
874
+ "post.",
875
+ "pp",
876
+ "pp.",
877
+ "ppp",
878
+ "pr.",
879
+ "prid",
880
+ "prid.",
881
+ "pro",
882
+ "pro.",
883
+ "procos",
884
+ "procos.",
885
+ "pt.",
886
+ "q",
887
+ "q.",
888
+ "quint",
889
+ "quint.",
890
+ "r",
891
+ "r.",
892
+ "rd.",
893
+ "rib",
894
+ "rid",
895
+ "ril",
896
+ "ro.",
897
+ "rt.",
898
+ "s",
899
+ "s.",
900
+ "s.c",
901
+ "s.c.",
902
+ "scr",
903
+ "scr.",
904
+ "sept",
905
+ "sept.",
906
+ "septemb",
907
+ "septemb.",
908
+ "ser",
909
+ "ser.",
910
+ "sert",
911
+ "sert.",
912
+ "sex",
913
+ "sex.",
914
+ "sext",
915
+ "sext.",
916
+ "space",
917
+ "ss.",
918
+ "st",
919
+ "st.",
920
+ "sta",
921
+ "sta.",
922
+ "suff",
923
+ "suff.",
924
+ "t",
925
+ "t.",
926
+ "ta.",
927
+ "te",
928
+ "ti",
929
+ "ti.",
930
+ "tob",
931
+ "trib",
932
+ "trib.",
933
+ "u",
934
+ "u.",
935
+ "u.c",
936
+ "uar",
937
+ "uff",
938
+ "ug.",
939
+ "ul.",
940
+ "un.",
941
+ "uobis",
942
+ "uol",
943
+ "uol.",
944
+ "uop",
945
+ "uop.",
946
+ "ur.",
947
+ "uu",
948
+ "uu.",
949
+ "v",
950
+ "v.",
951
+ "v.v",
952
+ "v_v",
953
+ "vobis",
954
+ "vol",
955
+ "vol.",
956
+ "vop",
957
+ "vop.",
958
+ "vv",
959
+ "vv.",
960
+ "w",
961
+ "w.",
962
+ "x",
963
+ "x'.",
964
+ "x.",
965
+ "x.X",
966
+ "x.X.",
967
+ "x.d",
968
+ "x.x",
969
+ "x.x.",
970
+ "x.x.x",
971
+ "x.x.x.",
972
+ "xD",
973
+ "xDD",
974
+ "xX",
975
+ "xXX",
976
+ "x_X",
977
+ "x_d",
978
+ "x_x",
979
+ "xd",
980
+ "xdd",
981
+ "xt.",
982
+ "xx",
983
+ "xx.",
984
+ "xxx",
985
+ "xxx.",
986
+ "xxxx",
987
+ "xxxx.",
988
+ "x\u2019.",
989
+ "x\ufe35x",
990
+ "y",
991
+ "y.",
992
+ "z",
993
+ "z.",
994
+ "|",
995
+ "}",
996
+ "\u00a0",
997
+ "\u00ac",
998
+ "\u00ac_\u00ac",
999
+ "\u00af",
1000
+ "\u00af\\(x)/\u00af",
1001
+ "\u00af\\(\u30c4)/\u00af",
1002
+ "\u00b0",
1003
+ "\u00b0C.",
1004
+ "\u00b0F.",
1005
+ "\u00b0K.",
1006
+ "\u00b0X.",
1007
+ "\u00b0c.",
1008
+ "\u00b0f.",
1009
+ "\u00b0k.",
1010
+ "\u00b0x.",
1011
+ "\u00e4",
1012
+ "\u00e4.",
1013
+ "\u00f6",
1014
+ "\u00f6.",
1015
+ "\u00fc",
1016
+ "\u00fc.",
1017
+ "\u0ca0",
1018
+ "\u0ca0_\u0ca0",
1019
+ "\u0ca0\ufe35\u0ca0",
1020
+ "\u2014",
1021
+ "\u2019",
1022
+ "\u2019-(",
1023
+ "\u2019-)",
1024
+ "\u2019\u2019",
1025
+ "\u2501",
1026
+ "\u253b",
1027
+ "\u253b\u2501\u253b",
1028
+ "\u256f",
1029
+ "\u25a1",
1030
+ "\ufe35",
1031
+ "\uff09"
1032
+ ]
vocab/vectors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b9e7d75775781430de3b558b9ec4263bd6b45f9b77dc45f16aea5541866b65
3
+ size 60000128
vocab/vectors.cfg ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mode":"floret",
3
+ "minn":3,
4
+ "maxn":6,
5
+ "hash_count":2,
6
+ "hash_seed":2166136261,
7
+ "bow":"<",
8
+ "eow":">",
9
+ "attr":65
10
+ }