goldfish-models
committed on
Commit
•
5afad59
1
Parent(s):
339ffbe
Upload nzi_latn_5mb tokenizer.
Browse files- added_tokens.json +1 -0
- special_tokens_map.json +1 -0
- spiece.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"[XXXXX21]": 20455, "[XXXXX39]": 20473, "[XXXXX45]": 20479, "[XXXXX31]": 20465, "[XXXXX22]": 20456, "<pad>": 20432, "[XXXXX0]": 20434, "[XXXXX13]": 20447, "[XXXXX18]": 20452, "[XXXXX38]": 20472, "[XXXXX41]": 20475, "[XXXXX24]": 20458, "[XXXXX17]": 20451, "[SEP]": 20431, "[XXXXX33]": 20467, "[XXXXX14]": 20448, "[XXXXX43]": 20477, "[XXXXX16]": 20450, "[XXXXX10]": 20444, "[XXXXX44]": 20478, "[XXXXX40]": 20474, "[XXXXX9]": 20443, "[MASK]": 20433, "[XXXXX6]": 20440, "[XXXXX42]": 20476, "[XXXXX30]": 20464, "[XXXXX8]": 20442, "[XXXXX3]": 20437, "[XXXXX28]": 20462, "[XXXXX7]": 20441, "[XXXXX5]": 20439, "[XXXXX2]": 20436, "[XXXXX12]": 20446, "[XXXXX26]": 20460, "[XXXXX29]": 20463, "[XXXXX11]": 20445, "[XXXXX27]": 20461, "[XXXXX35]": 20469, "[XXXXX34]": 20468, "[XXXXX19]": 20453, "[XXXXX15]": 20449, "[XXXXX23]": 20457, "[XXXXX36]": 20470, "[XXXXX1]": 20435, "[CLS]": 20430, "[XXXXX4]": 20438, "[XXXXX25]": 20459, "[XXXXX32]": 20466, "[XXXXX20]": 20454, "[XXXXX37]": 20471}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}, "additional_special_tokens": ["[XXXXX0]", "[XXXXX1]", "[XXXXX2]", "[XXXXX3]", "[XXXXX4]", "[XXXXX5]", "[XXXXX6]", "[XXXXX7]", "[XXXXX8]", "[XXXXX9]", "[XXXXX10]", "[XXXXX11]", "[XXXXX12]", "[XXXXX13]", "[XXXXX14]", "[XXXXX15]", "[XXXXX16]", "[XXXXX17]", "[XXXXX18]", "[XXXXX19]", "[XXXXX20]", "[XXXXX21]", "[XXXXX22]", "[XXXXX23]", "[XXXXX24]", "[XXXXX25]", "[XXXXX26]", "[XXXXX27]", "[XXXXX28]", "[XXXXX29]", "[XXXXX30]", "[XXXXX31]", "[XXXXX32]", "[XXXXX33]", "[XXXXX34]", "[XXXXX35]", "[XXXXX36]", "[XXXXX37]", "[XXXXX38]", "[XXXXX39]", "[XXXXX40]", "[XXXXX41]", "[XXXXX42]", "[XXXXX43]", "[XXXXX44]", "[XXXXX45]"]}
|
spiece.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:82f0b3c74356ba96f472480f5a9edd5b84c9ccdd2f1a7dd2963ec19207bb9597
|
3 |
+
size 574745
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "__type": "AddedToken"}, "sp_model_kwargs": {}, "name_or_path": "models/5mb/nzi_latn_5mb", "model_input_names": ["input_ids", "attention_mask"], "special_tokens_map_file": "models/5mb/nzi_latn_5mb/special_tokens_map.json", "tokenizer_class": "AlbertTokenizer"}
|