livinNector
committed on
Commit
•
347dec7
1
Parent(s):
13bb0e3
Upload tokenizer
Browse files
- tokenizer.json +8 -2
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
@@ -265,7 +265,9 @@
|
|
265 |
"special": true
|
266 |
}
|
267 |
],
|
268 |
-
"normalizer":
|
|
|
|
|
269 |
"pre_tokenizer": {
|
270 |
"type": "Whitespace"
|
271 |
},
|
@@ -344,7 +346,11 @@
|
|
344 |
}
|
345 |
}
|
346 |
},
|
347 |
-
"decoder":
|
|
|
|
|
|
|
|
|
348 |
"model": {
|
349 |
"type": "WordPiece",
|
350 |
"unk_token": "[UNK]",
|
|
|
265 |
"special": true
|
266 |
}
|
267 |
],
|
268 |
+
"normalizer": {
|
269 |
+
"type": "NFKD"
|
270 |
+
},
|
271 |
"pre_tokenizer": {
|
272 |
"type": "Whitespace"
|
273 |
},
|
|
|
346 |
}
|
347 |
}
|
348 |
},
|
349 |
+
"decoder": {
|
350 |
+
"type": "WordPiece",
|
351 |
+
"prefix": "##",
|
352 |
+
"cleanup": true
|
353 |
+
},
|
354 |
"model": {
|
355 |
"type": "WordPiece",
|
356 |
"unk_token": "[UNK]",
|
tokenizer_config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
"clean_up_tokenization_spaces": true,
|
3 |
-
"model_max_length":
|
4 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
5 |
}
|
|
|
1 |
{
|
2 |
"clean_up_tokenization_spaces": true,
|
3 |
+
"model_max_length": 1000000000000000019884624838656,
|
4 |
"tokenizer_class": "PreTrainedTokenizerFast"
|
5 |
}
|