conan1024hao
committed on
Commit
•
bbf4b6d
1
Parent(s):
9b0b4ac
support sentencepiece tokenizer
Browse files
- tokenizer_config.json +1 -1
- vocab.txt +0 -0
tokenizer_config.json
CHANGED
@@ -20,6 +20,6 @@
|
|
20 |
"special_tokens_map_file": null,
|
21 |
"tokenizer_class": "BertJapaneseTokenizer",
|
22 |
"word_tokenizer_type": "jumanpp",
|
23 |
-
"subword_tokenizer_type": "
|
24 |
"jumanpp_kwargs": {}
|
25 |
}
|
|
|
20 |
"special_tokens_map_file": null,
|
21 |
"tokenizer_class": "BertJapaneseTokenizer",
|
22 |
"word_tokenizer_type": "jumanpp",
|
23 |
+
"subword_tokenizer_type": "sentencepiece",
|
24 |
"jumanpp_kwargs": {}
|
25 |
}
|
vocab.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|