duyguanalizison25 / tokenizer.json
{
"version": "0.1",
"truncation": {
"max_length": 512,
"stride": 0,
"strategy": "longest_first"
},
"padding": {
"strategy": "longest",
"max_length": 512,
"pad_to_multiple_of": null
},
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": false
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "BertPostProcessor",
"sep": {
"type": "AddedToken",
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false
},
"cls": {
"type": "AddedToken",
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false
}
},
"decoder": {
"type": "WordPiece",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"vocab": {
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"hello": 5,
"world": 6
},
"max_input_chars_per_word": 100
}
}
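
What follows is a minimal usage sketch, not part of the original file: it assumes the tokenizer.json above is loaded with the Hugging Face `tokenizers` library and that the toy vocabulary (only "hello" and "world" plus the special tokens) is kept as-is.

```python
from tokenizers import Tokenizer

# Load the serialized tokenizer straight from the JSON file.
tokenizer = Tokenizer.from_file("tokenizer.json")

# BertPreTokenizer splits on whitespace/punctuation, the WordPiece model
# maps known words to ids (unknown words become [UNK]), and the
# BertProcessing post-processor wraps the sequence in [CLS] ... [SEP].
encoding = tokenizer.encode("hello world")

print(encoding.tokens)  # expected: ['[CLS]', 'hello', 'world', '[SEP]']
print(encoding.ids)     # expected: [2, 5, 6, 3]
```

The truncation (max_length 512) and batch-longest padding settings only come into play for longer inputs or when several sequences are encoded together with `encode_batch`.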