Centrum-multinews / tokenizer_config.json
ratishsp
initial commit
8b0bf1c
raw
history blame
1.64 kB
{
"add_prefix_space": false,
"bos_token": {
"__type": "AddedToken",
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"cls_token": {
"__type": "AddedToken",
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"eos_token": {
"__type": "AddedToken",
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"errors": "replace",
"mask_token": {
"__type": "AddedToken",
"content": "<mask>",
"lstrip": true,
"normalized": true,
"rstrip": false,
"single_word": false
},
"model_max_length": 16384,
"name_or_path": "/home/hpcpudu1/rds/hpc-work/data/pretrain-mds/led_pretrain/ver2/gen_model/Centrum_pretrain_base_batch_16_21-7-22.1",
"pad_token": {
"__type": "AddedToken",
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"sep_token": {
"__type": "AddedToken",
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
},
"special_tokens_map_file": "/home/hpcpudu1/.cache/huggingface/transformers/05da652a7fca41c1c18027c1201e473217bb373e370d1283e3de49d5880cbf0c.cb2244924ab24d706b02fd7fcedaea4531566537687a539ebb94db511fd122a0",
"tokenizer_class": "LEDTokenizer",
"trim_offsets": true,
"unk_token": {
"__type": "AddedToken",
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false
}
}