{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137713": {
"content": "<TOKENS_UNUSED_1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137714": {
"content": "<TOKENS_UNUSED_2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137715": {
"content": "<PAD__1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137716": {
"content": "<PAD__2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137717": {
"content": "<PAD__3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137718": {
"content": "<PAD__4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137719": {
"content": "<PAD__5>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137720": {
"content": "<PAD__6>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137721": {
"content": "<PAD__7>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137722": {
"content": "<PAD__8>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137723": {
"content": "<PAD__9>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137724": {
"content": "<PAD__10>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137725": {
"content": "<PAD__11>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137726": {
"content": "<PAD__12>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"137727": {
"content": "<PAD__13>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<TOKENS_UNUSED_1>",
"<TOKENS_UNUSED_2>",
"<PAD__1>",
"<PAD__2>",
"<PAD__3>",
"<PAD__4>",
"<PAD__5>",
"<PAD__6>",
"<PAD__7>",
"<PAD__8>",
"<PAD__9>",
"<PAD__10>",
"<PAD__11>",
"<PAD__12>",
"<PAD__13>"
],
"auto_map": {
"AutoTokenizer": [
"tokenization_moss2.Moss2Tokenizer",
null
]
},
"bos_token": "<s>",
"chat_template": "{% set system_message = '<s>' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<TOKENS_UNUSED_1>' + content + '<TOKENS_UNUSED_2>' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "</s>",
"padding_side": "right",
"split_special_tokens": false,
"tokenizer_class": "Moss2Tokenizer",
"unk_token": "<unk>",
"use_fast": false
}