1205Ppdoor-eagle-3b-preview-new_iio / tokenizer_config.json
hoonikoo's picture
(Trained with Unsloth)
b860826 verified
raw
history blame
4.82 kB
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": true,
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"5": {
"content": "<|mask|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<|system|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"7": {
"content": "<|user|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"8": {
"content": "<|assistant|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"9": {
"content": "<|ipython|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"10": {
"content": "<|endofmessage|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"11": {
"content": "<|python_call|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"12": {
"content": "<|reserved_6|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"13": {
"content": "<|reserved_7|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"14": {
"content": "<|reserved_8|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"15": {
"content": "<|reserved_9|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<s>",
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% if not date_string is defined %}{% if strftime_now is defined %}{% set date_string = strftime_now(\"%Y년 %m월 %d일, %H시 %M분\") %}{% else %}{% set date_string = \"2024년 10월 19일, 12시 49분\" %}{% endif %}{% endif %}{{ '<|system|>\n저는 ETRI 언어지능연구실에서 개발한 eGPT 언어모델입니다. eGPT 언어모델은 70억 미만의 모델에서 복잡한 일을 처리할 수 있도록 개발 중입니다.\n\n지식 중단 일자: 이 모델은 2022년 12월 이전의 지식으로만 학습되었습니다.\n현재 날짜 및 시간: ' + date_string + '<|endoftext|>' if messages and messages[0]['role'] != 'system' }}{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|endoftext|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|endoftext|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|endoftext|>\n'}}{% elif message['role'] == 'ipython' or message['role'] == 'tool' %}{{'<|ipython|>\n' + message['content'] + '<|endoftext|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
"clean_up_tokenization_spaces": false,
"endoftext_token": {
"__type": "AddedToken",
"content": "<|endoftext|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"eos_token": "</s>",
"legacy": false,
"mask_token": "<|mask|>",
"model_max_length": 16384,
"pad_token": "<pad>",
"padding_side": "left",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}