mathis escriva committed
Commit a0cbb13 (verified)
Parent: 7d7305d

Upload tokenizer

special_tokens_map.json CHANGED
@@ -4,7 +4,7 @@
     "<|im_end|>"
   ],
   "eos_token": {
-    "content": "<|endoftext|>",
+    "content": "<|im_end|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab5732ed79313d0aea2e3f4b59b86e16d101b99533e3998ab2ea0f94d6ddf564
-size 11418364
+oid sha256:0e46299e92a327fd4b3b6563245a4546e461f1869a12dc6b7827cfc7e39cd4bf
+size 11418532
tokenizer_config.json CHANGED
@@ -33,11 +33,18 @@
   "bos_token": null,
   "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
+  "eos_token": "<|im_end|>",
   "errors": "replace",
+  "max_length": 512,
   "model_max_length": 32768,
+  "pad_to_multiple_of": null,
   "pad_token": "<|endoftext|>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "split_special_tokens": false,
+  "stride": 0,
   "tokenizer_class": "Qwen2Tokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": null
 }
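
Besides the eos_token change, the commit records padding and truncation defaults (padding_side, truncation_side, max_length, stride) directly in tokenizer_config.json, so transformers picks them up on load. A sketch of how those defaults could be exercised, again using the hypothetical local path from above:

```python
# Sketch: exercise the padding/truncation defaults added in this commit.
# "./tokenizer_dir" is the same hypothetical local path as above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./tokenizer_dir")
print(tokenizer.padding_side)      # "right", per the new config
print(tokenizer.truncation_side)   # "right"
print(tokenizer.model_max_length)  # 32768

# ChatML-style prompt built from the chat_template; each turn now ends
# with <|im_end|>, which matches the new EOS token.
messages = [{"role": "user", "content": "Hello!"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)

# Batch encoding with right-side padding (pad_token stays <|endoftext|>)
# and "longest_first" truncation at max_length=512.
batch = tokenizer(["short prompt", "a slightly longer prompt"],
                  padding=True, truncation=True, max_length=512)
print([len(ids) for ids in batch["input_ids"]])
```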