DuongTrongChi committed on
Commit
ff35fd8
1 Parent(s): 04d1a57

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +19 -14
  2. tokenizer.json +1 -0
  3. tokenizer_config.json +7 -4
special_tokens_map.json CHANGED
@@ -1,16 +1,21 @@
1
  {
2
- "eos_token": {
3
- "content": "<|endoftext|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "pad_token": {
10
- "content": "<|endoftext|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- }
 
 
 
 
 
16
  }
 
1
  {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|im_start|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|im_end|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<|im_start|>",
19
+ "eos_token": "<|im_end|>",
20
+ "pad_token": "<|im_end|>"
21
  }
tokenizer.json CHANGED
@@ -73,6 +73,7 @@
73
  "end_of_word_suffix": "",
74
  "fuse_unk": false,
75
  "byte_fallback": false,
 
76
  "vocab": {
77
  "!": 0,
78
  "\"": 1,
 
73
  "end_of_word_suffix": "",
74
  "fuse_unk": false,
75
  "byte_fallback": false,
76
+ "ignore_merges": false,
77
  "vocab": {
78
  "!": 0,
79
  "\"": 1,
tokenizer_config.json CHANGED
@@ -26,14 +26,17 @@
26
  "special": true
27
  }
28
  },
29
- "additional_special_tokens": [],
30
- "bos_token": null,
 
 
 
31
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
32
  "clean_up_tokenization_spaces": false,
33
- "eos_token": "<|endoftext|>",
34
  "errors": "replace",
35
  "model_max_length": 32768,
36
- "pad_token": "<|endoftext|>",
37
  "split_special_tokens": false,
38
  "tokenizer_class": "Qwen2Tokenizer",
39
  "unk_token": null
 
26
  "special": true
27
  }
28
  },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": "<|im_start|>",
34
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
  "errors": "replace",
38
  "model_max_length": 32768,
39
+ "pad_token": "<|im_end|>",
40
  "split_special_tokens": false,
41
  "tokenizer_class": "Qwen2Tokenizer",
42
  "unk_token": null