Update tokenizer_config.json to match Llama 3

#2
Files changed (1) hide show
  1. tokenizer_config.json +1 -259
tokenizer_config.json CHANGED
@@ -2049,268 +2049,10 @@
2049
  "special": true
2050
  }
2051
  },
2052
- "additional_special_tokens": [
2053
- "<|begin_of_text|>",
2054
- "<|end_of_text|>",
2055
- "<|reserved_special_token_0|>",
2056
- "<|reserved_special_token_1|>",
2057
- "<|reserved_special_token_2|>",
2058
- "<|reserved_special_token_3|>",
2059
- "<|start_header_id|>",
2060
- "<|end_header_id|>",
2061
- "<|reserved_special_token_4|>",
2062
- "<|eot_id|>",
2063
- "<|reserved_special_token_5|>",
2064
- "<|reserved_special_token_6|>",
2065
- "<|reserved_special_token_7|>",
2066
- "<|reserved_special_token_8|>",
2067
- "<|reserved_special_token_9|>",
2068
- "<|reserved_special_token_10|>",
2069
- "<|reserved_special_token_11|>",
2070
- "<|reserved_special_token_12|>",
2071
- "<|reserved_special_token_13|>",
2072
- "<|reserved_special_token_14|>",
2073
- "<|reserved_special_token_15|>",
2074
- "<|reserved_special_token_16|>",
2075
- "<|reserved_special_token_17|>",
2076
- "<|reserved_special_token_18|>",
2077
- "<|reserved_special_token_19|>",
2078
- "<|reserved_special_token_20|>",
2079
- "<|reserved_special_token_21|>",
2080
- "<|reserved_special_token_22|>",
2081
- "<|reserved_special_token_23|>",
2082
- "<|reserved_special_token_24|>",
2083
- "<|reserved_special_token_25|>",
2084
- "<|reserved_special_token_26|>",
2085
- "<|reserved_special_token_27|>",
2086
- "<|reserved_special_token_28|>",
2087
- "<|reserved_special_token_29|>",
2088
- "<|reserved_special_token_30|>",
2089
- "<|reserved_special_token_31|>",
2090
- "<|reserved_special_token_32|>",
2091
- "<|reserved_special_token_33|>",
2092
- "<|reserved_special_token_34|>",
2093
- "<|reserved_special_token_35|>",
2094
- "<|reserved_special_token_36|>",
2095
- "<|reserved_special_token_37|>",
2096
- "<|reserved_special_token_38|>",
2097
- "<|reserved_special_token_39|>",
2098
- "<|reserved_special_token_40|>",
2099
- "<|reserved_special_token_41|>",
2100
- "<|reserved_special_token_42|>",
2101
- "<|reserved_special_token_43|>",
2102
- "<|reserved_special_token_44|>",
2103
- "<|reserved_special_token_45|>",
2104
- "<|reserved_special_token_46|>",
2105
- "<|reserved_special_token_47|>",
2106
- "<|reserved_special_token_48|>",
2107
- "<|reserved_special_token_49|>",
2108
- "<|reserved_special_token_50|>",
2109
- "<|reserved_special_token_51|>",
2110
- "<|reserved_special_token_52|>",
2111
- "<|reserved_special_token_53|>",
2112
- "<|reserved_special_token_54|>",
2113
- "<|reserved_special_token_55|>",
2114
- "<|reserved_special_token_56|>",
2115
- "<|reserved_special_token_57|>",
2116
- "<|reserved_special_token_58|>",
2117
- "<|reserved_special_token_59|>",
2118
- "<|reserved_special_token_60|>",
2119
- "<|reserved_special_token_61|>",
2120
- "<|reserved_special_token_62|>",
2121
- "<|reserved_special_token_63|>",
2122
- "<|reserved_special_token_64|>",
2123
- "<|reserved_special_token_65|>",
2124
- "<|reserved_special_token_66|>",
2125
- "<|reserved_special_token_67|>",
2126
- "<|reserved_special_token_68|>",
2127
- "<|reserved_special_token_69|>",
2128
- "<|reserved_special_token_70|>",
2129
- "<|reserved_special_token_71|>",
2130
- "<|reserved_special_token_72|>",
2131
- "<|reserved_special_token_73|>",
2132
- "<|reserved_special_token_74|>",
2133
- "<|reserved_special_token_75|>",
2134
- "<|reserved_special_token_76|>",
2135
- "<|reserved_special_token_77|>",
2136
- "<|reserved_special_token_78|>",
2137
- "<|reserved_special_token_79|>",
2138
- "<|reserved_special_token_80|>",
2139
- "<|reserved_special_token_81|>",
2140
- "<|reserved_special_token_82|>",
2141
- "<|reserved_special_token_83|>",
2142
- "<|reserved_special_token_84|>",
2143
- "<|reserved_special_token_85|>",
2144
- "<|reserved_special_token_86|>",
2145
- "<|reserved_special_token_87|>",
2146
- "<|reserved_special_token_88|>",
2147
- "<|reserved_special_token_89|>",
2148
- "<|reserved_special_token_90|>",
2149
- "<|reserved_special_token_91|>",
2150
- "<|reserved_special_token_92|>",
2151
- "<|reserved_special_token_93|>",
2152
- "<|reserved_special_token_94|>",
2153
- "<|reserved_special_token_95|>",
2154
- "<|reserved_special_token_96|>",
2155
- "<|reserved_special_token_97|>",
2156
- "<|reserved_special_token_98|>",
2157
- "<|reserved_special_token_99|>",
2158
- "<|reserved_special_token_100|>",
2159
- "<|reserved_special_token_101|>",
2160
- "<|reserved_special_token_102|>",
2161
- "<|reserved_special_token_103|>",
2162
- "<|reserved_special_token_104|>",
2163
- "<|reserved_special_token_105|>",
2164
- "<|reserved_special_token_106|>",
2165
- "<|reserved_special_token_107|>",
2166
- "<|reserved_special_token_108|>",
2167
- "<|reserved_special_token_109|>",
2168
- "<|reserved_special_token_110|>",
2169
- "<|reserved_special_token_111|>",
2170
- "<|reserved_special_token_112|>",
2171
- "<|reserved_special_token_113|>",
2172
- "<|reserved_special_token_114|>",
2173
- "<|reserved_special_token_115|>",
2174
- "<|reserved_special_token_116|>",
2175
- "<|reserved_special_token_117|>",
2176
- "<|reserved_special_token_118|>",
2177
- "<|reserved_special_token_119|>",
2178
- "<|reserved_special_token_120|>",
2179
- "<|reserved_special_token_121|>",
2180
- "<|reserved_special_token_122|>",
2181
- "<|reserved_special_token_123|>",
2182
- "<|reserved_special_token_124|>",
2183
- "<|reserved_special_token_125|>",
2184
- "<|reserved_special_token_126|>",
2185
- "<|reserved_special_token_127|>",
2186
- "<|reserved_special_token_128|>",
2187
- "<|reserved_special_token_129|>",
2188
- "<|reserved_special_token_130|>",
2189
- "<|reserved_special_token_131|>",
2190
- "<|reserved_special_token_132|>",
2191
- "<|reserved_special_token_133|>",
2192
- "<|reserved_special_token_134|>",
2193
- "<|reserved_special_token_135|>",
2194
- "<|reserved_special_token_136|>",
2195
- "<|reserved_special_token_137|>",
2196
- "<|reserved_special_token_138|>",
2197
- "<|reserved_special_token_139|>",
2198
- "<|reserved_special_token_140|>",
2199
- "<|reserved_special_token_141|>",
2200
- "<|reserved_special_token_142|>",
2201
- "<|reserved_special_token_143|>",
2202
- "<|reserved_special_token_144|>",
2203
- "<|reserved_special_token_145|>",
2204
- "<|reserved_special_token_146|>",
2205
- "<|reserved_special_token_147|>",
2206
- "<|reserved_special_token_148|>",
2207
- "<|reserved_special_token_149|>",
2208
- "<|reserved_special_token_150|>",
2209
- "<|reserved_special_token_151|>",
2210
- "<|reserved_special_token_152|>",
2211
- "<|reserved_special_token_153|>",
2212
- "<|reserved_special_token_154|>",
2213
- "<|reserved_special_token_155|>",
2214
- "<|reserved_special_token_156|>",
2215
- "<|reserved_special_token_157|>",
2216
- "<|reserved_special_token_158|>",
2217
- "<|reserved_special_token_159|>",
2218
- "<|reserved_special_token_160|>",
2219
- "<|reserved_special_token_161|>",
2220
- "<|reserved_special_token_162|>",
2221
- "<|reserved_special_token_163|>",
2222
- "<|reserved_special_token_164|>",
2223
- "<|reserved_special_token_165|>",
2224
- "<|reserved_special_token_166|>",
2225
- "<|reserved_special_token_167|>",
2226
- "<|reserved_special_token_168|>",
2227
- "<|reserved_special_token_169|>",
2228
- "<|reserved_special_token_170|>",
2229
- "<|reserved_special_token_171|>",
2230
- "<|reserved_special_token_172|>",
2231
- "<|reserved_special_token_173|>",
2232
- "<|reserved_special_token_174|>",
2233
- "<|reserved_special_token_175|>",
2234
- "<|reserved_special_token_176|>",
2235
- "<|reserved_special_token_177|>",
2236
- "<|reserved_special_token_178|>",
2237
- "<|reserved_special_token_179|>",
2238
- "<|reserved_special_token_180|>",
2239
- "<|reserved_special_token_181|>",
2240
- "<|reserved_special_token_182|>",
2241
- "<|reserved_special_token_183|>",
2242
- "<|reserved_special_token_184|>",
2243
- "<|reserved_special_token_185|>",
2244
- "<|reserved_special_token_186|>",
2245
- "<|reserved_special_token_187|>",
2246
- "<|reserved_special_token_188|>",
2247
- "<|reserved_special_token_189|>",
2248
- "<|reserved_special_token_190|>",
2249
- "<|reserved_special_token_191|>",
2250
- "<|reserved_special_token_192|>",
2251
- "<|reserved_special_token_193|>",
2252
- "<|reserved_special_token_194|>",
2253
- "<|reserved_special_token_195|>",
2254
- "<|reserved_special_token_196|>",
2255
- "<|reserved_special_token_197|>",
2256
- "<|reserved_special_token_198|>",
2257
- "<|reserved_special_token_199|>",
2258
- "<|reserved_special_token_200|>",
2259
- "<|reserved_special_token_201|>",
2260
- "<|reserved_special_token_202|>",
2261
- "<|reserved_special_token_203|>",
2262
- "<|reserved_special_token_204|>",
2263
- "<|reserved_special_token_205|>",
2264
- "<|reserved_special_token_206|>",
2265
- "<|reserved_special_token_207|>",
2266
- "<|reserved_special_token_208|>",
2267
- "<|reserved_special_token_209|>",
2268
- "<|reserved_special_token_210|>",
2269
- "<|reserved_special_token_211|>",
2270
- "<|reserved_special_token_212|>",
2271
- "<|reserved_special_token_213|>",
2272
- "<|reserved_special_token_214|>",
2273
- "<|reserved_special_token_215|>",
2274
- "<|reserved_special_token_216|>",
2275
- "<|reserved_special_token_217|>",
2276
- "<|reserved_special_token_218|>",
2277
- "<|reserved_special_token_219|>",
2278
- "<|reserved_special_token_220|>",
2279
- "<|reserved_special_token_221|>",
2280
- "<|reserved_special_token_222|>",
2281
- "<|reserved_special_token_223|>",
2282
- "<|reserved_special_token_224|>",
2283
- "<|reserved_special_token_225|>",
2284
- "<|reserved_special_token_226|>",
2285
- "<|reserved_special_token_227|>",
2286
- "<|reserved_special_token_228|>",
2287
- "<|reserved_special_token_229|>",
2288
- "<|reserved_special_token_230|>",
2289
- "<|reserved_special_token_231|>",
2290
- "<|reserved_special_token_232|>",
2291
- "<|reserved_special_token_233|>",
2292
- "<|reserved_special_token_234|>",
2293
- "<|reserved_special_token_235|>",
2294
- "<|reserved_special_token_236|>",
2295
- "<|reserved_special_token_237|>",
2296
- "<|reserved_special_token_238|>",
2297
- "<|reserved_special_token_239|>",
2298
- "<|reserved_special_token_240|>",
2299
- "<|reserved_special_token_241|>",
2300
- "<|reserved_special_token_242|>",
2301
- "<|reserved_special_token_243|>",
2302
- "<|reserved_special_token_244|>",
2303
- "<|reserved_special_token_245|>",
2304
- "<|reserved_special_token_246|>",
2305
- "<|reserved_special_token_247|>",
2306
- "<|reserved_special_token_248|>",
2307
- "<|reserved_special_token_249|>",
2308
- "<|reserved_special_token_250|>"
2309
- ],
2310
  "bos_token": "<|begin_of_text|>",
2311
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2312
  "clean_up_tokenization_spaces": true,
2313
- "eos_token": "<|end_of_text|>",
2314
  "model_input_names": [
2315
  "input_ids",
2316
  "attention_mask"
 
2049
  "special": true
2050
  }
2051
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2052
  "bos_token": "<|begin_of_text|>",
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|eot_id|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"