dutch-tokenizer-arena / stats /compress_rate.json
yhavinga's picture
Add Llama tokenizer creation for Dutch, English, Code, Markdown and TeX.
c78da21
{
"amber.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"aya_101.cc100-en": {
"vocab_size": 250100,
"n_bytes": 1124813,
"n_tokens": 317881,
"n_chars": 1121360
},
"baichuan.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 280108,
"n_chars": 1121360
},
"baichuan2.cc100-en": {
"vocab_size": 125696,
"n_bytes": 1124813,
"n_tokens": 269011,
"n_chars": 1121360
},
"bert_base_cased.cc100-en": {
"vocab_size": 28996,
"n_bytes": 1124813,
"n_tokens": 288022,
"n_chars": 1121360
},
"bert_base_chinese.cc100-en": {
"vocab_size": 21128,
"n_bytes": 1124813,
"n_tokens": 377068,
"n_chars": 1121360
},
"bert_base_uncased.cc100-en": {
"vocab_size": 30522,
"n_bytes": 1124813,
"n_tokens": 280575,
"n_chars": 1121360
},
"bloom.cc100-en": {
"vocab_size": 250680,
"n_bytes": 1124813,
"n_tokens": 257405,
"n_chars": 1121360
},
"byt5_small.cc100-en": {
"vocab_size": 384,
"n_bytes": 1124813,
"n_tokens": 1134813,
"n_chars": 1121360
},
"character_glm_6b.cc100-en": {
"vocab_size": 64789,
"n_bytes": 1124813,
"n_tokens": 289347,
"n_chars": 1121360
},
"chatglm2_6b.cc100-en": {
"vocab_size": 64787,
"n_bytes": 1124813,
"n_tokens": 289329,
"n_chars": 1121360
},
"chatglm3_6b.cc100-en": {
"vocab_size": 64796,
"n_bytes": 1124813,
"n_tokens": 289347,
"n_chars": 1121360
},
"chatglm_6b.cc100-en": {
"vocab_size": 150344,
"n_bytes": 1124813,
"n_tokens": 284761,
"n_chars": 1121360
},
"chatyuan_large_v2.cc100-en": {
"vocab_size": 32128,
"n_bytes": 1124813,
"n_tokens": 536033,
"n_chars": 1121360
},
"chinese_llama.cc100-en": {
"vocab_size": 49953,
"n_bytes": 1124813,
"n_tokens": 291514,
"n_chars": 1121360
},
"chinese_llama2.cc100-en": {
"vocab_size": 55296,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"code_davinci_002.cc100-en": {
"vocab_size": 50281,
"n_bytes": 1124813,
"n_tokens": 258403,
"n_chars": 1121360
},
"crystal_coder.cc100-en": {
"vocab_size": 32022,
"n_bytes": 1124813,
"n_tokens": 284627,
"n_chars": 1121360
},
"dbrx_instruct.cc100-en": {
"vocab_size": 100280,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"deepseek_coder_33b_instruct.cc100-en": {
"vocab_size": 32022,
"n_bytes": 1124813,
"n_tokens": 287408,
"n_chars": 1121360
},
"deepseek_llm_7b_base.cc100-en": {
"vocab_size": 100015,
"n_bytes": 1124813,
"n_tokens": 272324,
"n_chars": 1121360
},
"falcon_180b.cc100-en": {
"vocab_size": 65024,
"n_bytes": 1124813,
"n_tokens": 262509,
"n_chars": 1121360
},
"falcon_7b.cc100-en": {
"vocab_size": 65024,
"n_bytes": 1124813,
"n_tokens": 262509,
"n_chars": 1121360
},
"fastchat_t5_3b.cc100-en": {
"vocab_size": 32110,
"n_bytes": 1124813,
"n_tokens": 484941,
"n_chars": 1121360
},
"flan_t5_base.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"gemma_7b.cc100-en": {
"vocab_size": 256000,
"n_bytes": 1124813,
"n_tokens": 268010,
"n_chars": 1121360
},
"gpt2.cc100-en": {
"vocab_size": 50257,
"n_bytes": 1124813,
"n_tokens": 258428,
"n_chars": 1121360
},
"gpt2_chinese.cc100-en": {
"vocab_size": 21128,
"n_bytes": 1124813,
"n_tokens": 392641,
"n_chars": 1121360
},
"gpt_35_turbo.cc100-en": {
"vocab_size": 100277,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"gpt_4.cc100-en": {
"vocab_size": 100277,
"n_bytes": 1124813,
"n_tokens": 254985,
"n_chars": 1121360
},
"gpt_nexo_20b.cc100-en": {
"vocab_size": 50277,
"n_bytes": 1124813,
"n_tokens": 259357,
"n_chars": 1121360
},
"grok_1.cc100-en": {
"vocab_size": 131072,
"n_bytes": 1124813,
"n_tokens": 258048,
"n_chars": 1121360
},
"internlm2_chat_7b.cc100-en": {
"vocab_size": 92544,
"n_bytes": 1124813,
"n_tokens": 271583,
"n_chars": 1121360
},
"internlm2_math_7b.cc100-en": {
"vocab_size": 92544,
"n_bytes": 1124813,
"n_tokens": 271583,
"n_chars": 1121360
},
"internlm_chat_7b.cc100-en": {
"vocab_size": 103168,
"n_bytes": 1124813,
"n_tokens": 271293,
"n_chars": 1121360
},
"internlm_xcomposer_7b.cc100-en": {
"vocab_size": 103168,
"n_bytes": 1124813,
"n_tokens": 271293,
"n_chars": 1121360
},
"jamba_v0_1.cc100-en": {
"vocab_size": 65536,
"n_bytes": 1124813,
"n_tokens": 274242,
"n_chars": 1121360
},
"kplug.cc100-en": {
"vocab_size": 10261,
"n_bytes": 1124813,
"n_tokens": 393564,
"n_chars": 1121360
},
"llama.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"llama2.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"llama3.cc100-en": {
"vocab_size": 128256,
"n_bytes": 1124813,
"n_tokens": 254944,
"n_chars": 1121360
},
"mistral_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"mixtral_8_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"mobilebert_uncased.cc100-en": {
"vocab_size": 30522,
"n_bytes": 1124813,
"n_tokens": 280575,
"n_chars": 1121360
},
"moss.cc100-en": {
"vocab_size": 106072,
"n_bytes": 1124813,
"n_tokens": 257070,
"n_chars": 1121360
},
"mt5_large.cc100-en": {
"vocab_size": 250100,
"n_bytes": 1124813,
"n_tokens": 317881,
"n_chars": 1121360
},
"olmo_7b.cc100-en": {
"vocab_size": 50280,
"n_bytes": 1124813,
"n_tokens": 259357,
"n_chars": 1121360
},
"orion_14b_chat.cc100-en": {
"vocab_size": 84608,
"n_bytes": 1124813,
"n_tokens": 265948,
"n_chars": 1121360
},
"phi_1.cc100-en": {
"vocab_size": 50295,
"n_bytes": 1124813,
"n_tokens": 258409,
"n_chars": 1121360
},
"phi_2.cc100-en": {
"vocab_size": 50295,
"n_bytes": 1124813,
"n_tokens": 258409,
"n_chars": 1121360
},
"phi_3_mini.cc100-en": {
"vocab_size": 32011,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"pko_t5_large.cc100-en": {
"vocab_size": 50358,
"n_bytes": 1124813,
"n_tokens": 658985,
"n_chars": 1121360
},
"prompt_clue.cc100-en": {
"vocab_size": 32128,
"n_bytes": 1124813,
"n_tokens": 536033,
"n_chars": 1121360
},
"qwen1_5_14b_chat.cc100-en": {
"vocab_size": 151646,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_1_8b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_72b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"qwen_7b_chat.cc100-en": {
"vocab_size": 151851,
"n_bytes": 1124813,
"n_tokens": 257983,
"n_chars": 1121360
},
"roberta_chinese_clue.cc100-en": {
"vocab_size": 8021,
"n_bytes": 1124813,
"n_tokens": 583058,
"n_chars": 1121360
},
"skywork_13b_base.cc100-en": {
"vocab_size": 65519,
"n_bytes": 1124813,
"n_tokens": 294617,
"n_chars": 1121360
},
"skywork_13b_math.cc100-en": {
"vocab_size": 65519,
"n_bytes": 1124813,
"n_tokens": 294617,
"n_chars": 1121360
},
"solar_10_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"starchat_alpha.cc100-en": {
"vocab_size": 49156,
"n_bytes": 1124813,
"n_tokens": 288965,
"n_chars": 1121360
},
"switch_c_2048.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_base.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_large.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"t5_small.cc100-en": {
"vocab_size": 32100,
"n_bytes": 1124813,
"n_tokens": 290104,
"n_chars": 1121360
},
"text_davinci_003.cc100-en": {
"vocab_size": 50281,
"n_bytes": 1124813,
"n_tokens": 258403,
"n_chars": 1121360
},
"tigerbot_13b_chat_v2.cc100-en": {
"vocab_size": 60515,
"n_bytes": 1124813,
"n_tokens": 285652,
"n_chars": 1121360
},
"tigerbot_70b_chat_v4_4k.cc100-en": {
"vocab_size": 65110,
"n_bytes": 1124813,
"n_tokens": 286946,
"n_chars": 1121360
},
"wizardcoder_15b_v1.cc100-en": {
"vocab_size": 49153,
"n_bytes": 1124813,
"n_tokens": 288965,
"n_chars": 1121360
},
"wizardcoder_python_7b_v1.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"wizardlm_7b_v1.cc100-en": {
"vocab_size": 32001,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"wizardmath_70b_v1.cc100-en": {
"vocab_size": 32002,
"n_bytes": 1124813,
"n_tokens": 294627,
"n_chars": 1121360
},
"xlm_roberta.cc100-en": {
"vocab_size": 250002,
"n_bytes": 1124813,
"n_tokens": 300026,
"n_chars": 1121360
},
"yi_34b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 270400,
"n_chars": 1121360
},
"yi_6b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 270400,
"n_chars": 1121360
},
"yi_vl34b.cc100-en": {
"vocab_size": 64000,
"n_bytes": 1124813,
"n_tokens": 269738,
"n_chars": 1121360
},
"zephyr_7b_beta.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 285801,
"n_chars": 1121360
},
"amber.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"aya_101.cc100-zh-Hans": {
"vocab_size": 250100,
"n_bytes": 2633047,
"n_tokens": 631182,
"n_chars": 927311
},
"baichuan.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 626117,
"n_chars": 927311
},
"baichuan2.cc100-zh-Hans": {
"vocab_size": 125696,
"n_bytes": 2633047,
"n_tokens": 541464,
"n_chars": 927311
},
"bert_base_cased.cc100-zh-Hans": {
"vocab_size": 28996,
"n_bytes": 2633047,
"n_tokens": 899709,
"n_chars": 927311
},
"bert_base_chinese.cc100-zh-Hans": {
"vocab_size": 21128,
"n_bytes": 2633047,
"n_tokens": 896599,
"n_chars": 927311
},
"bert_base_uncased.cc100-zh-Hans": {
"vocab_size": 30522,
"n_bytes": 2633047,
"n_tokens": 898554,
"n_chars": 927311
},
"bloom.cc100-zh-Hans": {
"vocab_size": 250680,
"n_bytes": 2633047,
"n_tokens": 573008,
"n_chars": 927311
},
"byt5_small.cc100-zh-Hans": {
"vocab_size": 384,
"n_bytes": 2633047,
"n_tokens": 2643047,
"n_chars": 927311
},
"character_glm_6b.cc100-zh-Hans": {
"vocab_size": 64789,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm2_6b.cc100-zh-Hans": {
"vocab_size": 64787,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm3_6b.cc100-zh-Hans": {
"vocab_size": 64796,
"n_bytes": 2633047,
"n_tokens": 583646,
"n_chars": 927311
},
"chatglm_6b.cc100-zh-Hans": {
"vocab_size": 150344,
"n_bytes": 2633047,
"n_tokens": 527384,
"n_chars": 927311
},
"chatyuan_large_v2.cc100-zh-Hans": {
"vocab_size": 32128,
"n_bytes": 2633047,
"n_tokens": 564905,
"n_chars": 927311
},
"chinese_llama.cc100-zh-Hans": {
"vocab_size": 49953,
"n_bytes": 2633047,
"n_tokens": 623219,
"n_chars": 927311
},
"chinese_llama2.cc100-zh-Hans": {
"vocab_size": 55296,
"n_bytes": 2633047,
"n_tokens": 625766,
"n_chars": 927311
},
"code_davinci_002.cc100-zh-Hans": {
"vocab_size": 50281,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"crystal_coder.cc100-zh-Hans": {
"vocab_size": 32022,
"n_bytes": 2633047,
"n_tokens": 1320093,
"n_chars": 927311
},
"dbrx_instruct.cc100-zh-Hans": {
"vocab_size": 100280,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"deepseek_coder_33b_instruct.cc100-zh-Hans": {
"vocab_size": 32022,
"n_bytes": 2633047,
"n_tokens": 720577,
"n_chars": 927311
},
"deepseek_llm_7b_base.cc100-zh-Hans": {
"vocab_size": 100015,
"n_bytes": 2633047,
"n_tokens": 605081,
"n_chars": 927311
},
"falcon_180b.cc100-zh-Hans": {
"vocab_size": 65024,
"n_bytes": 2633047,
"n_tokens": 1124681,
"n_chars": 927311
},
"falcon_7b.cc100-zh-Hans": {
"vocab_size": 65024,
"n_bytes": 2633047,
"n_tokens": 1124681,
"n_chars": 927311
},
"fastchat_t5_3b.cc100-zh-Hans": {
"vocab_size": 32110,
"n_bytes": 2633047,
"n_tokens": 178974,
"n_chars": 927311
},
"flan_t5_base.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173520,
"n_chars": 927311
},
"gemma_7b.cc100-zh-Hans": {
"vocab_size": 256000,
"n_bytes": 2633047,
"n_tokens": 641795,
"n_chars": 927311
},
"gpt2.cc100-zh-Hans": {
"vocab_size": 50257,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"gpt2_chinese.cc100-zh-Hans": {
"vocab_size": 21128,
"n_bytes": 2633047,
"n_tokens": 899506,
"n_chars": 927311
},
"gpt_35_turbo.cc100-zh-Hans": {
"vocab_size": 100277,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"gpt_4.cc100-zh-Hans": {
"vocab_size": 100277,
"n_bytes": 2633047,
"n_tokens": 1084939,
"n_chars": 927311
},
"gpt_nexo_20b.cc100-zh-Hans": {
"vocab_size": 50277,
"n_bytes": 2633047,
"n_tokens": 1220529,
"n_chars": 927311
},
"grok_1.cc100-zh-Hans": {
"vocab_size": 131072,
"n_bytes": 2633047,
"n_tokens": 1414508,
"n_chars": 927311
},
"internlm2_chat_7b.cc100-zh-Hans": {
"vocab_size": 92544,
"n_bytes": 2633047,
"n_tokens": 579976,
"n_chars": 927311
},
"internlm2_math_7b.cc100-zh-Hans": {
"vocab_size": 92544,
"n_bytes": 2633047,
"n_tokens": 579976,
"n_chars": 927311
},
"internlm_chat_7b.cc100-zh-Hans": {
"vocab_size": 103168,
"n_bytes": 2633047,
"n_tokens": 579109,
"n_chars": 927311
},
"internlm_xcomposer_7b.cc100-zh-Hans": {
"vocab_size": 103168,
"n_bytes": 2633047,
"n_tokens": 579109,
"n_chars": 927311
},
"jamba_v0_1.cc100-zh-Hans": {
"vocab_size": 65536,
"n_bytes": 2633047,
"n_tokens": 1067054,
"n_chars": 927311
},
"kplug.cc100-zh-Hans": {
"vocab_size": 10261,
"n_bytes": 2633047,
"n_tokens": 902451,
"n_chars": 927311
},
"llama.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"llama2.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"llama3.cc100-zh-Hans": {
"vocab_size": 128256,
"n_bytes": 2633047,
"n_tokens": 747405,
"n_chars": 927311
},
"mistral_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"mixtral_8_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"mobilebert_uncased.cc100-zh-Hans": {
"vocab_size": 30522,
"n_bytes": 2633047,
"n_tokens": 898554,
"n_chars": 927311
},
"moss.cc100-zh-Hans": {
"vocab_size": 106072,
"n_bytes": 2633047,
"n_tokens": 557455,
"n_chars": 927311
},
"mt5_large.cc100-zh-Hans": {
"vocab_size": 250100,
"n_bytes": 2633047,
"n_tokens": 631182,
"n_chars": 927311
},
"olmo_7b.cc100-zh-Hans": {
"vocab_size": 50280,
"n_bytes": 2633047,
"n_tokens": 1220529,
"n_chars": 927311
},
"orion_14b_chat.cc100-zh-Hans": {
"vocab_size": 84608,
"n_bytes": 2633047,
"n_tokens": 529926,
"n_chars": 927311
},
"phi_1.cc100-zh-Hans": {
"vocab_size": 50295,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"phi_2.cc100-zh-Hans": {
"vocab_size": 50295,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"phi_3_mini.cc100-zh-Hans": {
"vocab_size": 32011,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"pko_t5_large.cc100-zh-Hans": {
"vocab_size": 50358,
"n_bytes": 2633047,
"n_tokens": 2533519,
"n_chars": 927311
},
"prompt_clue.cc100-zh-Hans": {
"vocab_size": 32128,
"n_bytes": 2633047,
"n_tokens": 564905,
"n_chars": 927311
},
"qwen1_5_14b_chat.cc100-zh-Hans": {
"vocab_size": 151646,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_1_8b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_72b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"qwen_7b_chat.cc100-zh-Hans": {
"vocab_size": 151851,
"n_bytes": 2633047,
"n_tokens": 589211,
"n_chars": 927311
},
"roberta_chinese_clue.cc100-zh-Hans": {
"vocab_size": 8021,
"n_bytes": 2633047,
"n_tokens": 907144,
"n_chars": 927311
},
"skywork_13b_base.cc100-zh-Hans": {
"vocab_size": 65519,
"n_bytes": 2633047,
"n_tokens": 663923,
"n_chars": 927311
},
"skywork_13b_math.cc100-zh-Hans": {
"vocab_size": 65519,
"n_bytes": 2633047,
"n_tokens": 663923,
"n_chars": 927311
},
"solar_10_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"starchat_alpha.cc100-zh-Hans": {
"vocab_size": 49156,
"n_bytes": 2633047,
"n_tokens": 882018,
"n_chars": 927311
},
"switch_c_2048.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_base.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_large.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"t5_small.cc100-zh-Hans": {
"vocab_size": 32100,
"n_bytes": 2633047,
"n_tokens": 173519,
"n_chars": 927311
},
"text_davinci_003.cc100-zh-Hans": {
"vocab_size": 50281,
"n_bytes": 2633047,
"n_tokens": 1876809,
"n_chars": 927311
},
"tigerbot_13b_chat_v2.cc100-zh-Hans": {
"vocab_size": 60515,
"n_bytes": 2633047,
"n_tokens": 577385,
"n_chars": 927311
},
"tigerbot_70b_chat_v4_4k.cc100-zh-Hans": {
"vocab_size": 65110,
"n_bytes": 2633047,
"n_tokens": 577211,
"n_chars": 927311
},
"wizardcoder_15b_v1.cc100-zh-Hans": {
"vocab_size": 49153,
"n_bytes": 2633047,
"n_tokens": 882018,
"n_chars": 927311
},
"wizardcoder_python_7b_v1.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"wizardlm_7b_v1.cc100-zh-Hans": {
"vocab_size": 32001,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"wizardmath_70b_v1.cc100-zh-Hans": {
"vocab_size": 32002,
"n_bytes": 2633047,
"n_tokens": 1330093,
"n_chars": 927311
},
"xlm_roberta.cc100-zh-Hans": {
"vocab_size": 250002,
"n_bytes": 2633047,
"n_tokens": 619844,
"n_chars": 927311
},
"yi_34b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 588729,
"n_chars": 927311
},
"yi_6b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 588729,
"n_chars": 927311
},
"yi_vl34b.cc100-zh-Hans": {
"vocab_size": 64000,
"n_bytes": 2633047,
"n_tokens": 596166,
"n_chars": 927311
},
"zephyr_7b_beta.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1041023,
"n_chars": 927311
},
"amber.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"aya_101.cc100-es": {
"vocab_size": 250100,
"n_bytes": 1664455,
"n_tokens": 472231,
"n_chars": 1630297
},
"baichuan.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 585804,
"n_chars": 1630297
},
"baichuan2.cc100-es": {
"vocab_size": 125696,
"n_bytes": 1664455,
"n_tokens": 551326,
"n_chars": 1630297
},
"bert_base_cased.cc100-es": {
"vocab_size": 28996,
"n_bytes": 1664455,
"n_tokens": 630231,
"n_chars": 1630297
},
"bert_base_chinese.cc100-es": {
"vocab_size": 21128,
"n_bytes": 1664455,
"n_tokens": 609419,
"n_chars": 1630297
},
"bert_base_uncased.cc100-es": {
"vocab_size": 30522,
"n_bytes": 1664455,
"n_tokens": 558042,
"n_chars": 1630297
},
"bloom.cc100-es": {
"vocab_size": 250680,
"n_bytes": 1664455,
"n_tokens": 350793,
"n_chars": 1630297
},
"byt5_small.cc100-es": {
"vocab_size": 384,
"n_bytes": 1664455,
"n_tokens": 1674455,
"n_chars": 1630297
},
"character_glm_6b.cc100-es": {
"vocab_size": 64789,
"n_bytes": 1664455,
"n_tokens": 566501,
"n_chars": 1630297
},
"chatglm2_6b.cc100-es": {
"vocab_size": 64787,
"n_bytes": 1664455,
"n_tokens": 566476,
"n_chars": 1630297
},
"chatglm3_6b.cc100-es": {
"vocab_size": 64796,
"n_bytes": 1664455,
"n_tokens": 566501,
"n_chars": 1630297
},
"chatglm_6b.cc100-es": {
"vocab_size": 150344,
"n_bytes": 1664455,
"n_tokens": 514848,
"n_chars": 1630297
},
"chatyuan_large_v2.cc100-es": {
"vocab_size": 32128,
"n_bytes": 1664455,
"n_tokens": 889530,
"n_chars": 1630297
},
"chinese_llama.cc100-es": {
"vocab_size": 49953,
"n_bytes": 1664455,
"n_tokens": 486672,
"n_chars": 1630297
},
"chinese_llama2.cc100-es": {
"vocab_size": 55296,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"code_davinci_002.cc100-es": {
"vocab_size": 50281,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"crystal_coder.cc100-es": {
"vocab_size": 32022,
"n_bytes": 1664455,
"n_tokens": 482235,
"n_chars": 1630297
},
"dbrx_instruct.cc100-es": {
"vocab_size": 100280,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"deepseek_coder_33b_instruct.cc100-es": {
"vocab_size": 32022,
"n_bytes": 1664455,
"n_tokens": 523884,
"n_chars": 1630297
},
"deepseek_llm_7b_base.cc100-es": {
"vocab_size": 100015,
"n_bytes": 1664455,
"n_tokens": 480877,
"n_chars": 1630297
},
"falcon_180b.cc100-es": {
"vocab_size": 65024,
"n_bytes": 1664455,
"n_tokens": 442138,
"n_chars": 1630297
},
"falcon_7b.cc100-es": {
"vocab_size": 65024,
"n_bytes": 1664455,
"n_tokens": 442138,
"n_chars": 1630297
},
"fastchat_t5_3b.cc100-es": {
"vocab_size": 32110,
"n_bytes": 1664455,
"n_tokens": 970105,
"n_chars": 1630297
},
"flan_t5_base.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706405,
"n_chars": 1630297
},
"gemma_7b.cc100-es": {
"vocab_size": 256000,
"n_bytes": 1664455,
"n_tokens": 371321,
"n_chars": 1630297
},
"gpt2.cc100-es": {
"vocab_size": 50257,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"gpt2_chinese.cc100-es": {
"vocab_size": 21128,
"n_bytes": 1664455,
"n_tokens": 703390,
"n_chars": 1630297
},
"gpt_35_turbo.cc100-es": {
"vocab_size": 100277,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"gpt_4.cc100-es": {
"vocab_size": 100277,
"n_bytes": 1664455,
"n_tokens": 433875,
"n_chars": 1630297
},
"gpt_nexo_20b.cc100-es": {
"vocab_size": 50277,
"n_bytes": 1664455,
"n_tokens": 494577,
"n_chars": 1630297
},
"grok_1.cc100-es": {
"vocab_size": 131072,
"n_bytes": 1664455,
"n_tokens": 449392,
"n_chars": 1630297
},
"internlm2_chat_7b.cc100-es": {
"vocab_size": 92544,
"n_bytes": 1664455,
"n_tokens": 518871,
"n_chars": 1630297
},
"internlm2_math_7b.cc100-es": {
"vocab_size": 92544,
"n_bytes": 1664455,
"n_tokens": 518871,
"n_chars": 1630297
},
"internlm_chat_7b.cc100-es": {
"vocab_size": 103168,
"n_bytes": 1664455,
"n_tokens": 516572,
"n_chars": 1630297
},
"internlm_xcomposer_7b.cc100-es": {
"vocab_size": 103168,
"n_bytes": 1664455,
"n_tokens": 516572,
"n_chars": 1630297
},
"jamba_v0_1.cc100-es": {
"vocab_size": 65536,
"n_bytes": 1664455,
"n_tokens": 420883,
"n_chars": 1630297
},
"kplug.cc100-es": {
"vocab_size": 10261,
"n_bytes": 1664455,
"n_tokens": 704804,
"n_chars": 1630297
},
"llama.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"llama2.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"llama3.cc100-es": {
"vocab_size": 128256,
"n_bytes": 1664455,
"n_tokens": 433289,
"n_chars": 1630297
},
"mistral_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"mixtral_8_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"mobilebert_uncased.cc100-es": {
"vocab_size": 30522,
"n_bytes": 1664455,
"n_tokens": 558042,
"n_chars": 1630297
},
"moss.cc100-es": {
"vocab_size": 106072,
"n_bytes": 1664455,
"n_tokens": 568539,
"n_chars": 1630297
},
"mt5_large.cc100-es": {
"vocab_size": 250100,
"n_bytes": 1664455,
"n_tokens": 472231,
"n_chars": 1630297
},
"olmo_7b.cc100-es": {
"vocab_size": 50280,
"n_bytes": 1664455,
"n_tokens": 494577,
"n_chars": 1630297
},
"orion_14b_chat.cc100-es": {
"vocab_size": 84608,
"n_bytes": 1664455,
"n_tokens": 628571,
"n_chars": 1630297
},
"phi_1.cc100-es": {
"vocab_size": 50295,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"phi_2.cc100-es": {
"vocab_size": 50295,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"phi_3_mini.cc100-es": {
"vocab_size": 32011,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"pko_t5_large.cc100-es": {
"vocab_size": 50358,
"n_bytes": 1664455,
"n_tokens": 1134056,
"n_chars": 1630297
},
"prompt_clue.cc100-es": {
"vocab_size": 32128,
"n_bytes": 1664455,
"n_tokens": 889530,
"n_chars": 1630297
},
"qwen1_5_14b_chat.cc100-es": {
"vocab_size": 151646,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_1_8b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_72b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"qwen_7b_chat.cc100-es": {
"vocab_size": 151851,
"n_bytes": 1664455,
"n_tokens": 434264,
"n_chars": 1630297
},
"roberta_chinese_clue.cc100-es": {
"vocab_size": 8021,
"n_bytes": 1664455,
"n_tokens": 866564,
"n_chars": 1630297
},
"skywork_13b_base.cc100-es": {
"vocab_size": 65519,
"n_bytes": 1664455,
"n_tokens": 492211,
"n_chars": 1630297
},
"skywork_13b_math.cc100-es": {
"vocab_size": 65519,
"n_bytes": 1664455,
"n_tokens": 492211,
"n_chars": 1630297
},
"solar_10_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"starchat_alpha.cc100-es": {
"vocab_size": 49156,
"n_bytes": 1664455,
"n_tokens": 530592,
"n_chars": 1630297
},
"switch_c_2048.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_base.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_large.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"t5_small.cc100-es": {
"vocab_size": 32100,
"n_bytes": 1664455,
"n_tokens": 706400,
"n_chars": 1630297
},
"text_davinci_003.cc100-es": {
"vocab_size": 50281,
"n_bytes": 1664455,
"n_tokens": 569853,
"n_chars": 1630297
},
"tigerbot_13b_chat_v2.cc100-es": {
"vocab_size": 60515,
"n_bytes": 1664455,
"n_tokens": 482553,
"n_chars": 1630297
},
"tigerbot_70b_chat_v4_4k.cc100-es": {
"vocab_size": 65110,
"n_bytes": 1664455,
"n_tokens": 484099,
"n_chars": 1630297
},
"wizardcoder_15b_v1.cc100-es": {
"vocab_size": 49153,
"n_bytes": 1664455,
"n_tokens": 530592,
"n_chars": 1630297
},
"wizardcoder_python_7b_v1.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"wizardlm_7b_v1.cc100-es": {
"vocab_size": 32001,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"wizardmath_70b_v1.cc100-es": {
"vocab_size": 32002,
"n_bytes": 1664455,
"n_tokens": 492235,
"n_chars": 1630297
},
"xlm_roberta.cc100-es": {
"vocab_size": 250002,
"n_bytes": 1664455,
"n_tokens": 399850,
"n_chars": 1630297
},
"yi_34b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 577018,
"n_chars": 1630297
},
"yi_6b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 577018,
"n_chars": 1630297
},
"yi_vl34b.cc100-es": {
"vocab_size": 64000,
"n_bytes": 1664455,
"n_tokens": 576794,
"n_chars": 1630297
},
"zephyr_7b_beta.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 513915,
"n_chars": 1630297
},
"aya_101.cc100-fr": {
"vocab_size": 250100,
"n_bytes": 1540504,
"n_tokens": 470944,
"n_chars": 1484970
},
"baichuan.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 540430,
"n_chars": 1484970
},
"baichuan2.cc100-fr": {
"vocab_size": 125696,
"n_bytes": 1540504,
"n_tokens": 512313,
"n_chars": 1484970
},
"bert_base_cased.cc100-fr": {
"vocab_size": 28996,
"n_bytes": 1540504,
"n_tokens": 583210,
"n_chars": 1484970
},
"bert_base_chinese.cc100-fr": {
"vocab_size": 21128,
"n_bytes": 1540504,
"n_tokens": 553134,
"n_chars": 1484970
},
"bert_base_uncased.cc100-fr": {
"vocab_size": 30522,
"n_bytes": 1540504,
"n_tokens": 504075,
"n_chars": 1484970
},
"bloom.cc100-fr": {
"vocab_size": 250680,
"n_bytes": 1540504,
"n_tokens": 321639,
"n_chars": 1484970
},
"byt5_small.cc100-fr": {
"vocab_size": 384,
"n_bytes": 1540504,
"n_tokens": 1550504,
"n_chars": 1484970
},
"character_glm_6b.cc100-fr": {
"vocab_size": 64789,
"n_bytes": 1540504,
"n_tokens": 515052,
"n_chars": 1484970
},
"chatglm2_6b.cc100-fr": {
"vocab_size": 64787,
"n_bytes": 1540504,
"n_tokens": 515028,
"n_chars": 1484970
},
"chatglm3_6b.cc100-fr": {
"vocab_size": 64796,
"n_bytes": 1540504,
"n_tokens": 515052,
"n_chars": 1484970
},
"chatglm_6b.cc100-fr": {
"vocab_size": 150344,
"n_bytes": 1540504,
"n_tokens": 499261,
"n_chars": 1484970
},
"chatyuan_large_v2.cc100-fr": {
"vocab_size": 32128,
"n_bytes": 1540504,
"n_tokens": 822012,
"n_chars": 1484970
},
"chinese_llama.cc100-fr": {
"vocab_size": 49953,
"n_bytes": 1540504,
"n_tokens": 450352,
"n_chars": 1484970
},
"chinese_llama2.cc100-fr": {
"vocab_size": 55296,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"code_davinci_002.cc100-fr": {
"vocab_size": 50281,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"crystal_coder.cc100-fr": {
"vocab_size": 32022,
"n_bytes": 1540504,
"n_tokens": 447243,
"n_chars": 1484970
},
"dbrx_instruct.cc100-fr": {
"vocab_size": 100280,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"deepseek_coder_33b_instruct.cc100-fr": {
"vocab_size": 32022,
"n_bytes": 1540504,
"n_tokens": 537538,
"n_chars": 1484970
},
"deepseek_llm_7b_base.cc100-fr": {
"vocab_size": 100015,
"n_bytes": 1540504,
"n_tokens": 507693,
"n_chars": 1484970
},
"falcon_180b.cc100-fr": {
"vocab_size": 65024,
"n_bytes": 1540504,
"n_tokens": 407853,
"n_chars": 1484970
},
"falcon_7b.cc100-fr": {
"vocab_size": 65024,
"n_bytes": 1540504,
"n_tokens": 407853,
"n_chars": 1484970
},
"fastchat_t5_3b.cc100-fr": {
"vocab_size": 32110,
"n_bytes": 1540504,
"n_tokens": 717675,
"n_chars": 1484970
},
"flan_t5_base.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476135,
"n_chars": 1484970
},
"gemma_7b.cc100-fr": {
"vocab_size": 256000,
"n_bytes": 1540504,
"n_tokens": 374551,
"n_chars": 1484970
},
"gpt2.cc100-fr": {
"vocab_size": 50257,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"gpt2_chinese.cc100-fr": {
"vocab_size": 21128,
"n_bytes": 1540504,
"n_tokens": 636442,
"n_chars": 1484970
},
"gpt_35_turbo.cc100-fr": {
"vocab_size": 100277,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"gpt_4.cc100-fr": {
"vocab_size": 100277,
"n_bytes": 1540504,
"n_tokens": 412685,
"n_chars": 1484970
},
"gpt_nexo_20b.cc100-fr": {
"vocab_size": 50277,
"n_bytes": 1540504,
"n_tokens": 458961,
"n_chars": 1484970
},
"grok_1.cc100-fr": {
"vocab_size": 131072,
"n_bytes": 1540504,
"n_tokens": 428298,
"n_chars": 1484970
},
"internlm2_chat_7b.cc100-fr": {
"vocab_size": 92544,
"n_bytes": 1540504,
"n_tokens": 496629,
"n_chars": 1484970
},
"internlm2_math_7b.cc100-fr": {
"vocab_size": 92544,
"n_bytes": 1540504,
"n_tokens": 496629,
"n_chars": 1484970
},
"internlm_chat_7b.cc100-fr": {
"vocab_size": 103168,
"n_bytes": 1540504,
"n_tokens": 495045,
"n_chars": 1484970
},
"internlm_xcomposer_7b.cc100-fr": {
"vocab_size": 103168,
"n_bytes": 1540504,
"n_tokens": 495045,
"n_chars": 1484970
},
"jamba_v0_1.cc100-fr": {
"vocab_size": 65536,
"n_bytes": 1540504,
"n_tokens": 412899,
"n_chars": 1484970
},
"kplug.cc100-fr": {
"vocab_size": 10261,
"n_bytes": 1540504,
"n_tokens": 638107,
"n_chars": 1484970
},
"llama.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"llama2.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"llama3.cc100-fr": {
"vocab_size": 128256,
"n_bytes": 1540504,
"n_tokens": 412146,
"n_chars": 1484970
},
"mistral_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"mixtral_8_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"mobilebert_uncased.cc100-fr": {
"vocab_size": 30522,
"n_bytes": 1540504,
"n_tokens": 504075,
"n_chars": 1484970
},
"moss.cc100-fr": {
"vocab_size": 106072,
"n_bytes": 1540504,
"n_tokens": 515669,
"n_chars": 1484970
},
"mt5_large.cc100-fr": {
"vocab_size": 250100,
"n_bytes": 1540504,
"n_tokens": 470944,
"n_chars": 1484970
},
"olmo_7b.cc100-fr": {
"vocab_size": 50280,
"n_bytes": 1540504,
"n_tokens": 458961,
"n_chars": 1484970
},
"orion_14b_chat.cc100-fr": {
"vocab_size": 84608,
"n_bytes": 1540504,
"n_tokens": 564107,
"n_chars": 1484970
},
"phi_1.cc100-fr": {
"vocab_size": 50295,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"phi_2.cc100-fr": {
"vocab_size": 50295,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"phi_3_mini.cc100-fr": {
"vocab_size": 32011,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"pko_t5_large.cc100-fr": {
"vocab_size": 50358,
"n_bytes": 1540504,
"n_tokens": 1044665,
"n_chars": 1484970
},
"prompt_clue.cc100-fr": {
"vocab_size": 32128,
"n_bytes": 1540504,
"n_tokens": 822012,
"n_chars": 1484970
},
"qwen1_5_14b_chat.cc100-fr": {
"vocab_size": 151646,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_1_8b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_72b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"qwen_7b_chat.cc100-fr": {
"vocab_size": 151851,
"n_bytes": 1540504,
"n_tokens": 413637,
"n_chars": 1484970
},
"roberta_chinese_clue.cc100-fr": {
"vocab_size": 8021,
"n_bytes": 1540504,
"n_tokens": 787363,
"n_chars": 1484970
},
"skywork_13b_base.cc100-fr": {
"vocab_size": 65519,
"n_bytes": 1540504,
"n_tokens": 457233,
"n_chars": 1484970
},
"skywork_13b_math.cc100-fr": {
"vocab_size": 65519,
"n_bytes": 1540504,
"n_tokens": 457233,
"n_chars": 1484970
},
"solar_10_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"starchat_alpha.cc100-fr": {
"vocab_size": 49156,
"n_bytes": 1540504,
"n_tokens": 509958,
"n_chars": 1484970
},
"switch_c_2048.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_base.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_large.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"t5_small.cc100-fr": {
"vocab_size": 32100,
"n_bytes": 1540504,
"n_tokens": 476133,
"n_chars": 1484970
},
"text_davinci_003.cc100-fr": {
"vocab_size": 50281,
"n_bytes": 1540504,
"n_tokens": 521776,
"n_chars": 1484970
},
"tigerbot_13b_chat_v2.cc100-fr": {
"vocab_size": 60515,
"n_bytes": 1540504,
"n_tokens": 447372,
"n_chars": 1484970
},
"tigerbot_70b_chat_v4_4k.cc100-fr": {
"vocab_size": 65110,
"n_bytes": 1540504,
"n_tokens": 448567,
"n_chars": 1484970
},
"wizardcoder_15b_v1.cc100-fr": {
"vocab_size": 49153,
"n_bytes": 1540504,
"n_tokens": 509958,
"n_chars": 1484970
},
"wizardcoder_python_7b_v1.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"wizardlm_7b_v1.cc100-fr": {
"vocab_size": 32001,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"wizardmath_70b_v1.cc100-fr": {
"vocab_size": 32002,
"n_bytes": 1540504,
"n_tokens": 457243,
"n_chars": 1484970
},
"xlm_roberta.cc100-fr": {
"vocab_size": 250002,
"n_bytes": 1540504,
"n_tokens": 405041,
"n_chars": 1484970
},
"yi_34b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 533106,
"n_chars": 1484970
},
"yi_6b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 533106,
"n_chars": 1484970
},
"yi_vl34b.cc100-fr": {
"vocab_size": 64000,
"n_bytes": 1540504,
"n_tokens": 532288,
"n_chars": 1484970
},
"zephyr_7b_beta.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 476666,
"n_chars": 1484970
},
"gpt_neox_japanese_2_7b.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 1121413,
"n_chars": 1121360
},
"gpt_neox_japanese_2_7b.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 1049033,
"n_chars": 927311
},
"aya_101.cc100-ja": {
"vocab_size": 250100,
"n_bytes": 1774770,
"n_tokens": 300542,
"n_chars": 603065
},
"baichuan.cc100-ja": {
"vocab_size": 64000,
"n_bytes": 1774770,
"n_tokens": 591656,
"n_chars": 603065
},
"baichuan2.cc100-ja": {
"vocab_size": 125696,
"n_bytes": 1774770,
"n_tokens": 554936,
"n_chars": 603065
},
"bert_base_cased.cc100-ja": {
"vocab_size": 28996,
"n_bytes": 1774770,
"n_tokens": 410492,
"n_chars": 603065
},
"bert_base_chinese.cc100-ja": {
"vocab_size": 21128,
"n_bytes": 1774770,
"n_tokens": 396831,
"n_chars": 603065
},
"bert_base_uncased.cc100-ja": {
"vocab_size": 30522,
"n_bytes": 1774770,
"n_tokens": 580634,
"n_chars": 603065
},
"bloom.cc100-ja": {
"vocab_size": 250680,
"n_bytes": 1774770,
"n_tokens": 523592,
"n_chars": 603065
},
"byt5_small.cc100-ja": {
"vocab_size": 384,
"n_bytes": 1774770,
"n_tokens": 1784770,
"n_chars": 603065
},
"aya_101.cc100-ar": {
"vocab_size": 250100,
"n_bytes": 2813283,
"n_tokens": 631736,
"n_chars": 1560987
},
"baichuan.cc100-ar": {
"vocab_size": 64000,
"n_bytes": 2813283,
"n_tokens": 1422976,
"n_chars": 1560987
},
"baichuan2.cc100-ar": {
"vocab_size": 125696,
"n_bytes": 2813283,
"n_tokens": 1337285,
"n_chars": 1560987
},
"bert_base_cased.cc100-ar": {
"vocab_size": 28996,
"n_bytes": 2813283,
"n_tokens": 1232449,
"n_chars": 1560987
},
"bert_base_chinese.cc100-ar": {
"vocab_size": 21128,
"n_bytes": 2813283,
"n_tokens": 536389,
"n_chars": 1560987
},
"bert_base_uncased.cc100-ar": {
"vocab_size": 30522,
"n_bytes": 2813283,
"n_tokens": 1269370,
"n_chars": 1560987
},
"bloom.cc100-ar": {
"vocab_size": 250680,
"n_bytes": 2813283,
"n_tokens": 427489,
"n_chars": 1560987
},
"byt5_small.cc100-ar": {
"vocab_size": 384,
"n_bytes": 2813283,
"n_tokens": 2823283,
"n_chars": 1560987
},
"character_glm_6b.cc100-ar": {
"vocab_size": 64789,
"n_bytes": 2813283,
"n_tokens": 1441847,
"n_chars": 1560987
},
"chatglm2_6b.cc100-ar": {
"vocab_size": 64787,
"n_bytes": 2813283,
"n_tokens": 1441847,
"n_chars": 1560987
},
"chatglm3_6b.cc100-ar": {
"vocab_size": 64796,
"n_bytes": 2813283,
"n_tokens": 1441847,
"n_chars": 1560987
},
"chatglm_6b.cc100-ar": {
"vocab_size": 150344,
"n_bytes": 2813283,
"n_tokens": 1097200,
"n_chars": 1560987
},
"chatyuan_large_v2.cc100-ar": {
"vocab_size": 32128,
"n_bytes": 2813283,
"n_tokens": 1006313,
"n_chars": 1560987
},
"chinese_llama.cc100-ar": {
"vocab_size": 49953,
"n_bytes": 2813283,
"n_tokens": 1421625,
"n_chars": 1560987
},
"chinese_llama2.cc100-ar": {
"vocab_size": 55296,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"code_davinci_002.cc100-ar": {
"vocab_size": 50281,
"n_bytes": 2813283,
"n_tokens": 1558111,
"n_chars": 1560987
},
"crystal_coder.cc100-ar": {
"vocab_size": 32022,
"n_bytes": 2813283,
"n_tokens": 1422081,
"n_chars": 1560987
},
"dbrx_instruct.cc100-ar": {
"vocab_size": 100280,
"n_bytes": 2813283,
"n_tokens": 1105640,
"n_chars": 1560987
},
"deepseek_coder_33b_instruct.cc100-ar": {
"vocab_size": 32022,
"n_bytes": 2813283,
"n_tokens": 1958863,
"n_chars": 1560987
},
"deepseek_llm_7b_base.cc100-ar": {
"vocab_size": 100015,
"n_bytes": 2813283,
"n_tokens": 1426103,
"n_chars": 1560987
},
"falcon_180b.cc100-ar": {
"vocab_size": 65024,
"n_bytes": 2813283,
"n_tokens": 1597443,
"n_chars": 1560987
},
"falcon_7b.cc100-ar": {
"vocab_size": 65024,
"n_bytes": 2813283,
"n_tokens": 1597443,
"n_chars": 1560987
},
"fastchat_t5_3b.cc100-ar": {
"vocab_size": 32110,
"n_bytes": 2813283,
"n_tokens": 832267,
"n_chars": 1560987
},
"flan_t5_base.cc100-ar": {
"vocab_size": 32100,
"n_bytes": 2813283,
"n_tokens": 568957,
"n_chars": 1560987
},
"gemma_7b.cc100-ar": {
"vocab_size": 256000,
"n_bytes": 2813283,
"n_tokens": 573788,
"n_chars": 1560987
},
"gpt2.cc100-ar": {
"vocab_size": 50257,
"n_bytes": 2813283,
"n_tokens": 1558111,
"n_chars": 1560987
},
"gpt2_chinese.cc100-ar": {
"vocab_size": 21128,
"n_bytes": 2813283,
"n_tokens": 617677,
"n_chars": 1560987
},
"gpt_35_turbo.cc100-ar": {
"vocab_size": 100277,
"n_bytes": 2813283,
"n_tokens": 1105640,
"n_chars": 1560987
},
"gpt_4.cc100-ar": {
"vocab_size": 100277,
"n_bytes": 2813283,
"n_tokens": 1105640,
"n_chars": 1560987
},
"gpt_neox_japanese_2_7b.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 2809195,
"n_chars": 1560987
},
"gpt_nexo_20b.cc100-ar": {
"vocab_size": 50277,
"n_bytes": 2813283,
"n_tokens": 1106277,
"n_chars": 1560987
},
"grok_1.cc100-ar": {
"vocab_size": 131072,
"n_bytes": 2813283,
"n_tokens": 1392088,
"n_chars": 1560987
},
"internlm2_chat_7b.cc100-ar": {
"vocab_size": 92544,
"n_bytes": 2813283,
"n_tokens": 1635378,
"n_chars": 1560987
},
"internlm2_math_7b.cc100-ar": {
"vocab_size": 92544,
"n_bytes": 2813283,
"n_tokens": 1635378,
"n_chars": 1560987
},
"internlm_chat_7b.cc100-ar": {
"vocab_size": 103168,
"n_bytes": 2813283,
"n_tokens": 532046,
"n_chars": 1560987
},
"internlm_xcomposer_7b.cc100-ar": {
"vocab_size": 103168,
"n_bytes": 2813283,
"n_tokens": 532046,
"n_chars": 1560987
},
"jamba_v0_1.cc100-ar": {
"vocab_size": 65536,
"n_bytes": 2813283,
"n_tokens": 727886,
"n_chars": 1560987
},
"kplug.cc100-ar": {
"vocab_size": 10261,
"n_bytes": 2813283,
"n_tokens": 331987,
"n_chars": 1560987
},
"llama.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"llama2.cc100-ar": {
"vocab_size": 32001,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"llama3.cc100-ar": {
"vocab_size": 128256,
"n_bytes": 2813283,
"n_tokens": 615514,
"n_chars": 1560987
},
"mistral_7b.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 1406319,
"n_chars": 1560987
},
"mixtral_8_7b.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 1406319,
"n_chars": 1560987
},
"mobilebert_uncased.cc100-ar": {
"vocab_size": 30522,
"n_bytes": 2813283,
"n_tokens": 1269370,
"n_chars": 1560987
},
"moss.cc100-ar": {
"vocab_size": 106072,
"n_bytes": 2813283,
"n_tokens": 1557671,
"n_chars": 1560987
},
"mt5_large.cc100-ar": {
"vocab_size": 250100,
"n_bytes": 2813283,
"n_tokens": 631736,
"n_chars": 1560987
},
"olmo_7b.cc100-ar": {
"vocab_size": 50280,
"n_bytes": 2813283,
"n_tokens": 1106277,
"n_chars": 1560987
},
"orion_14b_chat.cc100-ar": {
"vocab_size": 84608,
"n_bytes": 2813283,
"n_tokens": 1531053,
"n_chars": 1560987
},
"phi_1.cc100-ar": {
"vocab_size": 50295,
"n_bytes": 2813283,
"n_tokens": 1558111,
"n_chars": 1560987
},
"phi_2.cc100-ar": {
"vocab_size": 50295,
"n_bytes": 2813283,
"n_tokens": 1558111,
"n_chars": 1560987
},
"phi_3_mini.cc100-ar": {
"vocab_size": 32011,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"pko_t5_large.cc100-ar": {
"vocab_size": 50358,
"n_bytes": 2813283,
"n_tokens": 2815586,
"n_chars": 1560987
},
"prompt_clue.cc100-ar": {
"vocab_size": 32128,
"n_bytes": 2813283,
"n_tokens": 1006313,
"n_chars": 1560987
},
"qwen1_5_14b_chat.cc100-ar": {
"vocab_size": 151646,
"n_bytes": 2813283,
"n_tokens": 614959,
"n_chars": 1560987
},
"qwen_1_8b_chat.cc100-ar": {
"vocab_size": 151851,
"n_bytes": 2813283,
"n_tokens": 614959,
"n_chars": 1560987
},
"qwen_72b_chat.cc100-ar": {
"vocab_size": 151851,
"n_bytes": 2813283,
"n_tokens": 614959,
"n_chars": 1560987
},
"qwen_7b_chat.cc100-ar": {
"vocab_size": 151851,
"n_bytes": 2813283,
"n_tokens": 614959,
"n_chars": 1560987
},
"roberta_chinese_clue.cc100-ar": {
"vocab_size": 8021,
"n_bytes": 2813283,
"n_tokens": 621762,
"n_chars": 1560987
},
"skywork_13b_base.cc100-ar": {
"vocab_size": 65519,
"n_bytes": 2813283,
"n_tokens": 1432065,
"n_chars": 1560987
},
"skywork_13b_math.cc100-ar": {
"vocab_size": 65519,
"n_bytes": 2813283,
"n_tokens": 1432065,
"n_chars": 1560987
},
"solar_10_7b.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 1406319,
"n_chars": 1560987
},
"starchat_alpha.cc100-ar": {
"vocab_size": 49156,
"n_bytes": 2813283,
"n_tokens": 1195640,
"n_chars": 1560987
},
"switch_c_2048.cc100-ar": {
"vocab_size": 32100,
"n_bytes": 2813283,
"n_tokens": 568855,
"n_chars": 1560987
},
"t5_base.cc100-ar": {
"vocab_size": 32100,
"n_bytes": 2813283,
"n_tokens": 568855,
"n_chars": 1560987
},
"t5_large.cc100-ar": {
"vocab_size": 32100,
"n_bytes": 2813283,
"n_tokens": 568855,
"n_chars": 1560987
},
"t5_small.cc100-ar": {
"vocab_size": 32100,
"n_bytes": 2813283,
"n_tokens": 568855,
"n_chars": 1560987
},
"text_davinci_003.cc100-ar": {
"vocab_size": 50281,
"n_bytes": 2813283,
"n_tokens": 1558111,
"n_chars": 1560987
},
"tigerbot_13b_chat_v2.cc100-ar": {
"vocab_size": 60515,
"n_bytes": 2813283,
"n_tokens": 1422070,
"n_chars": 1560987
},
"tigerbot_70b_chat_v4_4k.cc100-ar": {
"vocab_size": 65110,
"n_bytes": 2813283,
"n_tokens": 1422073,
"n_chars": 1560987
},
"wizardcoder_15b_v1.cc100-ar": {
"vocab_size": 49153,
"n_bytes": 2813283,
"n_tokens": 1195640,
"n_chars": 1560987
},
"wizardcoder_python_7b_v1.cc100-ar": {
"vocab_size": 32001,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"wizardlm_7b_v1.cc100-ar": {
"vocab_size": 32001,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"wizardmath_70b_v1.cc100-ar": {
"vocab_size": 32002,
"n_bytes": 2813283,
"n_tokens": 1432081,
"n_chars": 1560987
},
"xlm_roberta.cc100-ar": {
"vocab_size": 250002,
"n_bytes": 2813283,
"n_tokens": 518287,
"n_chars": 1560987
},
"yi_34b.cc100-ar": {
"vocab_size": 64000,
"n_bytes": 2813283,
"n_tokens": 1795801,
"n_chars": 1560987
},
"yi_6b.cc100-ar": {
"vocab_size": 64000,
"n_bytes": 2813283,
"n_tokens": 1795801,
"n_chars": 1560987
},
"yi_vl34b.cc100-ar": {
"vocab_size": 64000,
"n_bytes": 2813283,
"n_tokens": 1803957,
"n_chars": 1560987
},
"zephyr_7b_beta.cc100-ar": {
"vocab_size": 32000,
"n_bytes": 2813283,
"n_tokens": 1406319,
"n_chars": 1560987
},
"aya_101.cc100-de": {
"vocab_size": 250100,
"n_bytes": 1814876,
"n_tokens": 480418,
"n_chars": 1784021
},
"baichuan.cc100-de": {
"vocab_size": 64000,
"n_bytes": 1814876,
"n_tokens": 680512,
"n_chars": 1784021
},
"baichuan2.cc100-de": {
"vocab_size": 125696,
"n_bytes": 1814876,
"n_tokens": 628063,
"n_chars": 1784021
},
"bert_base_cased.cc100-de": {
"vocab_size": 28996,
"n_bytes": 1814876,
"n_tokens": 731093,
"n_chars": 1784021
},
"bert_base_chinese.cc100-de": {
"vocab_size": 21128,
"n_bytes": 1814876,
"n_tokens": 561246,
"n_chars": 1784021
},
"bert_base_uncased.cc100-de": {
"vocab_size": 30522,
"n_bytes": 1814876,
"n_tokens": 646485,
"n_chars": 1784021
},
"bloom.cc100-de": {
"vocab_size": 250680,
"n_bytes": 1814876,
"n_tokens": 541170,
"n_chars": 1784021
},
"byt5_small.cc100-de": {
"vocab_size": 384,
"n_bytes": 1814876,
"n_tokens": 1824876,
"n_chars": 1784021
},
"character_glm_6b.cc100-de": {
"vocab_size": 64789,
"n_bytes": 1814876,
"n_tokens": 639822,
"n_chars": 1784021
},
"chatglm2_6b.cc100-de": {
"vocab_size": 64787,
"n_bytes": 1814876,
"n_tokens": 639757,
"n_chars": 1784021
},
"chatglm3_6b.cc100-de": {
"vocab_size": 64796,
"n_bytes": 1814876,
"n_tokens": 639822,
"n_chars": 1784021
},
"chatglm_6b.cc100-de": {
"vocab_size": 150344,
"n_bytes": 1814876,
"n_tokens": 589464,
"n_chars": 1784021
},
"chatyuan_large_v2.cc100-de": {
"vocab_size": 32128,
"n_bytes": 1814876,
"n_tokens": 970463,
"n_chars": 1784021
},
"chinese_llama.cc100-de": {
"vocab_size": 49953,
"n_bytes": 1814876,
"n_tokens": 523859,
"n_chars": 1784021
},
"chinese_llama2.cc100-de": {
"vocab_size": 55296,
"n_bytes": 1814876,
"n_tokens": 537318,
"n_chars": 1784021
},
"code_davinci_002.cc100-de": {
"vocab_size": 50281,
"n_bytes": 1814876,
"n_tokens": 684666,
"n_chars": 1784021
},
"crystal_coder.cc100-de": {
"vocab_size": 32022,
"n_bytes": 1814876,
"n_tokens": 527320,
"n_chars": 1784021
},
"dbrx_instruct.cc100-de": {
"vocab_size": 100280,
"n_bytes": 1814876,
"n_tokens": 500870,
"n_chars": 1784021
},
"deepseek_coder_33b_instruct.cc100-de": {
"vocab_size": 32022,
"n_bytes": 1814876,
"n_tokens": 745618,
"n_chars": 1784021
},
"deepseek_llm_7b_base.cc100-de": {
"vocab_size": 100015,
"n_bytes": 1814876,
"n_tokens": 642573,
"n_chars": 1784021
},
"falcon_180b.cc100-de": {
"vocab_size": 65024,
"n_bytes": 1814876,
"n_tokens": 497054,
"n_chars": 1784021
},
"falcon_7b.cc100-de": {
"vocab_size": 65024,
"n_bytes": 1814876,
"n_tokens": 497054,
"n_chars": 1784021
},
"fastchat_t5_3b.cc100-de": {
"vocab_size": 32110,
"n_bytes": 1814876,
"n_tokens": 736989,
"n_chars": 1784021
},
"flan_t5_base.cc100-de": {
"vocab_size": 32100,
"n_bytes": 1814876,
"n_tokens": 480254,
"n_chars": 1784021
},
"gemma_7b.cc100-de": {
"vocab_size": 256000,
"n_bytes": 1814876,
"n_tokens": 416876,
"n_chars": 1784021
},
"gpt2.cc100-de": {
"vocab_size": 50257,
"n_bytes": 1814876,
"n_tokens": 684669,
"n_chars": 1784021
},
"gpt2_chinese.cc100-de": {
"vocab_size": 21128,
"n_bytes": 1814876,
"n_tokens": 786497,
"n_chars": 1784021
},
"gpt_35_turbo.cc100-de": {
"vocab_size": 100277,
"n_bytes": 1814876,
"n_tokens": 500870,
"n_chars": 1784021
},
"gpt_4.cc100-de": {
"vocab_size": 100277,
"n_bytes": 1814876,
"n_tokens": 500870,
"n_chars": 1784021
},
"gpt_neox_japanese_2_7b.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 1807780,
"n_chars": 1784021
},
"gpt_nexo_20b.cc100-de": {
"vocab_size": 50277,
"n_bytes": 1814876,
"n_tokens": 583628,
"n_chars": 1784021
},
"grok_1.cc100-de": {
"vocab_size": 131072,
"n_bytes": 1814876,
"n_tokens": 505220,
"n_chars": 1784021
},
"internlm2_chat_7b.cc100-de": {
"vocab_size": 92544,
"n_bytes": 1814876,
"n_tokens": 583917,
"n_chars": 1784021
},
"internlm2_math_7b.cc100-de": {
"vocab_size": 92544,
"n_bytes": 1814876,
"n_tokens": 583917,
"n_chars": 1784021
},
"internlm_chat_7b.cc100-de": {
"vocab_size": 103168,
"n_bytes": 1814876,
"n_tokens": 580489,
"n_chars": 1784021
},
"internlm_xcomposer_7b.cc100-de": {
"vocab_size": 103168,
"n_bytes": 1814876,
"n_tokens": 580489,
"n_chars": 1784021
},
"jamba_v0_1.cc100-de": {
"vocab_size": 65536,
"n_bytes": 1814876,
"n_tokens": 535856,
"n_chars": 1784021
},
"kplug.cc100-de": {
"vocab_size": 10261,
"n_bytes": 1814876,
"n_tokens": 789053,
"n_chars": 1784021
},
"llama.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"llama2.cc100-de": {
"vocab_size": 32001,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"llama3.cc100-de": {
"vocab_size": 128256,
"n_bytes": 1814876,
"n_tokens": 499766,
"n_chars": 1784021
},
"mistral_7b.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 577526,
"n_chars": 1784021
},
"mixtral_8_7b.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 577526,
"n_chars": 1784021
},
"mobilebert_uncased.cc100-de": {
"vocab_size": 30522,
"n_bytes": 1814876,
"n_tokens": 646485,
"n_chars": 1784021
},
"moss.cc100-de": {
"vocab_size": 106072,
"n_bytes": 1814876,
"n_tokens": 683401,
"n_chars": 1784021
},
"mt5_large.cc100-de": {
"vocab_size": 250100,
"n_bytes": 1814876,
"n_tokens": 480418,
"n_chars": 1784021
},
"olmo_7b.cc100-de": {
"vocab_size": 50280,
"n_bytes": 1814876,
"n_tokens": 583628,
"n_chars": 1784021
},
"orion_14b_chat.cc100-de": {
"vocab_size": 84608,
"n_bytes": 1814876,
"n_tokens": 744404,
"n_chars": 1784021
},
"phi_1.cc100-de": {
"vocab_size": 50295,
"n_bytes": 1814876,
"n_tokens": 684665,
"n_chars": 1784021
},
"phi_2.cc100-de": {
"vocab_size": 50295,
"n_bytes": 1814876,
"n_tokens": 684665,
"n_chars": 1784021
},
"phi_3_mini.cc100-de": {
"vocab_size": 32011,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"pko_t5_large.cc100-de": {
"vocab_size": 50358,
"n_bytes": 1814876,
"n_tokens": 1254350,
"n_chars": 1784021
},
"prompt_clue.cc100-de": {
"vocab_size": 32128,
"n_bytes": 1814876,
"n_tokens": 970463,
"n_chars": 1784021
},
"qwen1_5_14b_chat.cc100-de": {
"vocab_size": 151646,
"n_bytes": 1814876,
"n_tokens": 503561,
"n_chars": 1784021
},
"qwen_1_8b_chat.cc100-de": {
"vocab_size": 151851,
"n_bytes": 1814876,
"n_tokens": 503561,
"n_chars": 1784021
},
"qwen_72b_chat.cc100-de": {
"vocab_size": 151851,
"n_bytes": 1814876,
"n_tokens": 503561,
"n_chars": 1784021
},
"qwen_7b_chat.cc100-de": {
"vocab_size": 151851,
"n_bytes": 1814876,
"n_tokens": 503561,
"n_chars": 1784021
},
"roberta_chinese_clue.cc100-de": {
"vocab_size": 8021,
"n_bytes": 1814876,
"n_tokens": 915612,
"n_chars": 1784021
},
"skywork_13b_base.cc100-de": {
"vocab_size": 65519,
"n_bytes": 1814876,
"n_tokens": 537308,
"n_chars": 1784021
},
"skywork_13b_math.cc100-de": {
"vocab_size": 65519,
"n_bytes": 1814876,
"n_tokens": 537308,
"n_chars": 1784021
},
"solar_10_7b.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 577526,
"n_chars": 1784021
},
"starchat_alpha.cc100-de": {
"vocab_size": 49156,
"n_bytes": 1814876,
"n_tokens": 620541,
"n_chars": 1784021
},
"switch_c_2048.cc100-de": {
"vocab_size": 32100,
"n_bytes": 1814876,
"n_tokens": 480254,
"n_chars": 1784021
},
"t5_base.cc100-de": {
"vocab_size": 32100,
"n_bytes": 1814876,
"n_tokens": 480254,
"n_chars": 1784021
},
"t5_large.cc100-de": {
"vocab_size": 32100,
"n_bytes": 1814876,
"n_tokens": 480254,
"n_chars": 1784021
},
"t5_small.cc100-de": {
"vocab_size": 32100,
"n_bytes": 1814876,
"n_tokens": 480254,
"n_chars": 1784021
},
"text_davinci_003.cc100-de": {
"vocab_size": 50281,
"n_bytes": 1814876,
"n_tokens": 684666,
"n_chars": 1784021
},
"tigerbot_13b_chat_v2.cc100-de": {
"vocab_size": 60515,
"n_bytes": 1814876,
"n_tokens": 528918,
"n_chars": 1784021
},
"tigerbot_70b_chat_v4_4k.cc100-de": {
"vocab_size": 65110,
"n_bytes": 1814876,
"n_tokens": 529170,
"n_chars": 1784021
},
"wizardcoder_15b_v1.cc100-de": {
"vocab_size": 49153,
"n_bytes": 1814876,
"n_tokens": 620541,
"n_chars": 1784021
},
"wizardcoder_python_7b_v1.cc100-de": {
"vocab_size": 32001,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"wizardlm_7b_v1.cc100-de": {
"vocab_size": 32001,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"wizardmath_70b_v1.cc100-de": {
"vocab_size": 32002,
"n_bytes": 1814876,
"n_tokens": 537320,
"n_chars": 1784021
},
"xlm_roberta.cc100-de": {
"vocab_size": 250002,
"n_bytes": 1814876,
"n_tokens": 432571,
"n_chars": 1784021
},
"yi_34b.cc100-de": {
"vocab_size": 64000,
"n_bytes": 1814876,
"n_tokens": 698366,
"n_chars": 1784021
},
"yi_6b.cc100-de": {
"vocab_size": 64000,
"n_bytes": 1814876,
"n_tokens": 698366,
"n_chars": 1784021
},
"yi_vl34b.cc100-de": {
"vocab_size": 64000,
"n_bytes": 1814876,
"n_tokens": 697065,
"n_chars": 1784021
},
"zephyr_7b_beta.cc100-de": {
"vocab_size": 32000,
"n_bytes": 1814876,
"n_tokens": 577526,
"n_chars": 1784021
},
"gpt_neox_japanese_2_7b.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 1658946,
"n_chars": 1630297
},
"gpt_neox_japanese_2_7b.cc100-fr": {
"vocab_size": 32000,
"n_bytes": 1540504,
"n_tokens": 1524129,
"n_chars": 1484970
},
"character_glm_6b.cc100-ja": {
"vocab_size": 64789,
"n_bytes": 1774770,
"n_tokens": 601380,
"n_chars": 603065
},
"chatglm2_6b.cc100-ja": {
"vocab_size": 64787,
"n_bytes": 1774770,
"n_tokens": 601380,
"n_chars": 603065
},
"chatglm3_6b.cc100-ja": {
"vocab_size": 64796,
"n_bytes": 1774770,
"n_tokens": 601380,
"n_chars": 603065
},
"chatglm_6b.cc100-ja": {
"vocab_size": 150344,
"n_bytes": 1774770,
"n_tokens": 489930,
"n_chars": 603065
},
"chatyuan_large_v2.cc100-ja": {
"vocab_size": 32128,
"n_bytes": 1774770,
"n_tokens": 575118,
"n_chars": 603065
},
"chinese_llama.cc100-ja": {
"vocab_size": 49953,
"n_bytes": 1774770,
"n_tokens": 614177,
"n_chars": 603065
},
"chinese_llama2.cc100-ja": {
"vocab_size": 55296,
"n_bytes": 1774770,
"n_tokens": 624362,
"n_chars": 603065
},
"code_davinci_002.cc100-ja": {
"vocab_size": 50281,
"n_bytes": 1774770,
"n_tokens": 844362,
"n_chars": 603065
},
"crystal_coder.cc100-ja": {
"vocab_size": 32022,
"n_bytes": 1774770,
"n_tokens": 718461,
"n_chars": 603065
},
"dbrx_instruct.cc100-ja": {
"vocab_size": 100280,
"n_bytes": 1774770,
"n_tokens": 630348,
"n_chars": 603065
},
"deepseek_coder_33b_instruct.cc100-ja": {
"vocab_size": 32022,
"n_bytes": 1774770,
"n_tokens": 1018060,
"n_chars": 603065
},
"deepseek_llm_7b_base.cc100-ja": {
"vocab_size": 100015,
"n_bytes": 1774770,
"n_tokens": 761467,
"n_chars": 603065
},
"falcon_180b.cc100-ja": {
"vocab_size": 65024,
"n_bytes": 1774770,
"n_tokens": 842458,
"n_chars": 603065
},
"falcon_7b.cc100-ja": {
"vocab_size": 65024,
"n_bytes": 1774770,
"n_tokens": 842458,
"n_chars": 603065
},
"fastchat_t5_3b.cc100-ja": {
"vocab_size": 32110,
"n_bytes": 1774770,
"n_tokens": 53915,
"n_chars": 603065
},
"flan_t5_base.cc100-ja": {
"vocab_size": 32100,
"n_bytes": 1774770,
"n_tokens": 51999,
"n_chars": 603065
},
"gemma_7b.cc100-ja": {
"vocab_size": 256000,
"n_bytes": 1774770,
"n_tokens": 317873,
"n_chars": 603065
},
"gpt2.cc100-ja": {
"vocab_size": 50257,
"n_bytes": 1774770,
"n_tokens": 844362,
"n_chars": 603065
},
"gpt2_chinese.cc100-ja": {
"vocab_size": 21128,
"n_bytes": 1774770,
"n_tokens": 503085,
"n_chars": 603065
},
"gpt_35_turbo.cc100-ja": {
"vocab_size": 100277,
"n_bytes": 1774770,
"n_tokens": 630348,
"n_chars": 603065
},
"gpt_4.cc100-ja": {
"vocab_size": 100277,
"n_bytes": 1774770,
"n_tokens": 630348,
"n_chars": 603065
},
"gpt_neox_japanese_2_7b.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 410803,
"n_chars": 603065
},
"gpt_nexo_20b.cc100-ja": {
"vocab_size": 50277,
"n_bytes": 1774770,
"n_tokens": 605168,
"n_chars": 603065
},
"grok_1.cc100-ja": {
"vocab_size": 131072,
"n_bytes": 1774770,
"n_tokens": 497590,
"n_chars": 603065
},
"internlm2_chat_7b.cc100-ja": {
"vocab_size": 92544,
"n_bytes": 1774770,
"n_tokens": 595803,
"n_chars": 603065
},
"internlm2_math_7b.cc100-ja": {
"vocab_size": 92544,
"n_bytes": 1774770,
"n_tokens": 595803,
"n_chars": 603065
},
"internlm_chat_7b.cc100-ja": {
"vocab_size": 103168,
"n_bytes": 1774770,
"n_tokens": 448212,
"n_chars": 603065
},
"internlm_xcomposer_7b.cc100-ja": {
"vocab_size": 103168,
"n_bytes": 1774770,
"n_tokens": 448212,
"n_chars": 603065
},
"jamba_v0_1.cc100-ja": {
"vocab_size": 65536,
"n_bytes": 1774770,
"n_tokens": 683256,
"n_chars": 603065
},
"kplug.cc100-ja": {
"vocab_size": 10261,
"n_bytes": 1774770,
"n_tokens": 338023,
"n_chars": 603065
},
"llama.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"llama2.cc100-ja": {
"vocab_size": 32001,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"llama3.cc100-ja": {
"vocab_size": 128256,
"n_bytes": 1774770,
"n_tokens": 414715,
"n_chars": 603065
},
"mistral_7b.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 685134,
"n_chars": 603065
},
"mixtral_8_7b.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 685134,
"n_chars": 603065
},
"mobilebert_uncased.cc100-ja": {
"vocab_size": 30522,
"n_bytes": 1774770,
"n_tokens": 580634,
"n_chars": 603065
},
"moss.cc100-ja": {
"vocab_size": 106072,
"n_bytes": 1774770,
"n_tokens": 600011,
"n_chars": 603065
},
"mt5_large.cc100-ja": {
"vocab_size": 250100,
"n_bytes": 1774770,
"n_tokens": 300542,
"n_chars": 603065
},
"olmo_7b.cc100-ja": {
"vocab_size": 50280,
"n_bytes": 1774770,
"n_tokens": 605168,
"n_chars": 603065
},
"orion_14b_chat.cc100-ja": {
"vocab_size": 84608,
"n_bytes": 1774770,
"n_tokens": 324956,
"n_chars": 603065
},
"phi_1.cc100-ja": {
"vocab_size": 50295,
"n_bytes": 1774770,
"n_tokens": 844362,
"n_chars": 603065
},
"phi_2.cc100-ja": {
"vocab_size": 50295,
"n_bytes": 1774770,
"n_tokens": 844362,
"n_chars": 603065
},
"phi_3_mini.cc100-ja": {
"vocab_size": 32011,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"pko_t5_large.cc100-ja": {
"vocab_size": 50358,
"n_bytes": 1774770,
"n_tokens": 1766950,
"n_chars": 603065
},
"prompt_clue.cc100-ja": {
"vocab_size": 32128,
"n_bytes": 1774770,
"n_tokens": 575118,
"n_chars": 603065
},
"qwen1_5_14b_chat.cc100-ja": {
"vocab_size": 151646,
"n_bytes": 1774770,
"n_tokens": 377144,
"n_chars": 603065
},
"qwen_1_8b_chat.cc100-ja": {
"vocab_size": 151851,
"n_bytes": 1774770,
"n_tokens": 377144,
"n_chars": 603065
},
"qwen_72b_chat.cc100-ja": {
"vocab_size": 151851,
"n_bytes": 1774770,
"n_tokens": 377144,
"n_chars": 603065
},
"qwen_7b_chat.cc100-ja": {
"vocab_size": 151851,
"n_bytes": 1774770,
"n_tokens": 377144,
"n_chars": 603065
},
"roberta_chinese_clue.cc100-ja": {
"vocab_size": 8021,
"n_bytes": 1774770,
"n_tokens": 339411,
"n_chars": 603065
},
"skywork_13b_base.cc100-ja": {
"vocab_size": 65519,
"n_bytes": 1774770,
"n_tokens": 603613,
"n_chars": 603065
},
"skywork_13b_math.cc100-ja": {
"vocab_size": 65519,
"n_bytes": 1774770,
"n_tokens": 603613,
"n_chars": 603065
},
"solar_10_7b.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 685134,
"n_chars": 603065
},
"starchat_alpha.cc100-ja": {
"vocab_size": 49156,
"n_bytes": 1774770,
"n_tokens": 546876,
"n_chars": 603065
},
"switch_c_2048.cc100-ja": {
"vocab_size": 32100,
"n_bytes": 1774770,
"n_tokens": 51947,
"n_chars": 603065
},
"t5_base.cc100-ja": {
"vocab_size": 32100,
"n_bytes": 1774770,
"n_tokens": 51947,
"n_chars": 603065
},
"t5_large.cc100-ja": {
"vocab_size": 32100,
"n_bytes": 1774770,
"n_tokens": 51947,
"n_chars": 603065
},
"t5_small.cc100-ja": {
"vocab_size": 32100,
"n_bytes": 1774770,
"n_tokens": 51947,
"n_chars": 603065
},
"text_davinci_003.cc100-ja": {
"vocab_size": 50281,
"n_bytes": 1774770,
"n_tokens": 844362,
"n_chars": 603065
},
"tigerbot_13b_chat_v2.cc100-ja": {
"vocab_size": 60515,
"n_bytes": 1774770,
"n_tokens": 567792,
"n_chars": 603065
},
"tigerbot_70b_chat_v4_4k.cc100-ja": {
"vocab_size": 65110,
"n_bytes": 1774770,
"n_tokens": 406571,
"n_chars": 603065
},
"wizardcoder_15b_v1.cc100-ja": {
"vocab_size": 49153,
"n_bytes": 1774770,
"n_tokens": 546876,
"n_chars": 603065
},
"wizardcoder_python_7b_v1.cc100-ja": {
"vocab_size": 32001,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"wizardlm_7b_v1.cc100-ja": {
"vocab_size": 32001,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"wizardmath_70b_v1.cc100-ja": {
"vocab_size": 32002,
"n_bytes": 1774770,
"n_tokens": 728461,
"n_chars": 603065
},
"xlm_roberta.cc100-ja": {
"vocab_size": 250002,
"n_bytes": 1774770,
"n_tokens": 344820,
"n_chars": 603065
},
"yi_34b.cc100-ja": {
"vocab_size": 64000,
"n_bytes": 1774770,
"n_tokens": 740791,
"n_chars": 603065
},
"yi_6b.cc100-ja": {
"vocab_size": 64000,
"n_bytes": 1774770,
"n_tokens": 740791,
"n_chars": 603065
},
"yi_vl34b.cc100-ja": {
"vocab_size": 64000,
"n_bytes": 1774770,
"n_tokens": 749927,
"n_chars": 603065
},
"zephyr_7b_beta.cc100-ja": {
"vocab_size": 32000,
"n_bytes": 1774770,
"n_tokens": 685134,
"n_chars": 603065
},
"llama_3_chinese_8b.cc100-ar": {
"vocab_size": 128256,
"n_bytes": 2813283,
"n_tokens": 625514,
"n_chars": 1560987
},
"llama_3_chinese_8b.cc100-de": {
"vocab_size": 128256,
"n_bytes": 1814876,
"n_tokens": 509766,
"n_chars": 1784021
},
"llama_3_chinese_8b.cc100-en": {
"vocab_size": 128256,
"n_bytes": 1124813,
"n_tokens": 264944,
"n_chars": 1121360
},
"llama_3_chinese_8b.cc100-es": {
"vocab_size": 128256,
"n_bytes": 1664455,
"n_tokens": 443289,
"n_chars": 1630297
},
"aya_101.cc100-fa": {
"vocab_size": 250100,
"n_bytes": 2054052,
"n_tokens": 429922,
"n_chars": 1145876
},
"baichuan.cc100-fa": {
"vocab_size": 64000,
"n_bytes": 2054052,
"n_tokens": 1142057,
"n_chars": 1145876
},
"baichuan2.cc100-fa": {
"vocab_size": 125696,
"n_bytes": 2054052,
"n_tokens": 1052077,
"n_chars": 1145876
},
"bert_base_cased.cc100-fa": {
"vocab_size": 28996,
"n_bytes": 2054052,
"n_tokens": 903078,
"n_chars": 1145876
},
"bert_base_chinese.cc100-fa": {
"vocab_size": 21128,
"n_bytes": 2054052,
"n_tokens": 396414,
"n_chars": 1145876
},
"bert_base_uncased.cc100-fa": {
"vocab_size": 30522,
"n_bytes": 2054052,
"n_tokens": 910783,
"n_chars": 1145876
},
"bloom.cc100-fa": {
"vocab_size": 250680,
"n_bytes": 2054052,
"n_tokens": 434406,
"n_chars": 1145876
},
"byt5_small.cc100-fa": {
"vocab_size": 384,
"n_bytes": 2054052,
"n_tokens": 2064052,
"n_chars": 1145876
},
"character_glm_6b.cc100-fa": {
"vocab_size": 64789,
"n_bytes": 2054052,
"n_tokens": 1165051,
"n_chars": 1145876
},
"chatglm2_6b.cc100-fa": {
"vocab_size": 64787,
"n_bytes": 2054052,
"n_tokens": 1165051,
"n_chars": 1145876
},
"chatglm3_6b.cc100-fa": {
"vocab_size": 64796,
"n_bytes": 2054052,
"n_tokens": 1165051,
"n_chars": 1145876
},
"chatglm_6b.cc100-fa": {
"vocab_size": 150344,
"n_bytes": 2054052,
"n_tokens": 910808,
"n_chars": 1145876
},
"chatyuan_large_v2.cc100-fa": {
"vocab_size": 32128,
"n_bytes": 2054052,
"n_tokens": 740377,
"n_chars": 1145876
},
"chinese_llama.cc100-fa": {
"vocab_size": 49953,
"n_bytes": 2054052,
"n_tokens": 1150750,
"n_chars": 1145876
},
"chinese_llama2.cc100-fa": {
"vocab_size": 55296,
"n_bytes": 2054052,
"n_tokens": 1155078,
"n_chars": 1145876
},
"code_davinci_002.cc100-fa": {
"vocab_size": 50281,
"n_bytes": 2054052,
"n_tokens": 1292300,
"n_chars": 1145876
},
"crystal_coder.cc100-fa": {
"vocab_size": 32022,
"n_bytes": 2054052,
"n_tokens": 1145076,
"n_chars": 1145876
},
"dbrx_instruct.cc100-fa": {
"vocab_size": 100280,
"n_bytes": 2054052,
"n_tokens": 818067,
"n_chars": 1145876
},
"deepseek_coder_33b_instruct.cc100-fa": {
"vocab_size": 32022,
"n_bytes": 2054052,
"n_tokens": 1326109,
"n_chars": 1145876
},
"deepseek_llm_7b_base.cc100-fa": {
"vocab_size": 100015,
"n_bytes": 2054052,
"n_tokens": 973451,
"n_chars": 1145876
},
"falcon_180b.cc100-fa": {
"vocab_size": 65024,
"n_bytes": 2054052,
"n_tokens": 1246580,
"n_chars": 1145876
},
"falcon_7b.cc100-fa": {
"vocab_size": 65024,
"n_bytes": 2054052,
"n_tokens": 1246580,
"n_chars": 1145876
},
"fastchat_t5_3b.cc100-fa": {
"vocab_size": 32110,
"n_bytes": 2054052,
"n_tokens": 712443,
"n_chars": 1145876
},
"flan_t5_base.cc100-fa": {
"vocab_size": 32100,
"n_bytes": 2054052,
"n_tokens": 493779,
"n_chars": 1145876
},
"gemma_7b.cc100-fa": {
"vocab_size": 256000,
"n_bytes": 2054052,
"n_tokens": 373762,
"n_chars": 1145876
},
"gpt2.cc100-fa": {
"vocab_size": 50257,
"n_bytes": 2054052,
"n_tokens": 1292300,
"n_chars": 1145876
},
"gpt2_chinese.cc100-fa": {
"vocab_size": 21128,
"n_bytes": 2054052,
"n_tokens": 406174,
"n_chars": 1145876
},
"gpt_35_turbo.cc100-fa": {
"vocab_size": 100277,
"n_bytes": 2054052,
"n_tokens": 818067,
"n_chars": 1145876
},
"gpt_4.cc100-fa": {
"vocab_size": 100277,
"n_bytes": 2054052,
"n_tokens": 818067,
"n_chars": 1145876
},
"gpt_neox_japanese_2_7b.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 2036715,
"n_chars": 1145876
},
"gpt_nexo_20b.cc100-fa": {
"vocab_size": 50277,
"n_bytes": 2054052,
"n_tokens": 866434,
"n_chars": 1145876
},
"grok_1.cc100-fa": {
"vocab_size": 131072,
"n_bytes": 2054052,
"n_tokens": 1073281,
"n_chars": 1145876
},
"internlm2_chat_7b.cc100-fa": {
"vocab_size": 92544,
"n_bytes": 2054052,
"n_tokens": 1195032,
"n_chars": 1145876
},
"internlm2_math_7b.cc100-fa": {
"vocab_size": 92544,
"n_bytes": 2054052,
"n_tokens": 1195032,
"n_chars": 1145876
},
"internlm_chat_7b.cc100-fa": {
"vocab_size": 103168,
"n_bytes": 2054052,
"n_tokens": 640945,
"n_chars": 1145876
},
"internlm_xcomposer_7b.cc100-fa": {
"vocab_size": 103168,
"n_bytes": 2054052,
"n_tokens": 640945,
"n_chars": 1145876
},
"jamba_v0_1.cc100-fa": {
"vocab_size": 65536,
"n_bytes": 2054052,
"n_tokens": 732550,
"n_chars": 1145876
},
"kplug.cc100-fa": {
"vocab_size": 10261,
"n_bytes": 2054052,
"n_tokens": 274671,
"n_chars": 1145876
},
"llama.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"llama2.cc100-fa": {
"vocab_size": 32001,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"llama3.cc100-fa": {
"vocab_size": 128256,
"n_bytes": 2054052,
"n_tokens": 387448,
"n_chars": 1145876
},
"llama_3_chinese_8b.cc100-fa": {
"vocab_size": 128256,
"n_bytes": 2054052,
"n_tokens": 397448,
"n_chars": 1145876
},
"mistral_7b.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 1133278,
"n_chars": 1145876
},
"mixtral_8_7b.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 1133278,
"n_chars": 1145876
},
"mobilebert_uncased.cc100-fa": {
"vocab_size": 30522,
"n_bytes": 2054052,
"n_tokens": 910783,
"n_chars": 1145876
},
"moss.cc100-fa": {
"vocab_size": 106072,
"n_bytes": 2054052,
"n_tokens": 1285426,
"n_chars": 1145876
},
"mt5_large.cc100-fa": {
"vocab_size": 250100,
"n_bytes": 2054052,
"n_tokens": 429922,
"n_chars": 1145876
},
"olmo_7b.cc100-fa": {
"vocab_size": 50280,
"n_bytes": 2054052,
"n_tokens": 866434,
"n_chars": 1145876
},
"orion_14b_chat.cc100-fa": {
"vocab_size": 84608,
"n_bytes": 2054052,
"n_tokens": 1131108,
"n_chars": 1145876
},
"phi_1.cc100-fa": {
"vocab_size": 50295,
"n_bytes": 2054052,
"n_tokens": 1292300,
"n_chars": 1145876
},
"phi_2.cc100-fa": {
"vocab_size": 50295,
"n_bytes": 2054052,
"n_tokens": 1292300,
"n_chars": 1145876
},
"phi_3_mini.cc100-fa": {
"vocab_size": 32011,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"pko_t5_large.cc100-fa": {
"vocab_size": 50358,
"n_bytes": 2054052,
"n_tokens": 2061040,
"n_chars": 1145876
},
"prompt_clue.cc100-fa": {
"vocab_size": 32128,
"n_bytes": 2054052,
"n_tokens": 740377,
"n_chars": 1145876
},
"qwen1_5_14b_chat.cc100-fa": {
"vocab_size": 151646,
"n_bytes": 2054052,
"n_tokens": 643421,
"n_chars": 1145876
},
"qwen_1_8b_chat.cc100-fa": {
"vocab_size": 151851,
"n_bytes": 2054052,
"n_tokens": 643421,
"n_chars": 1145876
},
"qwen_72b_chat.cc100-fa": {
"vocab_size": 151851,
"n_bytes": 2054052,
"n_tokens": 643421,
"n_chars": 1145876
},
"qwen_7b_chat.cc100-fa": {
"vocab_size": 151851,
"n_bytes": 2054052,
"n_tokens": 643421,
"n_chars": 1145876
},
"roberta_chinese_clue.cc100-fa": {
"vocab_size": 8021,
"n_bytes": 2054052,
"n_tokens": 407763,
"n_chars": 1145876
},
"skywork_13b_base.cc100-fa": {
"vocab_size": 65519,
"n_bytes": 2054052,
"n_tokens": 1155072,
"n_chars": 1145876
},
"skywork_13b_math.cc100-fa": {
"vocab_size": 65519,
"n_bytes": 2054052,
"n_tokens": 1155072,
"n_chars": 1145876
},
"solar_10_7b.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 1133278,
"n_chars": 1145876
},
"starchat_alpha.cc100-fa": {
"vocab_size": 49156,
"n_bytes": 2054052,
"n_tokens": 851630,
"n_chars": 1145876
},
"switch_c_2048.cc100-fa": {
"vocab_size": 32100,
"n_bytes": 2054052,
"n_tokens": 493767,
"n_chars": 1145876
},
"t5_base.cc100-fa": {
"vocab_size": 32100,
"n_bytes": 2054052,
"n_tokens": 493767,
"n_chars": 1145876
},
"t5_large.cc100-fa": {
"vocab_size": 32100,
"n_bytes": 2054052,
"n_tokens": 493767,
"n_chars": 1145876
},
"t5_small.cc100-fa": {
"vocab_size": 32100,
"n_bytes": 2054052,
"n_tokens": 493767,
"n_chars": 1145876
},
"text_davinci_003.cc100-fa": {
"vocab_size": 50281,
"n_bytes": 2054052,
"n_tokens": 1292300,
"n_chars": 1145876
},
"tigerbot_13b_chat_v2.cc100-fa": {
"vocab_size": 60515,
"n_bytes": 2054052,
"n_tokens": 1145046,
"n_chars": 1145876
},
"tigerbot_70b_chat_v4_4k.cc100-fa": {
"vocab_size": 65110,
"n_bytes": 2054052,
"n_tokens": 1145048,
"n_chars": 1145876
},
"wizardcoder_15b_v1.cc100-fa": {
"vocab_size": 49153,
"n_bytes": 2054052,
"n_tokens": 851630,
"n_chars": 1145876
},
"wizardcoder_python_7b_v1.cc100-fa": {
"vocab_size": 32001,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"wizardlm_7b_v1.cc100-fa": {
"vocab_size": 32001,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"wizardmath_70b_v1.cc100-fa": {
"vocab_size": 32002,
"n_bytes": 2054052,
"n_tokens": 1155076,
"n_chars": 1145876
},
"xlm_roberta.cc100-fa": {
"vocab_size": 250002,
"n_bytes": 2054052,
"n_tokens": 330926,
"n_chars": 1145876
},
"yi_34b.cc100-fa": {
"vocab_size": 64000,
"n_bytes": 2054052,
"n_tokens": 1337264,
"n_chars": 1145876
},
"yi_6b.cc100-fa": {
"vocab_size": 64000,
"n_bytes": 2054052,
"n_tokens": 1337264,
"n_chars": 1145876
},
"yi_vl34b.cc100-fa": {
"vocab_size": 64000,
"n_bytes": 2054052,
"n_tokens": 1346819,
"n_chars": 1145876
},
"zephyr_7b_beta.cc100-fa": {
"vocab_size": 32000,
"n_bytes": 2054052,
"n_tokens": 1133278,
"n_chars": 1145876
},
"llama_3_chinese_8b.cc100-fr": {
"vocab_size": 128256,
"n_bytes": 1540504,
"n_tokens": 422146,
"n_chars": 1484970
},
"llama_3_chinese_8b.cc100-ja": {
"vocab_size": 128256,
"n_bytes": 1774770,
"n_tokens": 424715,
"n_chars": 603065
},
"aya_101.cc100-ko": {
"vocab_size": 250100,
"n_bytes": 1524839,
"n_tokens": 434586,
"n_chars": 655190
},
"baichuan.cc100-ko": {
"vocab_size": 64000,
"n_bytes": 1524839,
"n_tokens": 639258,
"n_chars": 655190
},
"baichuan2.cc100-ko": {
"vocab_size": 125696,
"n_bytes": 1524839,
"n_tokens": 623358,
"n_chars": 655190
},
"bert_base_cased.cc100-ko": {
"vocab_size": 28996,
"n_bytes": 1524839,
"n_tokens": 222828,
"n_chars": 655190
},
"bert_base_chinese.cc100-ko": {
"vocab_size": 21128,
"n_bytes": 1524839,
"n_tokens": 219752,
"n_chars": 655190
},
"bert_base_uncased.cc100-ko": {
"vocab_size": 30522,
"n_bytes": 1524839,
"n_tokens": 904756,
"n_chars": 655190
},
"bloom.cc100-ko": {
"vocab_size": 250680,
"n_bytes": 1524839,
"n_tokens": 742111,
"n_chars": 655190
},
"byt5_small.cc100-ko": {
"vocab_size": 384,
"n_bytes": 1524839,
"n_tokens": 1534839,
"n_chars": 655190
},
"character_glm_6b.cc100-ko": {
"vocab_size": 64789,
"n_bytes": 1524839,
"n_tokens": 672160,
"n_chars": 655190
},
"chatglm2_6b.cc100-ko": {
"vocab_size": 64787,
"n_bytes": 1524839,
"n_tokens": 672156,
"n_chars": 655190
},
"chatglm3_6b.cc100-ko": {
"vocab_size": 64796,
"n_bytes": 1524839,
"n_tokens": 672160,
"n_chars": 655190
},
"chatglm_6b.cc100-ko": {
"vocab_size": 150344,
"n_bytes": 1524839,
"n_tokens": 939630,
"n_chars": 655190
},
"chatyuan_large_v2.cc100-ko": {
"vocab_size": 32128,
"n_bytes": 1524839,
"n_tokens": 354411,
"n_chars": 655190
},
"chinese_llama.cc100-ko": {
"vocab_size": 49953,
"n_bytes": 1524839,
"n_tokens": 913553,
"n_chars": 655190
},
"chinese_llama2.cc100-ko": {
"vocab_size": 55296,
"n_bytes": 1524839,
"n_tokens": 963427,
"n_chars": 655190
},
"code_davinci_002.cc100-ko": {
"vocab_size": 50281,
"n_bytes": 1524839,
"n_tokens": 1308993,
"n_chars": 655190
},
"crystal_coder.cc100-ko": {
"vocab_size": 32022,
"n_bytes": 1524839,
"n_tokens": 954428,
"n_chars": 655190
},
"dbrx_instruct.cc100-ko": {
"vocab_size": 100280,
"n_bytes": 1524839,
"n_tokens": 652277,
"n_chars": 655190
},
"deepseek_coder_33b_instruct.cc100-ko": {
"vocab_size": 32022,
"n_bytes": 1524839,
"n_tokens": 1454805,
"n_chars": 655190
},
"deepseek_llm_7b_base.cc100-ko": {
"vocab_size": 100015,
"n_bytes": 1524839,
"n_tokens": 1081983,
"n_chars": 655190
},
"falcon_180b.cc100-ko": {
"vocab_size": 65024,
"n_bytes": 1524839,
"n_tokens": 1330568,
"n_chars": 655190
},
"falcon_7b.cc100-ko": {
"vocab_size": 65024,
"n_bytes": 1524839,
"n_tokens": 1330568,
"n_chars": 655190
},
"fastchat_t5_3b.cc100-ko": {
"vocab_size": 32110,
"n_bytes": 1524839,
"n_tokens": 484953,
"n_chars": 655190
},
"flan_t5_base.cc100-ko": {
"vocab_size": 32100,
"n_bytes": 1524839,
"n_tokens": 344457,
"n_chars": 655190
},
"gemma_7b.cc100-ko": {
"vocab_size": 256000,
"n_bytes": 1524839,
"n_tokens": 464410,
"n_chars": 655190
},
"gpt2.cc100-ko": {
"vocab_size": 50257,
"n_bytes": 1524839,
"n_tokens": 1309029,
"n_chars": 655190
},
"gpt2_chinese.cc100-ko": {
"vocab_size": 21128,
"n_bytes": 1524839,
"n_tokens": 1055974,
"n_chars": 655190
},
"gpt_35_turbo.cc100-ko": {
"vocab_size": 100277,
"n_bytes": 1524839,
"n_tokens": 652277,
"n_chars": 655190
},
"gpt_4.cc100-ko": {
"vocab_size": 100277,
"n_bytes": 1524839,
"n_tokens": 652277,
"n_chars": 655190
},
"gpt_neox_japanese_2_7b.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 1512832,
"n_chars": 655190
},
"gpt_nexo_20b.cc100-ko": {
"vocab_size": 50277,
"n_bytes": 1524839,
"n_tokens": 973288,
"n_chars": 655190
},
"grok_1.cc100-ko": {
"vocab_size": 131072,
"n_bytes": 1524839,
"n_tokens": 1152005,
"n_chars": 655190
},
"internlm2_chat_7b.cc100-ko": {
"vocab_size": 92544,
"n_bytes": 1524839,
"n_tokens": 1008524,
"n_chars": 655190
},
"internlm2_math_7b.cc100-ko": {
"vocab_size": 92544,
"n_bytes": 1524839,
"n_tokens": 1008524,
"n_chars": 655190
},
"internlm_chat_7b.cc100-ko": {
"vocab_size": 103168,
"n_bytes": 1524839,
"n_tokens": 839609,
"n_chars": 655190
},
"internlm_xcomposer_7b.cc100-ko": {
"vocab_size": 103168,
"n_bytes": 1524839,
"n_tokens": 839609,
"n_chars": 655190
},
"jamba_v0_1.cc100-ko": {
"vocab_size": 65536,
"n_bytes": 1524839,
"n_tokens": 715688,
"n_chars": 655190
},
"kplug.cc100-ko": {
"vocab_size": 10261,
"n_bytes": 1524839,
"n_tokens": 222771,
"n_chars": 655190
},
"llama.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"llama2.cc100-ko": {
"vocab_size": 32001,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"llama3.cc100-ko": {
"vocab_size": 128256,
"n_bytes": 1524839,
"n_tokens": 412595,
"n_chars": 655190
},
"llama_3_chinese_8b.cc100-ko": {
"vocab_size": 128256,
"n_bytes": 1524839,
"n_tokens": 422595,
"n_chars": 655190
},
"mistral_7b.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 728766,
"n_chars": 655190
},
"mixtral_8_7b.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 728766,
"n_chars": 655190
},
"mobilebert_uncased.cc100-ko": {
"vocab_size": 30522,
"n_bytes": 1524839,
"n_tokens": 904756,
"n_chars": 655190
},
"moss.cc100-ko": {
"vocab_size": 106072,
"n_bytes": 1524839,
"n_tokens": 1305249,
"n_chars": 655190
},
"mt5_large.cc100-ko": {
"vocab_size": 250100,
"n_bytes": 1524839,
"n_tokens": 434586,
"n_chars": 655190
},
"olmo_7b.cc100-ko": {
"vocab_size": 50280,
"n_bytes": 1524839,
"n_tokens": 973288,
"n_chars": 655190
},
"orion_14b_chat.cc100-ko": {
"vocab_size": 84608,
"n_bytes": 1524839,
"n_tokens": 351149,
"n_chars": 655190
},
"phi_1.cc100-ko": {
"vocab_size": 50295,
"n_bytes": 1524839,
"n_tokens": 1308988,
"n_chars": 655190
},
"phi_2.cc100-ko": {
"vocab_size": 50295,
"n_bytes": 1524839,
"n_tokens": 1308988,
"n_chars": 655190
},
"phi_3_mini.cc100-ko": {
"vocab_size": 32011,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"pko_t5_large.cc100-ko": {
"vocab_size": 50358,
"n_bytes": 1524839,
"n_tokens": 471643,
"n_chars": 655190
},
"prompt_clue.cc100-ko": {
"vocab_size": 32128,
"n_bytes": 1524839,
"n_tokens": 354411,
"n_chars": 655190
},
"qwen1_5_14b_chat.cc100-ko": {
"vocab_size": 151646,
"n_bytes": 1524839,
"n_tokens": 457492,
"n_chars": 655190
},
"qwen_1_8b_chat.cc100-ko": {
"vocab_size": 151851,
"n_bytes": 1524839,
"n_tokens": 457492,
"n_chars": 655190
},
"qwen_72b_chat.cc100-ko": {
"vocab_size": 151851,
"n_bytes": 1524839,
"n_tokens": 457492,
"n_chars": 655190
},
"qwen_7b_chat.cc100-ko": {
"vocab_size": 151851,
"n_bytes": 1524839,
"n_tokens": 457492,
"n_chars": 655190
},
"roberta_chinese_clue.cc100-ko": {
"vocab_size": 8021,
"n_bytes": 1524839,
"n_tokens": 226812,
"n_chars": 655190
},
"skywork_13b_base.cc100-ko": {
"vocab_size": 65519,
"n_bytes": 1524839,
"n_tokens": 962744,
"n_chars": 655190
},
"skywork_13b_math.cc100-ko": {
"vocab_size": 65519,
"n_bytes": 1524839,
"n_tokens": 962744,
"n_chars": 655190
},
"solar_10_7b.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 728766,
"n_chars": 655190
},
"starchat_alpha.cc100-ko": {
"vocab_size": 49156,
"n_bytes": 1524839,
"n_tokens": 580873,
"n_chars": 655190
},
"switch_c_2048.cc100-ko": {
"vocab_size": 32100,
"n_bytes": 1524839,
"n_tokens": 344457,
"n_chars": 655190
},
"t5_base.cc100-ko": {
"vocab_size": 32100,
"n_bytes": 1524839,
"n_tokens": 344457,
"n_chars": 655190
},
"t5_large.cc100-ko": {
"vocab_size": 32100,
"n_bytes": 1524839,
"n_tokens": 344457,
"n_chars": 655190
},
"t5_small.cc100-ko": {
"vocab_size": 32100,
"n_bytes": 1524839,
"n_tokens": 344457,
"n_chars": 655190
},
"text_davinci_003.cc100-ko": {
"vocab_size": 50281,
"n_bytes": 1524839,
"n_tokens": 1308993,
"n_chars": 655190
},
"tigerbot_13b_chat_v2.cc100-ko": {
"vocab_size": 60515,
"n_bytes": 1524839,
"n_tokens": 793053,
"n_chars": 655190
},
"tigerbot_70b_chat_v4_4k.cc100-ko": {
"vocab_size": 65110,
"n_bytes": 1524839,
"n_tokens": 484082,
"n_chars": 655190
},
"wizardcoder_15b_v1.cc100-ko": {
"vocab_size": 49153,
"n_bytes": 1524839,
"n_tokens": 580873,
"n_chars": 655190
},
"wizardcoder_python_7b_v1.cc100-ko": {
"vocab_size": 32001,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"wizardlm_7b_v1.cc100-ko": {
"vocab_size": 32001,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"wizardmath_70b_v1.cc100-ko": {
"vocab_size": 32002,
"n_bytes": 1524839,
"n_tokens": 964428,
"n_chars": 655190
},
"xlm_roberta.cc100-ko": {
"vocab_size": 250002,
"n_bytes": 1524839,
"n_tokens": 374571,
"n_chars": 655190
},
"yi_34b.cc100-ko": {
"vocab_size": 64000,
"n_bytes": 1524839,
"n_tokens": 1203134,
"n_chars": 655190
},
"yi_6b.cc100-ko": {
"vocab_size": 64000,
"n_bytes": 1524839,
"n_tokens": 1203134,
"n_chars": 655190
},
"yi_vl34b.cc100-ko": {
"vocab_size": 64000,
"n_bytes": 1524839,
"n_tokens": 1210021,
"n_chars": 655190
},
"zephyr_7b_beta.cc100-ko": {
"vocab_size": 32000,
"n_bytes": 1524839,
"n_tokens": 728766,
"n_chars": 655190
},
"llama_3_chinese_8b.cc100-zh-Hans": {
"vocab_size": 128256,
"n_bytes": 2633047,
"n_tokens": 757405,
"n_chars": 927311
},
"dutch_llama_tokenizer.cc100-en": {
"vocab_size": 32000,
"n_bytes": 1124813,
"n_tokens": 291975,
"n_chars": 1121360
},
"gronlp-gpt2-small-dutch.cc100-en": {
"vocab_size": 40000,
"n_bytes": 1124813,
"n_tokens": 361710,
"n_chars": 1121360
},
"yhavinga-gpt2-medium-dutch.cc100-en": {
"vocab_size": 50257,
"n_bytes": 1124813,
"n_tokens": 361847,
"n_chars": 1121360
},
"yhavinga-ul2-large-en-nl.cc100-en": {
"vocab_size": 32128,
"n_bytes": 1124813,
"n_tokens": 297641,
"n_chars": 1121360
},
"dutch_llama_tokenizer.cc100-zh-Hans": {
"vocab_size": 32000,
"n_bytes": 2633047,
"n_tokens": 2621293,
"n_chars": 927311
},
"gronlp-gpt2-small-dutch.cc100-zh-Hans": {
"vocab_size": 40000,
"n_bytes": 2633047,
"n_tokens": 1350320,
"n_chars": 927311
},
"yhavinga-gpt2-medium-dutch.cc100-zh-Hans": {
"vocab_size": 50257,
"n_bytes": 2633047,
"n_tokens": 2600872,
"n_chars": 927311
},
"yhavinga-ul2-large-en-nl.cc100-zh-Hans": {
"vocab_size": 32128,
"n_bytes": 2633047,
"n_tokens": 2519719,
"n_chars": 927311
},
"aya_101.cc100-nl": {
"vocab_size": 250100,
"n_bytes": 1513030,
"n_tokens": 423616,
"n_chars": 1508067
},
"baichuan.cc100-nl": {
"vocab_size": 64000,
"n_bytes": 1513030,
"n_tokens": 574927,
"n_chars": 1508067
},
"baichuan2.cc100-nl": {
"vocab_size": 125696,
"n_bytes": 1513030,
"n_tokens": 540387,
"n_chars": 1508067
},
"bert_base_cased.cc100-nl": {
"vocab_size": 28996,
"n_bytes": 1513030,
"n_tokens": 630793,
"n_chars": 1508067
},
"bert_base_chinese.cc100-nl": {
"vocab_size": 21128,
"n_bytes": 1513030,
"n_tokens": 626052,
"n_chars": 1508067
},
"bert_base_uncased.cc100-nl": {
"vocab_size": 30522,
"n_bytes": 1513030,
"n_tokens": 574651,
"n_chars": 1508067
},
"bloom.cc100-nl": {
"vocab_size": 250680,
"n_bytes": 1513030,
"n_tokens": 488924,
"n_chars": 1508067
},
"byt5_small.cc100-nl": {
"vocab_size": 384,
"n_bytes": 1513030,
"n_tokens": 1523030,
"n_chars": 1508067
},
"character_glm_6b.cc100-nl": {
"vocab_size": 64789,
"n_bytes": 1513030,
"n_tokens": 559014,
"n_chars": 1508067
},
"chatglm2_6b.cc100-nl": {
"vocab_size": 64787,
"n_bytes": 1513030,
"n_tokens": 559017,
"n_chars": 1508067
},
"chatglm3_6b.cc100-nl": {
"vocab_size": 64796,
"n_bytes": 1513030,
"n_tokens": 559014,
"n_chars": 1508067
},
"chatglm_6b.cc100-nl": {
"vocab_size": 150344,
"n_bytes": 1513030,
"n_tokens": 533174,
"n_chars": 1508067
},
"chatyuan_large_v2.cc100-nl": {
"vocab_size": 32128,
"n_bytes": 1513030,
"n_tokens": 837963,
"n_chars": 1508067
},
"chinese_llama.cc100-nl": {
"vocab_size": 49953,
"n_bytes": 1513030,
"n_tokens": 488766,
"n_chars": 1508067
},
"chinese_llama2.cc100-nl": {
"vocab_size": 55296,
"n_bytes": 1513030,
"n_tokens": 495966,
"n_chars": 1508067
},
"code_davinci_002.cc100-nl": {
"vocab_size": 50281,
"n_bytes": 1513030,
"n_tokens": 559119,
"n_chars": 1508067
},
"crystal_coder.cc100-nl": {
"vocab_size": 32022,
"n_bytes": 1513030,
"n_tokens": 485966,
"n_chars": 1508067
},
"dbrx_instruct.cc100-nl": {
"vocab_size": 100280,
"n_bytes": 1513030,
"n_tokens": 449343,
"n_chars": 1508067
},
"deepseek_coder_33b_instruct.cc100-nl": {
"vocab_size": 32022,
"n_bytes": 1513030,
"n_tokens": 603966,
"n_chars": 1508067
},
"deepseek_llm_7b_base.cc100-nl": {
"vocab_size": 100015,
"n_bytes": 1513030,
"n_tokens": 536746,
"n_chars": 1508067
},
"dutch_llama_tokenizer.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 366481,
"n_chars": 1508067
},
"falcon_180b.cc100-nl": {
"vocab_size": 65024,
"n_bytes": 1513030,
"n_tokens": 438112,
"n_chars": 1508067
},
"falcon_7b.cc100-nl": {
"vocab_size": 65024,
"n_bytes": 1513030,
"n_tokens": 438112,
"n_chars": 1508067
},
"fastchat_t5_3b.cc100-nl": {
"vocab_size": 32110,
"n_bytes": 1513030,
"n_tokens": 933018,
"n_chars": 1508067
},
"flan_t5_base.cc100-nl": {
"vocab_size": 32100,
"n_bytes": 1513030,
"n_tokens": 696337,
"n_chars": 1508067
},
"gemma_7b.cc100-nl": {
"vocab_size": 256000,
"n_bytes": 1513030,
"n_tokens": 387522,
"n_chars": 1508067
},
"gpt2.cc100-nl": {
"vocab_size": 50257,
"n_bytes": 1513030,
"n_tokens": 559119,
"n_chars": 1508067
},
"gpt2_chinese.cc100-nl": {
"vocab_size": 21128,
"n_bytes": 1513030,
"n_tokens": 676651,
"n_chars": 1508067
},
"gpt_35_turbo.cc100-nl": {
"vocab_size": 100277,
"n_bytes": 1513030,
"n_tokens": 449343,
"n_chars": 1508067
},
"gpt_4.cc100-nl": {
"vocab_size": 100277,
"n_bytes": 1513030,
"n_tokens": 449343,
"n_chars": 1508067
},
"gpt_neox_japanese_2_7b.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 1509448,
"n_chars": 1508067
},
"gpt_nexo_20b.cc100-nl": {
"vocab_size": 50277,
"n_bytes": 1513030,
"n_tokens": 497728,
"n_chars": 1508067
},
"grok_1.cc100-nl": {
"vocab_size": 131072,
"n_bytes": 1513030,
"n_tokens": 457359,
"n_chars": 1508067
},
"gronlp-gpt2-small-dutch.cc100-nl": {
"vocab_size": 40000,
"n_bytes": 1513030,
"n_tokens": 332376,
"n_chars": 1508067
},
"internlm2_chat_7b.cc100-nl": {
"vocab_size": 92544,
"n_bytes": 1513030,
"n_tokens": 494821,
"n_chars": 1508067
},
"internlm2_math_7b.cc100-nl": {
"vocab_size": 92544,
"n_bytes": 1513030,
"n_tokens": 494821,
"n_chars": 1508067
},
"internlm_chat_7b.cc100-nl": {
"vocab_size": 103168,
"n_bytes": 1513030,
"n_tokens": 494108,
"n_chars": 1508067
},
"internlm_xcomposer_7b.cc100-nl": {
"vocab_size": 103168,
"n_bytes": 1513030,
"n_tokens": 494108,
"n_chars": 1508067
},
"jamba_v0_1.cc100-nl": {
"vocab_size": 65536,
"n_bytes": 1513030,
"n_tokens": 442176,
"n_chars": 1508067
},
"kplug.cc100-nl": {
"vocab_size": 10261,
"n_bytes": 1513030,
"n_tokens": 678131,
"n_chars": 1508067
},
"llama.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 495966,
"n_chars": 1508067
},
"llama2.cc100-nl": {
"vocab_size": 32001,
"n_bytes": 1513030,
"n_tokens": 495966,
"n_chars": 1508067
},
"llama3.cc100-nl": {
"vocab_size": 128256,
"n_bytes": 1513030,
"n_tokens": 448173,
"n_chars": 1508067
},
"llama_3_chinese_8b.cc100-nl": {
"vocab_size": 128256,
"n_bytes": 1513030,
"n_tokens": 458173,
"n_chars": 1508067
},
"mistral_7b.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 515884,
"n_chars": 1508067
},
"mixtral_8_7b.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 515884,
"n_chars": 1508067
},
"mobilebert_uncased.cc100-nl": {
"vocab_size": 30522,
"n_bytes": 1513030,
"n_tokens": 574651,
"n_chars": 1508067
},
"moss.cc100-nl": {
"vocab_size": 106072,
"n_bytes": 1513030,
"n_tokens": 557984,
"n_chars": 1508067
},
"mt5_large.cc100-nl": {
"vocab_size": 250100,
"n_bytes": 1513030,
"n_tokens": 423616,
"n_chars": 1508067
},
"dutch_llama_tokenizer.cc100-es": {
"vocab_size": 32000,
"n_bytes": 1664455,
"n_tokens": 610314,
"n_chars": 1630297
},
"gronlp-gpt2-small-dutch.cc100-es": {
"vocab_size": 40000,
"n_bytes": 1664455,
"n_tokens": 608465,
"n_chars": 1630297
},
"yhavinga-gpt2-medium-dutch.cc100-es": {
"vocab_size": 50257,
"n_bytes": 1664455,
"n_tokens": 605886,
"n_chars": 1630297
},
"yhavinga-ul2-large-en-nl.cc100-es": {
"vocab_size": 32128,
"n_bytes": 1664455,
"n_tokens": 686255,
"n_chars": 1630297
},
"olmo_7b.cc100-nl": {
"vocab_size": 50280,
"n_bytes": 1513030,
"n_tokens": 497728,
"n_chars": 1508067
},
"orion_14b_chat.cc100-nl": {
"vocab_size": 84608,
"n_bytes": 1513030,
"n_tokens": 599429,
"n_chars": 1508067
},
"phi_1.cc100-nl": {
"vocab_size": 50295,
"n_bytes": 1513030,
"n_tokens": 559124,
"n_chars": 1508067
},
"phi_2.cc100-nl": {
"vocab_size": 50295,
"n_bytes": 1513030,
"n_tokens": 559124,
"n_chars": 1508067
},
"phi_3_mini.cc100-nl": {
"vocab_size": 32011,
"n_bytes": 1513030,
"n_tokens": 495966,
"n_chars": 1508067
},
"pko_t5_large.cc100-nl": {
"vocab_size": 50358,
"n_bytes": 1513030,
"n_tokens": 1017288,
"n_chars": 1508067
},
"prompt_clue.cc100-nl": {
"vocab_size": 32128,
"n_bytes": 1513030,
"n_tokens": 837963,
"n_chars": 1508067
},
"qwen1_5_14b_chat.cc100-nl": {
"vocab_size": 151646,
"n_bytes": 1513030,
"n_tokens": 453342,
"n_chars": 1508067
},
"qwen_1_8b_chat.cc100-nl": {
"vocab_size": 151851,
"n_bytes": 1513030,
"n_tokens": 453342,
"n_chars": 1508067
},
"qwen_72b_chat.cc100-nl": {
"vocab_size": 151851,
"n_bytes": 1513030,
"n_tokens": 453342,
"n_chars": 1508067
},
"qwen_7b_chat.cc100-nl": {
"vocab_size": 151851,
"n_bytes": 1513030,
"n_tokens": 453342,
"n_chars": 1508067
},
"roberta_chinese_clue.cc100-nl": {
"vocab_size": 8021,
"n_bytes": 1513030,
"n_tokens": 821246,
"n_chars": 1508067
},
"skywork_13b_base.cc100-nl": {
"vocab_size": 65519,
"n_bytes": 1513030,
"n_tokens": 495958,
"n_chars": 1508067
},
"skywork_13b_math.cc100-nl": {
"vocab_size": 65519,
"n_bytes": 1513030,
"n_tokens": 495958,
"n_chars": 1508067
},
"solar_10_7b.cc100-nl": {
"vocab_size": 32000,
"n_bytes": 1513030,
"n_tokens": 515884,
"n_chars": 1508067
},
"starchat_alpha.cc100-nl": {
"vocab_size": 49156,
"n_bytes": 1513030,
"n_tokens": 532871,
"n_chars": 1508067
},
"switch_c_2048.cc100-nl": {
"vocab_size": 32100,
"n_bytes": 1513030,
"n_tokens": 696333,
"n_chars": 1508067
},
"t5_base.cc100-nl": {
"vocab_size": 32100,
"n_bytes": 1513030,
"n_tokens": 696333,
"n_chars": 1508067
},
"t5_large.cc100-nl": {
"vocab_size": 32100,
"n_bytes": 1513030,
"n_tokens": 696333,
"n_chars": 1508067
},
"t5_small.cc100-nl": {
"vocab_size": 32100,
"n_bytes": 1513030,
"n_tokens": 696333,
"n_chars": 1508067
},
"text_davinci_003.cc100-nl": {
"vocab_size": 50281,
"n_bytes": 1513030,
"n_tokens": 559119,
"n_chars": 1508067
},
"tigerbot_13b_chat_v2.cc100-nl": {
"vocab_size": 60515,
"n_bytes": 1513030,
"n_tokens": 486271,
"n_chars": 1508067
},
"tigerbot_70b_chat_v4_4k.cc100-nl": {
"vocab_size": 65110,
"n_bytes": 1513030,
"n_tokens": 486472,
"n_chars": 1508067
}
}