|
{ |
|
"amber.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"aya_101.cc100-en": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 317881, |
|
"n_chars": 1121360 |
|
}, |
|
"baichuan.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280108, |
|
"n_chars": 1121360 |
|
}, |
|
"baichuan2.cc100-en": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1124813, |
|
"n_tokens": 269011, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_cased.cc100-en": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288022, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_chinese.cc100-en": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 377068, |
|
"n_chars": 1121360 |
|
}, |
|
"bert_base_uncased.cc100-en": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280575, |
|
"n_chars": 1121360 |
|
}, |
|
"bloom.cc100-en": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257405, |
|
"n_chars": 1121360 |
|
}, |
|
"byt5_small.cc100-en": { |
|
"vocab_size": 384, |
|
"n_bytes": 1124813, |
|
"n_tokens": 1134813, |
|
"n_chars": 1121360 |
|
}, |
|
"character_glm_6b.cc100-en": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289347, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm2_6b.cc100-en": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289329, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm3_6b.cc100-en": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1124813, |
|
"n_tokens": 289347, |
|
"n_chars": 1121360 |
|
}, |
|
"chatglm_6b.cc100-en": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1124813, |
|
"n_tokens": 284761, |
|
"n_chars": 1121360 |
|
}, |
|
"chatyuan_large_v2.cc100-en": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 536033, |
|
"n_chars": 1121360 |
|
}, |
|
"chinese_llama.cc100-en": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1124813, |
|
"n_tokens": 291514, |
|
"n_chars": 1121360 |
|
}, |
|
"chinese_llama2.cc100-en": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"code_davinci_002.cc100-en": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258403, |
|
"n_chars": 1121360 |
|
}, |
|
"crystal_coder.cc100-en": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1124813, |
|
"n_tokens": 284627, |
|
"n_chars": 1121360 |
|
}, |
|
"dbrx_instruct.cc100-en": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-en": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1124813, |
|
"n_tokens": 287408, |
|
"n_chars": 1121360 |
|
}, |
|
"deepseek_llm_7b_base.cc100-en": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1124813, |
|
"n_tokens": 272324, |
|
"n_chars": 1121360 |
|
}, |
|
"falcon_180b.cc100-en": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1124813, |
|
"n_tokens": 262509, |
|
"n_chars": 1121360 |
|
}, |
|
"falcon_7b.cc100-en": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1124813, |
|
"n_tokens": 262509, |
|
"n_chars": 1121360 |
|
}, |
|
"fastchat_t5_3b.cc100-en": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1124813, |
|
"n_tokens": 484941, |
|
"n_chars": 1121360 |
|
}, |
|
"flan_t5_base.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"gemma_7b.cc100-en": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 268010, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt2.cc100-en": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258428, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt2_chinese.cc100-en": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 392641, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_35_turbo.cc100-en": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_4.cc100-en": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254985, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_nexo_20b.cc100-en": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1124813, |
|
"n_tokens": 259357, |
|
"n_chars": 1121360 |
|
}, |
|
"grok_1.cc100-en": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258048, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm2_chat_7b.cc100-en": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271583, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm2_math_7b.cc100-en": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271583, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm_chat_7b.cc100-en": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271293, |
|
"n_chars": 1121360 |
|
}, |
|
"internlm_xcomposer_7b.cc100-en": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1124813, |
|
"n_tokens": 271293, |
|
"n_chars": 1121360 |
|
}, |
|
"jamba_v0_1.cc100-en": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1124813, |
|
"n_tokens": 274242, |
|
"n_chars": 1121360 |
|
}, |
|
"kplug.cc100-en": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1124813, |
|
"n_tokens": 393564, |
|
"n_chars": 1121360 |
|
}, |
|
"llama.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"llama2.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"llama3.cc100-en": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1124813, |
|
"n_tokens": 254944, |
|
"n_chars": 1121360 |
|
}, |
|
"mistral_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"mixtral_8_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"mobilebert_uncased.cc100-en": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1124813, |
|
"n_tokens": 280575, |
|
"n_chars": 1121360 |
|
}, |
|
"moss.cc100-en": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257070, |
|
"n_chars": 1121360 |
|
}, |
|
"mt5_large.cc100-en": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 317881, |
|
"n_chars": 1121360 |
|
}, |
|
"olmo_7b.cc100-en": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1124813, |
|
"n_tokens": 259357, |
|
"n_chars": 1121360 |
|
}, |
|
"orion_14b_chat.cc100-en": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1124813, |
|
"n_tokens": 265948, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_1.cc100-en": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258409, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_2.cc100-en": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258409, |
|
"n_chars": 1121360 |
|
}, |
|
"phi_3_mini.cc100-en": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"pko_t5_large.cc100-en": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1124813, |
|
"n_tokens": 658985, |
|
"n_chars": 1121360 |
|
}, |
|
"prompt_clue.cc100-en": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 536033, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen1_5_14b_chat.cc100-en": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_1_8b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_72b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"qwen_7b_chat.cc100-en": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1124813, |
|
"n_tokens": 257983, |
|
"n_chars": 1121360 |
|
}, |
|
"roberta_chinese_clue.cc100-en": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1124813, |
|
"n_tokens": 583058, |
|
"n_chars": 1121360 |
|
}, |
|
"skywork_13b_base.cc100-en": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294617, |
|
"n_chars": 1121360 |
|
}, |
|
"skywork_13b_math.cc100-en": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294617, |
|
"n_chars": 1121360 |
|
}, |
|
"solar_10_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"starchat_alpha.cc100-en": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288965, |
|
"n_chars": 1121360 |
|
}, |
|
"switch_c_2048.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_base.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_large.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"t5_small.cc100-en": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1124813, |
|
"n_tokens": 290104, |
|
"n_chars": 1121360 |
|
}, |
|
"text_davinci_003.cc100-en": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1124813, |
|
"n_tokens": 258403, |
|
"n_chars": 1121360 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-en": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285652, |
|
"n_chars": 1121360 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-en": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1124813, |
|
"n_tokens": 286946, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardcoder_15b_v1.cc100-en": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1124813, |
|
"n_tokens": 288965, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardlm_7b_v1.cc100-en": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"wizardmath_70b_v1.cc100-en": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1124813, |
|
"n_tokens": 294627, |
|
"n_chars": 1121360 |
|
}, |
|
"xlm_roberta.cc100-en": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1124813, |
|
"n_tokens": 300026, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_34b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 270400, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_6b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 270400, |
|
"n_chars": 1121360 |
|
}, |
|
"yi_vl34b.cc100-en": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 269738, |
|
"n_chars": 1121360 |
|
}, |
|
"zephyr_7b_beta.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 285801, |
|
"n_chars": 1121360 |
|
}, |
|
"amber.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"aya_101.cc100-zh-Hans": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 631182, |
|
"n_chars": 927311 |
|
}, |
|
"baichuan.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 626117, |
|
"n_chars": 927311 |
|
}, |
|
"baichuan2.cc100-zh-Hans": { |
|
"vocab_size": 125696, |
|
"n_bytes": 2633047, |
|
"n_tokens": 541464, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_cased.cc100-zh-Hans": { |
|
"vocab_size": 28996, |
|
"n_bytes": 2633047, |
|
"n_tokens": 899709, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_chinese.cc100-zh-Hans": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 896599, |
|
"n_chars": 927311 |
|
}, |
|
"bert_base_uncased.cc100-zh-Hans": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2633047, |
|
"n_tokens": 898554, |
|
"n_chars": 927311 |
|
}, |
|
"bloom.cc100-zh-Hans": { |
|
"vocab_size": 250680, |
|
"n_bytes": 2633047, |
|
"n_tokens": 573008, |
|
"n_chars": 927311 |
|
}, |
|
"byt5_small.cc100-zh-Hans": { |
|
"vocab_size": 384, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2643047, |
|
"n_chars": 927311 |
|
}, |
|
"character_glm_6b.cc100-zh-Hans": { |
|
"vocab_size": 64789, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm2_6b.cc100-zh-Hans": { |
|
"vocab_size": 64787, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm3_6b.cc100-zh-Hans": { |
|
"vocab_size": 64796, |
|
"n_bytes": 2633047, |
|
"n_tokens": 583646, |
|
"n_chars": 927311 |
|
}, |
|
"chatglm_6b.cc100-zh-Hans": { |
|
"vocab_size": 150344, |
|
"n_bytes": 2633047, |
|
"n_tokens": 527384, |
|
"n_chars": 927311 |
|
}, |
|
"chatyuan_large_v2.cc100-zh-Hans": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 564905, |
|
"n_chars": 927311 |
|
}, |
|
"chinese_llama.cc100-zh-Hans": { |
|
"vocab_size": 49953, |
|
"n_bytes": 2633047, |
|
"n_tokens": 623219, |
|
"n_chars": 927311 |
|
}, |
|
"chinese_llama2.cc100-zh-Hans": { |
|
"vocab_size": 55296, |
|
"n_bytes": 2633047, |
|
"n_tokens": 625766, |
|
"n_chars": 927311 |
|
}, |
|
"code_davinci_002.cc100-zh-Hans": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"crystal_coder.cc100-zh-Hans": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1320093, |
|
"n_chars": 927311 |
|
}, |
|
"dbrx_instruct.cc100-zh-Hans": { |
|
"vocab_size": 100280, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-zh-Hans": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2633047, |
|
"n_tokens": 720577, |
|
"n_chars": 927311 |
|
}, |
|
"deepseek_llm_7b_base.cc100-zh-Hans": { |
|
"vocab_size": 100015, |
|
"n_bytes": 2633047, |
|
"n_tokens": 605081, |
|
"n_chars": 927311 |
|
}, |
|
"falcon_180b.cc100-zh-Hans": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1124681, |
|
"n_chars": 927311 |
|
}, |
|
"falcon_7b.cc100-zh-Hans": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1124681, |
|
"n_chars": 927311 |
|
}, |
|
"fastchat_t5_3b.cc100-zh-Hans": { |
|
"vocab_size": 32110, |
|
"n_bytes": 2633047, |
|
"n_tokens": 178974, |
|
"n_chars": 927311 |
|
}, |
|
"flan_t5_base.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173520, |
|
"n_chars": 927311 |
|
}, |
|
"gemma_7b.cc100-zh-Hans": { |
|
"vocab_size": 256000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 641795, |
|
"n_chars": 927311 |
|
}, |
|
"gpt2.cc100-zh-Hans": { |
|
"vocab_size": 50257, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"gpt2_chinese.cc100-zh-Hans": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 899506, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_35_turbo.cc100-zh-Hans": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_4.cc100-zh-Hans": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1084939, |
|
"n_chars": 927311 |
|
}, |
|
"gpt_nexo_20b.cc100-zh-Hans": { |
|
"vocab_size": 50277, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1220529, |
|
"n_chars": 927311 |
|
}, |
|
"grok_1.cc100-zh-Hans": { |
|
"vocab_size": 131072, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1414508, |
|
"n_chars": 927311 |
|
}, |
|
"internlm2_chat_7b.cc100-zh-Hans": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579976, |
|
"n_chars": 927311 |
|
}, |
|
"internlm2_math_7b.cc100-zh-Hans": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579976, |
|
"n_chars": 927311 |
|
}, |
|
"internlm_chat_7b.cc100-zh-Hans": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579109, |
|
"n_chars": 927311 |
|
}, |
|
"internlm_xcomposer_7b.cc100-zh-Hans": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2633047, |
|
"n_tokens": 579109, |
|
"n_chars": 927311 |
|
}, |
|
"jamba_v0_1.cc100-zh-Hans": { |
|
"vocab_size": 65536, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1067054, |
|
"n_chars": 927311 |
|
}, |
|
"kplug.cc100-zh-Hans": { |
|
"vocab_size": 10261, |
|
"n_bytes": 2633047, |
|
"n_tokens": 902451, |
|
"n_chars": 927311 |
|
}, |
|
"llama.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"llama2.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"llama3.cc100-zh-Hans": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2633047, |
|
"n_tokens": 747405, |
|
"n_chars": 927311 |
|
}, |
|
"mistral_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"mixtral_8_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"mobilebert_uncased.cc100-zh-Hans": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2633047, |
|
"n_tokens": 898554, |
|
"n_chars": 927311 |
|
}, |
|
"moss.cc100-zh-Hans": { |
|
"vocab_size": 106072, |
|
"n_bytes": 2633047, |
|
"n_tokens": 557455, |
|
"n_chars": 927311 |
|
}, |
|
"mt5_large.cc100-zh-Hans": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 631182, |
|
"n_chars": 927311 |
|
}, |
|
"olmo_7b.cc100-zh-Hans": { |
|
"vocab_size": 50280, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1220529, |
|
"n_chars": 927311 |
|
}, |
|
"orion_14b_chat.cc100-zh-Hans": { |
|
"vocab_size": 84608, |
|
"n_bytes": 2633047, |
|
"n_tokens": 529926, |
|
"n_chars": 927311 |
|
}, |
|
"phi_1.cc100-zh-Hans": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"phi_2.cc100-zh-Hans": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"phi_3_mini.cc100-zh-Hans": { |
|
"vocab_size": 32011, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"pko_t5_large.cc100-zh-Hans": { |
|
"vocab_size": 50358, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2533519, |
|
"n_chars": 927311 |
|
}, |
|
"prompt_clue.cc100-zh-Hans": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 564905, |
|
"n_chars": 927311 |
|
}, |
|
"qwen1_5_14b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151646, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_1_8b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_72b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"qwen_7b_chat.cc100-zh-Hans": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2633047, |
|
"n_tokens": 589211, |
|
"n_chars": 927311 |
|
}, |
|
"roberta_chinese_clue.cc100-zh-Hans": { |
|
"vocab_size": 8021, |
|
"n_bytes": 2633047, |
|
"n_tokens": 907144, |
|
"n_chars": 927311 |
|
}, |
|
"skywork_13b_base.cc100-zh-Hans": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2633047, |
|
"n_tokens": 663923, |
|
"n_chars": 927311 |
|
}, |
|
"skywork_13b_math.cc100-zh-Hans": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2633047, |
|
"n_tokens": 663923, |
|
"n_chars": 927311 |
|
}, |
|
"solar_10_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"starchat_alpha.cc100-zh-Hans": { |
|
"vocab_size": 49156, |
|
"n_bytes": 2633047, |
|
"n_tokens": 882018, |
|
"n_chars": 927311 |
|
}, |
|
"switch_c_2048.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_base.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_large.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"t5_small.cc100-zh-Hans": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2633047, |
|
"n_tokens": 173519, |
|
"n_chars": 927311 |
|
}, |
|
"text_davinci_003.cc100-zh-Hans": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1876809, |
|
"n_chars": 927311 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-zh-Hans": { |
|
"vocab_size": 60515, |
|
"n_bytes": 2633047, |
|
"n_tokens": 577385, |
|
"n_chars": 927311 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-zh-Hans": { |
|
"vocab_size": 65110, |
|
"n_bytes": 2633047, |
|
"n_tokens": 577211, |
|
"n_chars": 927311 |
|
}, |
|
"wizardcoder_15b_v1.cc100-zh-Hans": { |
|
"vocab_size": 49153, |
|
"n_bytes": 2633047, |
|
"n_tokens": 882018, |
|
"n_chars": 927311 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"wizardlm_7b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"wizardmath_70b_v1.cc100-zh-Hans": { |
|
"vocab_size": 32002, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1330093, |
|
"n_chars": 927311 |
|
}, |
|
"xlm_roberta.cc100-zh-Hans": { |
|
"vocab_size": 250002, |
|
"n_bytes": 2633047, |
|
"n_tokens": 619844, |
|
"n_chars": 927311 |
|
}, |
|
"yi_34b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 588729, |
|
"n_chars": 927311 |
|
}, |
|
"yi_6b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 588729, |
|
"n_chars": 927311 |
|
}, |
|
"yi_vl34b.cc100-zh-Hans": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 596166, |
|
"n_chars": 927311 |
|
}, |
|
"zephyr_7b_beta.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1041023, |
|
"n_chars": 927311 |
|
}, |
|
"amber.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"aya_101.cc100-es": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 472231, |
|
"n_chars": 1630297 |
|
}, |
|
"baichuan.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 585804, |
|
"n_chars": 1630297 |
|
}, |
|
"baichuan2.cc100-es": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1664455, |
|
"n_tokens": 551326, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_cased.cc100-es": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1664455, |
|
"n_tokens": 630231, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_chinese.cc100-es": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 609419, |
|
"n_chars": 1630297 |
|
}, |
|
"bert_base_uncased.cc100-es": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1664455, |
|
"n_tokens": 558042, |
|
"n_chars": 1630297 |
|
}, |
|
"bloom.cc100-es": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1664455, |
|
"n_tokens": 350793, |
|
"n_chars": 1630297 |
|
}, |
|
"byt5_small.cc100-es": { |
|
"vocab_size": 384, |
|
"n_bytes": 1664455, |
|
"n_tokens": 1674455, |
|
"n_chars": 1630297 |
|
}, |
|
"character_glm_6b.cc100-es": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566501, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm2_6b.cc100-es": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566476, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm3_6b.cc100-es": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1664455, |
|
"n_tokens": 566501, |
|
"n_chars": 1630297 |
|
}, |
|
"chatglm_6b.cc100-es": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1664455, |
|
"n_tokens": 514848, |
|
"n_chars": 1630297 |
|
}, |
|
"chatyuan_large_v2.cc100-es": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 889530, |
|
"n_chars": 1630297 |
|
}, |
|
"chinese_llama.cc100-es": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1664455, |
|
"n_tokens": 486672, |
|
"n_chars": 1630297 |
|
}, |
|
"chinese_llama2.cc100-es": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"code_davinci_002.cc100-es": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"crystal_coder.cc100-es": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1664455, |
|
"n_tokens": 482235, |
|
"n_chars": 1630297 |
|
}, |
|
"dbrx_instruct.cc100-es": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-es": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1664455, |
|
"n_tokens": 523884, |
|
"n_chars": 1630297 |
|
}, |
|
"deepseek_llm_7b_base.cc100-es": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1664455, |
|
"n_tokens": 480877, |
|
"n_chars": 1630297 |
|
}, |
|
"falcon_180b.cc100-es": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1664455, |
|
"n_tokens": 442138, |
|
"n_chars": 1630297 |
|
}, |
|
"falcon_7b.cc100-es": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1664455, |
|
"n_tokens": 442138, |
|
"n_chars": 1630297 |
|
}, |
|
"fastchat_t5_3b.cc100-es": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1664455, |
|
"n_tokens": 970105, |
|
"n_chars": 1630297 |
|
}, |
|
"flan_t5_base.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706405, |
|
"n_chars": 1630297 |
|
}, |
|
"gemma_7b.cc100-es": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 371321, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt2.cc100-es": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt2_chinese.cc100-es": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 703390, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_35_turbo.cc100-es": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_4.cc100-es": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433875, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_nexo_20b.cc100-es": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1664455, |
|
"n_tokens": 494577, |
|
"n_chars": 1630297 |
|
}, |
|
"grok_1.cc100-es": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1664455, |
|
"n_tokens": 449392, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm2_chat_7b.cc100-es": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1664455, |
|
"n_tokens": 518871, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm2_math_7b.cc100-es": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1664455, |
|
"n_tokens": 518871, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm_chat_7b.cc100-es": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1664455, |
|
"n_tokens": 516572, |
|
"n_chars": 1630297 |
|
}, |
|
"internlm_xcomposer_7b.cc100-es": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1664455, |
|
"n_tokens": 516572, |
|
"n_chars": 1630297 |
|
}, |
|
"jamba_v0_1.cc100-es": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1664455, |
|
"n_tokens": 420883, |
|
"n_chars": 1630297 |
|
}, |
|
"kplug.cc100-es": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1664455, |
|
"n_tokens": 704804, |
|
"n_chars": 1630297 |
|
}, |
|
"llama.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"llama2.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"llama3.cc100-es": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1664455, |
|
"n_tokens": 433289, |
|
"n_chars": 1630297 |
|
}, |
|
"mistral_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"mixtral_8_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"mobilebert_uncased.cc100-es": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1664455, |
|
"n_tokens": 558042, |
|
"n_chars": 1630297 |
|
}, |
|
"moss.cc100-es": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1664455, |
|
"n_tokens": 568539, |
|
"n_chars": 1630297 |
|
}, |
|
"mt5_large.cc100-es": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 472231, |
|
"n_chars": 1630297 |
|
}, |
|
"olmo_7b.cc100-es": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1664455, |
|
"n_tokens": 494577, |
|
"n_chars": 1630297 |
|
}, |
|
"orion_14b_chat.cc100-es": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1664455, |
|
"n_tokens": 628571, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_1.cc100-es": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_2.cc100-es": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"phi_3_mini.cc100-es": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"pko_t5_large.cc100-es": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1664455, |
|
"n_tokens": 1134056, |
|
"n_chars": 1630297 |
|
}, |
|
"prompt_clue.cc100-es": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 889530, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen1_5_14b_chat.cc100-es": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_1_8b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_72b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"qwen_7b_chat.cc100-es": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1664455, |
|
"n_tokens": 434264, |
|
"n_chars": 1630297 |
|
}, |
|
"roberta_chinese_clue.cc100-es": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1664455, |
|
"n_tokens": 866564, |
|
"n_chars": 1630297 |
|
}, |
|
"skywork_13b_base.cc100-es": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492211, |
|
"n_chars": 1630297 |
|
}, |
|
"skywork_13b_math.cc100-es": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492211, |
|
"n_chars": 1630297 |
|
}, |
|
"solar_10_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"starchat_alpha.cc100-es": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1664455, |
|
"n_tokens": 530592, |
|
"n_chars": 1630297 |
|
}, |
|
"switch_c_2048.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_base.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_large.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"t5_small.cc100-es": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1664455, |
|
"n_tokens": 706400, |
|
"n_chars": 1630297 |
|
}, |
|
"text_davinci_003.cc100-es": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1664455, |
|
"n_tokens": 569853, |
|
"n_chars": 1630297 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-es": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1664455, |
|
"n_tokens": 482553, |
|
"n_chars": 1630297 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-es": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1664455, |
|
"n_tokens": 484099, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardcoder_15b_v1.cc100-es": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1664455, |
|
"n_tokens": 530592, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardlm_7b_v1.cc100-es": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"wizardmath_70b_v1.cc100-es": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1664455, |
|
"n_tokens": 492235, |
|
"n_chars": 1630297 |
|
}, |
|
"xlm_roberta.cc100-es": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1664455, |
|
"n_tokens": 399850, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_34b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 577018, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_6b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 577018, |
|
"n_chars": 1630297 |
|
}, |
|
"yi_vl34b.cc100-es": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 576794, |
|
"n_chars": 1630297 |
|
}, |
|
"zephyr_7b_beta.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 513915, |
|
"n_chars": 1630297 |
|
}, |
|
"aya_101.cc100-fr": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 470944, |
|
"n_chars": 1484970 |
|
}, |
|
"baichuan.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 540430, |
|
"n_chars": 1484970 |
|
}, |
|
"baichuan2.cc100-fr": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1540504, |
|
"n_tokens": 512313, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_cased.cc100-fr": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1540504, |
|
"n_tokens": 583210, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_chinese.cc100-fr": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 553134, |
|
"n_chars": 1484970 |
|
}, |
|
"bert_base_uncased.cc100-fr": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1540504, |
|
"n_tokens": 504075, |
|
"n_chars": 1484970 |
|
}, |
|
"bloom.cc100-fr": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1540504, |
|
"n_tokens": 321639, |
|
"n_chars": 1484970 |
|
}, |
|
"byt5_small.cc100-fr": { |
|
"vocab_size": 384, |
|
"n_bytes": 1540504, |
|
"n_tokens": 1550504, |
|
"n_chars": 1484970 |
|
}, |
|
"character_glm_6b.cc100-fr": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515052, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm2_6b.cc100-fr": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515028, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm3_6b.cc100-fr": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515052, |
|
"n_chars": 1484970 |
|
}, |
|
"chatglm_6b.cc100-fr": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1540504, |
|
"n_tokens": 499261, |
|
"n_chars": 1484970 |
|
}, |
|
"chatyuan_large_v2.cc100-fr": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 822012, |
|
"n_chars": 1484970 |
|
}, |
|
"chinese_llama.cc100-fr": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1540504, |
|
"n_tokens": 450352, |
|
"n_chars": 1484970 |
|
}, |
|
"chinese_llama2.cc100-fr": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"code_davinci_002.cc100-fr": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"crystal_coder.cc100-fr": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1540504, |
|
"n_tokens": 447243, |
|
"n_chars": 1484970 |
|
}, |
|
"dbrx_instruct.cc100-fr": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-fr": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1540504, |
|
"n_tokens": 537538, |
|
"n_chars": 1484970 |
|
}, |
|
"deepseek_llm_7b_base.cc100-fr": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1540504, |
|
"n_tokens": 507693, |
|
"n_chars": 1484970 |
|
}, |
|
"falcon_180b.cc100-fr": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1540504, |
|
"n_tokens": 407853, |
|
"n_chars": 1484970 |
|
}, |
|
"falcon_7b.cc100-fr": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1540504, |
|
"n_tokens": 407853, |
|
"n_chars": 1484970 |
|
}, |
|
"fastchat_t5_3b.cc100-fr": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1540504, |
|
"n_tokens": 717675, |
|
"n_chars": 1484970 |
|
}, |
|
"flan_t5_base.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476135, |
|
"n_chars": 1484970 |
|
}, |
|
"gemma_7b.cc100-fr": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 374551, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt2.cc100-fr": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt2_chinese.cc100-fr": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 636442, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_35_turbo.cc100-fr": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_4.cc100-fr": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412685, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_nexo_20b.cc100-fr": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1540504, |
|
"n_tokens": 458961, |
|
"n_chars": 1484970 |
|
}, |
|
"grok_1.cc100-fr": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1540504, |
|
"n_tokens": 428298, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm2_chat_7b.cc100-fr": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1540504, |
|
"n_tokens": 496629, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm2_math_7b.cc100-fr": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1540504, |
|
"n_tokens": 496629, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm_chat_7b.cc100-fr": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1540504, |
|
"n_tokens": 495045, |
|
"n_chars": 1484970 |
|
}, |
|
"internlm_xcomposer_7b.cc100-fr": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1540504, |
|
"n_tokens": 495045, |
|
"n_chars": 1484970 |
|
}, |
|
"jamba_v0_1.cc100-fr": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412899, |
|
"n_chars": 1484970 |
|
}, |
|
"kplug.cc100-fr": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1540504, |
|
"n_tokens": 638107, |
|
"n_chars": 1484970 |
|
}, |
|
"llama.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"llama2.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"llama3.cc100-fr": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1540504, |
|
"n_tokens": 412146, |
|
"n_chars": 1484970 |
|
}, |
|
"mistral_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"mixtral_8_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"mobilebert_uncased.cc100-fr": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1540504, |
|
"n_tokens": 504075, |
|
"n_chars": 1484970 |
|
}, |
|
"moss.cc100-fr": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1540504, |
|
"n_tokens": 515669, |
|
"n_chars": 1484970 |
|
}, |
|
"mt5_large.cc100-fr": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 470944, |
|
"n_chars": 1484970 |
|
}, |
|
"olmo_7b.cc100-fr": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1540504, |
|
"n_tokens": 458961, |
|
"n_chars": 1484970 |
|
}, |
|
"orion_14b_chat.cc100-fr": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1540504, |
|
"n_tokens": 564107, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_1.cc100-fr": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_2.cc100-fr": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"phi_3_mini.cc100-fr": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"pko_t5_large.cc100-fr": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1540504, |
|
"n_tokens": 1044665, |
|
"n_chars": 1484970 |
|
}, |
|
"prompt_clue.cc100-fr": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1540504, |
|
"n_tokens": 822012, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen1_5_14b_chat.cc100-fr": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_1_8b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_72b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"qwen_7b_chat.cc100-fr": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1540504, |
|
"n_tokens": 413637, |
|
"n_chars": 1484970 |
|
}, |
|
"roberta_chinese_clue.cc100-fr": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1540504, |
|
"n_tokens": 787363, |
|
"n_chars": 1484970 |
|
}, |
|
"skywork_13b_base.cc100-fr": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457233, |
|
"n_chars": 1484970 |
|
}, |
|
"skywork_13b_math.cc100-fr": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457233, |
|
"n_chars": 1484970 |
|
}, |
|
"solar_10_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"starchat_alpha.cc100-fr": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1540504, |
|
"n_tokens": 509958, |
|
"n_chars": 1484970 |
|
}, |
|
"switch_c_2048.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_base.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_large.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"t5_small.cc100-fr": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476133, |
|
"n_chars": 1484970 |
|
}, |
|
"text_davinci_003.cc100-fr": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1540504, |
|
"n_tokens": 521776, |
|
"n_chars": 1484970 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-fr": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1540504, |
|
"n_tokens": 447372, |
|
"n_chars": 1484970 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-fr": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1540504, |
|
"n_tokens": 448567, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardcoder_15b_v1.cc100-fr": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1540504, |
|
"n_tokens": 509958, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardlm_7b_v1.cc100-fr": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"wizardmath_70b_v1.cc100-fr": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1540504, |
|
"n_tokens": 457243, |
|
"n_chars": 1484970 |
|
}, |
|
"xlm_roberta.cc100-fr": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1540504, |
|
"n_tokens": 405041, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_34b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 533106, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_6b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 533106, |
|
"n_chars": 1484970 |
|
}, |
|
"yi_vl34b.cc100-fr": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 532288, |
|
"n_chars": 1484970 |
|
}, |
|
"zephyr_7b_beta.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 476666, |
|
"n_chars": 1484970 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 1121413, |
|
"n_chars": 1121360 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1049033, |
|
"n_chars": 927311 |
|
}, |
|
"aya_101.cc100-ja": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 300542, |
|
"n_chars": 603065 |
|
}, |
|
"baichuan.cc100-ja": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 591656, |
|
"n_chars": 603065 |
|
}, |
|
"baichuan2.cc100-ja": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1774770, |
|
"n_tokens": 554936, |
|
"n_chars": 603065 |
|
}, |
|
"bert_base_cased.cc100-ja": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1774770, |
|
"n_tokens": 410492, |
|
"n_chars": 603065 |
|
}, |
|
"bert_base_chinese.cc100-ja": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1774770, |
|
"n_tokens": 396831, |
|
"n_chars": 603065 |
|
}, |
|
"bert_base_uncased.cc100-ja": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1774770, |
|
"n_tokens": 580634, |
|
"n_chars": 603065 |
|
}, |
|
"bloom.cc100-ja": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1774770, |
|
"n_tokens": 523592, |
|
"n_chars": 603065 |
|
}, |
|
"byt5_small.cc100-ja": { |
|
"vocab_size": 384, |
|
"n_bytes": 1774770, |
|
"n_tokens": 1784770, |
|
"n_chars": 603065 |
|
}, |
|
"aya_101.cc100-ar": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 631736, |
|
"n_chars": 1560987 |
|
}, |
|
"baichuan.cc100-ar": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1422976, |
|
"n_chars": 1560987 |
|
}, |
|
"baichuan2.cc100-ar": { |
|
"vocab_size": 125696, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1337285, |
|
"n_chars": 1560987 |
|
}, |
|
"bert_base_cased.cc100-ar": { |
|
"vocab_size": 28996, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1232449, |
|
"n_chars": 1560987 |
|
}, |
|
"bert_base_chinese.cc100-ar": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2813283, |
|
"n_tokens": 536389, |
|
"n_chars": 1560987 |
|
}, |
|
"bert_base_uncased.cc100-ar": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1269370, |
|
"n_chars": 1560987 |
|
}, |
|
"bloom.cc100-ar": { |
|
"vocab_size": 250680, |
|
"n_bytes": 2813283, |
|
"n_tokens": 427489, |
|
"n_chars": 1560987 |
|
}, |
|
"byt5_small.cc100-ar": { |
|
"vocab_size": 384, |
|
"n_bytes": 2813283, |
|
"n_tokens": 2823283, |
|
"n_chars": 1560987 |
|
}, |
|
"character_glm_6b.cc100-ar": { |
|
"vocab_size": 64789, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1441847, |
|
"n_chars": 1560987 |
|
}, |
|
"chatglm2_6b.cc100-ar": { |
|
"vocab_size": 64787, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1441847, |
|
"n_chars": 1560987 |
|
}, |
|
"chatglm3_6b.cc100-ar": { |
|
"vocab_size": 64796, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1441847, |
|
"n_chars": 1560987 |
|
}, |
|
"chatglm_6b.cc100-ar": { |
|
"vocab_size": 150344, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1097200, |
|
"n_chars": 1560987 |
|
}, |
|
"chatyuan_large_v2.cc100-ar": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1006313, |
|
"n_chars": 1560987 |
|
}, |
|
"chinese_llama.cc100-ar": { |
|
"vocab_size": 49953, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1421625, |
|
"n_chars": 1560987 |
|
}, |
|
"chinese_llama2.cc100-ar": { |
|
"vocab_size": 55296, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"code_davinci_002.cc100-ar": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1558111, |
|
"n_chars": 1560987 |
|
}, |
|
"crystal_coder.cc100-ar": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1422081, |
|
"n_chars": 1560987 |
|
}, |
|
"dbrx_instruct.cc100-ar": { |
|
"vocab_size": 100280, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1105640, |
|
"n_chars": 1560987 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-ar": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1958863, |
|
"n_chars": 1560987 |
|
}, |
|
"deepseek_llm_7b_base.cc100-ar": { |
|
"vocab_size": 100015, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1426103, |
|
"n_chars": 1560987 |
|
}, |
|
"falcon_180b.cc100-ar": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1597443, |
|
"n_chars": 1560987 |
|
}, |
|
"falcon_7b.cc100-ar": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1597443, |
|
"n_chars": 1560987 |
|
}, |
|
"fastchat_t5_3b.cc100-ar": { |
|
"vocab_size": 32110, |
|
"n_bytes": 2813283, |
|
"n_tokens": 832267, |
|
"n_chars": 1560987 |
|
}, |
|
"flan_t5_base.cc100-ar": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 568957, |
|
"n_chars": 1560987 |
|
}, |
|
"gemma_7b.cc100-ar": { |
|
"vocab_size": 256000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 573788, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt2.cc100-ar": { |
|
"vocab_size": 50257, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1558111, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt2_chinese.cc100-ar": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2813283, |
|
"n_tokens": 617677, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt_35_turbo.cc100-ar": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1105640, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt_4.cc100-ar": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1105640, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 2809195, |
|
"n_chars": 1560987 |
|
}, |
|
"gpt_nexo_20b.cc100-ar": { |
|
"vocab_size": 50277, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1106277, |
|
"n_chars": 1560987 |
|
}, |
|
"grok_1.cc100-ar": { |
|
"vocab_size": 131072, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1392088, |
|
"n_chars": 1560987 |
|
}, |
|
"internlm2_chat_7b.cc100-ar": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1635378, |
|
"n_chars": 1560987 |
|
}, |
|
"internlm2_math_7b.cc100-ar": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1635378, |
|
"n_chars": 1560987 |
|
}, |
|
"internlm_chat_7b.cc100-ar": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2813283, |
|
"n_tokens": 532046, |
|
"n_chars": 1560987 |
|
}, |
|
"internlm_xcomposer_7b.cc100-ar": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2813283, |
|
"n_tokens": 532046, |
|
"n_chars": 1560987 |
|
}, |
|
"jamba_v0_1.cc100-ar": { |
|
"vocab_size": 65536, |
|
"n_bytes": 2813283, |
|
"n_tokens": 727886, |
|
"n_chars": 1560987 |
|
}, |
|
"kplug.cc100-ar": { |
|
"vocab_size": 10261, |
|
"n_bytes": 2813283, |
|
"n_tokens": 331987, |
|
"n_chars": 1560987 |
|
}, |
|
"llama.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"llama2.cc100-ar": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"llama3.cc100-ar": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2813283, |
|
"n_tokens": 615514, |
|
"n_chars": 1560987 |
|
}, |
|
"mistral_7b.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1406319, |
|
"n_chars": 1560987 |
|
}, |
|
"mixtral_8_7b.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1406319, |
|
"n_chars": 1560987 |
|
}, |
|
"mobilebert_uncased.cc100-ar": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1269370, |
|
"n_chars": 1560987 |
|
}, |
|
"moss.cc100-ar": { |
|
"vocab_size": 106072, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1557671, |
|
"n_chars": 1560987 |
|
}, |
|
"mt5_large.cc100-ar": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 631736, |
|
"n_chars": 1560987 |
|
}, |
|
"olmo_7b.cc100-ar": { |
|
"vocab_size": 50280, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1106277, |
|
"n_chars": 1560987 |
|
}, |
|
"orion_14b_chat.cc100-ar": { |
|
"vocab_size": 84608, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1531053, |
|
"n_chars": 1560987 |
|
}, |
|
"phi_1.cc100-ar": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1558111, |
|
"n_chars": 1560987 |
|
}, |
|
"phi_2.cc100-ar": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1558111, |
|
"n_chars": 1560987 |
|
}, |
|
"phi_3_mini.cc100-ar": { |
|
"vocab_size": 32011, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"pko_t5_large.cc100-ar": { |
|
"vocab_size": 50358, |
|
"n_bytes": 2813283, |
|
"n_tokens": 2815586, |
|
"n_chars": 1560987 |
|
}, |
|
"prompt_clue.cc100-ar": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1006313, |
|
"n_chars": 1560987 |
|
}, |
|
"qwen1_5_14b_chat.cc100-ar": { |
|
"vocab_size": 151646, |
|
"n_bytes": 2813283, |
|
"n_tokens": 614959, |
|
"n_chars": 1560987 |
|
}, |
|
"qwen_1_8b_chat.cc100-ar": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2813283, |
|
"n_tokens": 614959, |
|
"n_chars": 1560987 |
|
}, |
|
"qwen_72b_chat.cc100-ar": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2813283, |
|
"n_tokens": 614959, |
|
"n_chars": 1560987 |
|
}, |
|
"qwen_7b_chat.cc100-ar": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2813283, |
|
"n_tokens": 614959, |
|
"n_chars": 1560987 |
|
}, |
|
"roberta_chinese_clue.cc100-ar": { |
|
"vocab_size": 8021, |
|
"n_bytes": 2813283, |
|
"n_tokens": 621762, |
|
"n_chars": 1560987 |
|
}, |
|
"skywork_13b_base.cc100-ar": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432065, |
|
"n_chars": 1560987 |
|
}, |
|
"skywork_13b_math.cc100-ar": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432065, |
|
"n_chars": 1560987 |
|
}, |
|
"solar_10_7b.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1406319, |
|
"n_chars": 1560987 |
|
}, |
|
"starchat_alpha.cc100-ar": { |
|
"vocab_size": 49156, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1195640, |
|
"n_chars": 1560987 |
|
}, |
|
"switch_c_2048.cc100-ar": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 568855, |
|
"n_chars": 1560987 |
|
}, |
|
"t5_base.cc100-ar": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 568855, |
|
"n_chars": 1560987 |
|
}, |
|
"t5_large.cc100-ar": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 568855, |
|
"n_chars": 1560987 |
|
}, |
|
"t5_small.cc100-ar": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2813283, |
|
"n_tokens": 568855, |
|
"n_chars": 1560987 |
|
}, |
|
"text_davinci_003.cc100-ar": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1558111, |
|
"n_chars": 1560987 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-ar": { |
|
"vocab_size": 60515, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1422070, |
|
"n_chars": 1560987 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-ar": { |
|
"vocab_size": 65110, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1422073, |
|
"n_chars": 1560987 |
|
}, |
|
"wizardcoder_15b_v1.cc100-ar": { |
|
"vocab_size": 49153, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1195640, |
|
"n_chars": 1560987 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-ar": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"wizardlm_7b_v1.cc100-ar": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"wizardmath_70b_v1.cc100-ar": { |
|
"vocab_size": 32002, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1432081, |
|
"n_chars": 1560987 |
|
}, |
|
"xlm_roberta.cc100-ar": { |
|
"vocab_size": 250002, |
|
"n_bytes": 2813283, |
|
"n_tokens": 518287, |
|
"n_chars": 1560987 |
|
}, |
|
"yi_34b.cc100-ar": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1795801, |
|
"n_chars": 1560987 |
|
}, |
|
"yi_6b.cc100-ar": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1795801, |
|
"n_chars": 1560987 |
|
}, |
|
"yi_vl34b.cc100-ar": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1803957, |
|
"n_chars": 1560987 |
|
}, |
|
"zephyr_7b_beta.cc100-ar": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2813283, |
|
"n_tokens": 1406319, |
|
"n_chars": 1560987 |
|
}, |
|
"aya_101.cc100-de": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480418, |
|
"n_chars": 1784021 |
|
}, |
|
"baichuan.cc100-de": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 680512, |
|
"n_chars": 1784021 |
|
}, |
|
"baichuan2.cc100-de": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1814876, |
|
"n_tokens": 628063, |
|
"n_chars": 1784021 |
|
}, |
|
"bert_base_cased.cc100-de": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1814876, |
|
"n_tokens": 731093, |
|
"n_chars": 1784021 |
|
}, |
|
"bert_base_chinese.cc100-de": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1814876, |
|
"n_tokens": 561246, |
|
"n_chars": 1784021 |
|
}, |
|
"bert_base_uncased.cc100-de": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1814876, |
|
"n_tokens": 646485, |
|
"n_chars": 1784021 |
|
}, |
|
"bloom.cc100-de": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1814876, |
|
"n_tokens": 541170, |
|
"n_chars": 1784021 |
|
}, |
|
"byt5_small.cc100-de": { |
|
"vocab_size": 384, |
|
"n_bytes": 1814876, |
|
"n_tokens": 1824876, |
|
"n_chars": 1784021 |
|
}, |
|
"character_glm_6b.cc100-de": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1814876, |
|
"n_tokens": 639822, |
|
"n_chars": 1784021 |
|
}, |
|
"chatglm2_6b.cc100-de": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1814876, |
|
"n_tokens": 639757, |
|
"n_chars": 1784021 |
|
}, |
|
"chatglm3_6b.cc100-de": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1814876, |
|
"n_tokens": 639822, |
|
"n_chars": 1784021 |
|
}, |
|
"chatglm_6b.cc100-de": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1814876, |
|
"n_tokens": 589464, |
|
"n_chars": 1784021 |
|
}, |
|
"chatyuan_large_v2.cc100-de": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1814876, |
|
"n_tokens": 970463, |
|
"n_chars": 1784021 |
|
}, |
|
"chinese_llama.cc100-de": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1814876, |
|
"n_tokens": 523859, |
|
"n_chars": 1784021 |
|
}, |
|
"chinese_llama2.cc100-de": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537318, |
|
"n_chars": 1784021 |
|
}, |
|
"code_davinci_002.cc100-de": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1814876, |
|
"n_tokens": 684666, |
|
"n_chars": 1784021 |
|
}, |
|
"crystal_coder.cc100-de": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1814876, |
|
"n_tokens": 527320, |
|
"n_chars": 1784021 |
|
}, |
|
"dbrx_instruct.cc100-de": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1814876, |
|
"n_tokens": 500870, |
|
"n_chars": 1784021 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-de": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1814876, |
|
"n_tokens": 745618, |
|
"n_chars": 1784021 |
|
}, |
|
"deepseek_llm_7b_base.cc100-de": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1814876, |
|
"n_tokens": 642573, |
|
"n_chars": 1784021 |
|
}, |
|
"falcon_180b.cc100-de": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1814876, |
|
"n_tokens": 497054, |
|
"n_chars": 1784021 |
|
}, |
|
"falcon_7b.cc100-de": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1814876, |
|
"n_tokens": 497054, |
|
"n_chars": 1784021 |
|
}, |
|
"fastchat_t5_3b.cc100-de": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1814876, |
|
"n_tokens": 736989, |
|
"n_chars": 1784021 |
|
}, |
|
"flan_t5_base.cc100-de": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480254, |
|
"n_chars": 1784021 |
|
}, |
|
"gemma_7b.cc100-de": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 416876, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt2.cc100-de": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1814876, |
|
"n_tokens": 684669, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt2_chinese.cc100-de": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1814876, |
|
"n_tokens": 786497, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt_35_turbo.cc100-de": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1814876, |
|
"n_tokens": 500870, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt_4.cc100-de": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1814876, |
|
"n_tokens": 500870, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 1807780, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt_nexo_20b.cc100-de": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1814876, |
|
"n_tokens": 583628, |
|
"n_chars": 1784021 |
|
}, |
|
"grok_1.cc100-de": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1814876, |
|
"n_tokens": 505220, |
|
"n_chars": 1784021 |
|
}, |
|
"internlm2_chat_7b.cc100-de": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1814876, |
|
"n_tokens": 583917, |
|
"n_chars": 1784021 |
|
}, |
|
"internlm2_math_7b.cc100-de": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1814876, |
|
"n_tokens": 583917, |
|
"n_chars": 1784021 |
|
}, |
|
"internlm_chat_7b.cc100-de": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1814876, |
|
"n_tokens": 580489, |
|
"n_chars": 1784021 |
|
}, |
|
"internlm_xcomposer_7b.cc100-de": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1814876, |
|
"n_tokens": 580489, |
|
"n_chars": 1784021 |
|
}, |
|
"jamba_v0_1.cc100-de": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1814876, |
|
"n_tokens": 535856, |
|
"n_chars": 1784021 |
|
}, |
|
"kplug.cc100-de": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1814876, |
|
"n_tokens": 789053, |
|
"n_chars": 1784021 |
|
}, |
|
"llama.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"llama2.cc100-de": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"llama3.cc100-de": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1814876, |
|
"n_tokens": 499766, |
|
"n_chars": 1784021 |
|
}, |
|
"mistral_7b.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 577526, |
|
"n_chars": 1784021 |
|
}, |
|
"mixtral_8_7b.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 577526, |
|
"n_chars": 1784021 |
|
}, |
|
"mobilebert_uncased.cc100-de": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1814876, |
|
"n_tokens": 646485, |
|
"n_chars": 1784021 |
|
}, |
|
"moss.cc100-de": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1814876, |
|
"n_tokens": 683401, |
|
"n_chars": 1784021 |
|
}, |
|
"mt5_large.cc100-de": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480418, |
|
"n_chars": 1784021 |
|
}, |
|
"olmo_7b.cc100-de": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1814876, |
|
"n_tokens": 583628, |
|
"n_chars": 1784021 |
|
}, |
|
"orion_14b_chat.cc100-de": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1814876, |
|
"n_tokens": 744404, |
|
"n_chars": 1784021 |
|
}, |
|
"phi_1.cc100-de": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1814876, |
|
"n_tokens": 684665, |
|
"n_chars": 1784021 |
|
}, |
|
"phi_2.cc100-de": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1814876, |
|
"n_tokens": 684665, |
|
"n_chars": 1784021 |
|
}, |
|
"phi_3_mini.cc100-de": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"pko_t5_large.cc100-de": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1814876, |
|
"n_tokens": 1254350, |
|
"n_chars": 1784021 |
|
}, |
|
"prompt_clue.cc100-de": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1814876, |
|
"n_tokens": 970463, |
|
"n_chars": 1784021 |
|
}, |
|
"qwen1_5_14b_chat.cc100-de": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1814876, |
|
"n_tokens": 503561, |
|
"n_chars": 1784021 |
|
}, |
|
"qwen_1_8b_chat.cc100-de": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1814876, |
|
"n_tokens": 503561, |
|
"n_chars": 1784021 |
|
}, |
|
"qwen_72b_chat.cc100-de": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1814876, |
|
"n_tokens": 503561, |
|
"n_chars": 1784021 |
|
}, |
|
"qwen_7b_chat.cc100-de": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1814876, |
|
"n_tokens": 503561, |
|
"n_chars": 1784021 |
|
}, |
|
"roberta_chinese_clue.cc100-de": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1814876, |
|
"n_tokens": 915612, |
|
"n_chars": 1784021 |
|
}, |
|
"skywork_13b_base.cc100-de": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537308, |
|
"n_chars": 1784021 |
|
}, |
|
"skywork_13b_math.cc100-de": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537308, |
|
"n_chars": 1784021 |
|
}, |
|
"solar_10_7b.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 577526, |
|
"n_chars": 1784021 |
|
}, |
|
"starchat_alpha.cc100-de": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1814876, |
|
"n_tokens": 620541, |
|
"n_chars": 1784021 |
|
}, |
|
"switch_c_2048.cc100-de": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480254, |
|
"n_chars": 1784021 |
|
}, |
|
"t5_base.cc100-de": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480254, |
|
"n_chars": 1784021 |
|
}, |
|
"t5_large.cc100-de": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480254, |
|
"n_chars": 1784021 |
|
}, |
|
"t5_small.cc100-de": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1814876, |
|
"n_tokens": 480254, |
|
"n_chars": 1784021 |
|
}, |
|
"text_davinci_003.cc100-de": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1814876, |
|
"n_tokens": 684666, |
|
"n_chars": 1784021 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-de": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1814876, |
|
"n_tokens": 528918, |
|
"n_chars": 1784021 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-de": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1814876, |
|
"n_tokens": 529170, |
|
"n_chars": 1784021 |
|
}, |
|
"wizardcoder_15b_v1.cc100-de": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1814876, |
|
"n_tokens": 620541, |
|
"n_chars": 1784021 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-de": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"wizardlm_7b_v1.cc100-de": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"wizardmath_70b_v1.cc100-de": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1814876, |
|
"n_tokens": 537320, |
|
"n_chars": 1784021 |
|
}, |
|
"xlm_roberta.cc100-de": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1814876, |
|
"n_tokens": 432571, |
|
"n_chars": 1784021 |
|
}, |
|
"yi_34b.cc100-de": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 698366, |
|
"n_chars": 1784021 |
|
}, |
|
"yi_6b.cc100-de": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 698366, |
|
"n_chars": 1784021 |
|
}, |
|
"yi_vl34b.cc100-de": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 697065, |
|
"n_chars": 1784021 |
|
}, |
|
"zephyr_7b_beta.cc100-de": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1814876, |
|
"n_tokens": 577526, |
|
"n_chars": 1784021 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 1658946, |
|
"n_chars": 1630297 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-fr": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1540504, |
|
"n_tokens": 1524129, |
|
"n_chars": 1484970 |
|
}, |
|
"character_glm_6b.cc100-ja": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1774770, |
|
"n_tokens": 601380, |
|
"n_chars": 603065 |
|
}, |
|
"chatglm2_6b.cc100-ja": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1774770, |
|
"n_tokens": 601380, |
|
"n_chars": 603065 |
|
}, |
|
"chatglm3_6b.cc100-ja": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1774770, |
|
"n_tokens": 601380, |
|
"n_chars": 603065 |
|
}, |
|
"chatglm_6b.cc100-ja": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1774770, |
|
"n_tokens": 489930, |
|
"n_chars": 603065 |
|
}, |
|
"chatyuan_large_v2.cc100-ja": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1774770, |
|
"n_tokens": 575118, |
|
"n_chars": 603065 |
|
}, |
|
"chinese_llama.cc100-ja": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1774770, |
|
"n_tokens": 614177, |
|
"n_chars": 603065 |
|
}, |
|
"chinese_llama2.cc100-ja": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1774770, |
|
"n_tokens": 624362, |
|
"n_chars": 603065 |
|
}, |
|
"code_davinci_002.cc100-ja": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1774770, |
|
"n_tokens": 844362, |
|
"n_chars": 603065 |
|
}, |
|
"crystal_coder.cc100-ja": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1774770, |
|
"n_tokens": 718461, |
|
"n_chars": 603065 |
|
}, |
|
"dbrx_instruct.cc100-ja": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1774770, |
|
"n_tokens": 630348, |
|
"n_chars": 603065 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-ja": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1774770, |
|
"n_tokens": 1018060, |
|
"n_chars": 603065 |
|
}, |
|
"deepseek_llm_7b_base.cc100-ja": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1774770, |
|
"n_tokens": 761467, |
|
"n_chars": 603065 |
|
}, |
|
"falcon_180b.cc100-ja": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1774770, |
|
"n_tokens": 842458, |
|
"n_chars": 603065 |
|
}, |
|
"falcon_7b.cc100-ja": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1774770, |
|
"n_tokens": 842458, |
|
"n_chars": 603065 |
|
}, |
|
"fastchat_t5_3b.cc100-ja": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1774770, |
|
"n_tokens": 53915, |
|
"n_chars": 603065 |
|
}, |
|
"flan_t5_base.cc100-ja": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 51999, |
|
"n_chars": 603065 |
|
}, |
|
"gemma_7b.cc100-ja": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 317873, |
|
"n_chars": 603065 |
|
}, |
|
"gpt2.cc100-ja": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1774770, |
|
"n_tokens": 844362, |
|
"n_chars": 603065 |
|
}, |
|
"gpt2_chinese.cc100-ja": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1774770, |
|
"n_tokens": 503085, |
|
"n_chars": 603065 |
|
}, |
|
"gpt_35_turbo.cc100-ja": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1774770, |
|
"n_tokens": 630348, |
|
"n_chars": 603065 |
|
}, |
|
"gpt_4.cc100-ja": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1774770, |
|
"n_tokens": 630348, |
|
"n_chars": 603065 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 410803, |
|
"n_chars": 603065 |
|
}, |
|
"gpt_nexo_20b.cc100-ja": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1774770, |
|
"n_tokens": 605168, |
|
"n_chars": 603065 |
|
}, |
|
"grok_1.cc100-ja": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1774770, |
|
"n_tokens": 497590, |
|
"n_chars": 603065 |
|
}, |
|
"internlm2_chat_7b.cc100-ja": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1774770, |
|
"n_tokens": 595803, |
|
"n_chars": 603065 |
|
}, |
|
"internlm2_math_7b.cc100-ja": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1774770, |
|
"n_tokens": 595803, |
|
"n_chars": 603065 |
|
}, |
|
"internlm_chat_7b.cc100-ja": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1774770, |
|
"n_tokens": 448212, |
|
"n_chars": 603065 |
|
}, |
|
"internlm_xcomposer_7b.cc100-ja": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1774770, |
|
"n_tokens": 448212, |
|
"n_chars": 603065 |
|
}, |
|
"jamba_v0_1.cc100-ja": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1774770, |
|
"n_tokens": 683256, |
|
"n_chars": 603065 |
|
}, |
|
"kplug.cc100-ja": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1774770, |
|
"n_tokens": 338023, |
|
"n_chars": 603065 |
|
}, |
|
"llama.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"llama2.cc100-ja": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"llama3.cc100-ja": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1774770, |
|
"n_tokens": 414715, |
|
"n_chars": 603065 |
|
}, |
|
"mistral_7b.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 685134, |
|
"n_chars": 603065 |
|
}, |
|
"mixtral_8_7b.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 685134, |
|
"n_chars": 603065 |
|
}, |
|
"mobilebert_uncased.cc100-ja": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1774770, |
|
"n_tokens": 580634, |
|
"n_chars": 603065 |
|
}, |
|
"moss.cc100-ja": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1774770, |
|
"n_tokens": 600011, |
|
"n_chars": 603065 |
|
}, |
|
"mt5_large.cc100-ja": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 300542, |
|
"n_chars": 603065 |
|
}, |
|
"olmo_7b.cc100-ja": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1774770, |
|
"n_tokens": 605168, |
|
"n_chars": 603065 |
|
}, |
|
"orion_14b_chat.cc100-ja": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1774770, |
|
"n_tokens": 324956, |
|
"n_chars": 603065 |
|
}, |
|
"phi_1.cc100-ja": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1774770, |
|
"n_tokens": 844362, |
|
"n_chars": 603065 |
|
}, |
|
"phi_2.cc100-ja": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1774770, |
|
"n_tokens": 844362, |
|
"n_chars": 603065 |
|
}, |
|
"phi_3_mini.cc100-ja": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"pko_t5_large.cc100-ja": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1774770, |
|
"n_tokens": 1766950, |
|
"n_chars": 603065 |
|
}, |
|
"prompt_clue.cc100-ja": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1774770, |
|
"n_tokens": 575118, |
|
"n_chars": 603065 |
|
}, |
|
"qwen1_5_14b_chat.cc100-ja": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1774770, |
|
"n_tokens": 377144, |
|
"n_chars": 603065 |
|
}, |
|
"qwen_1_8b_chat.cc100-ja": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1774770, |
|
"n_tokens": 377144, |
|
"n_chars": 603065 |
|
}, |
|
"qwen_72b_chat.cc100-ja": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1774770, |
|
"n_tokens": 377144, |
|
"n_chars": 603065 |
|
}, |
|
"qwen_7b_chat.cc100-ja": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1774770, |
|
"n_tokens": 377144, |
|
"n_chars": 603065 |
|
}, |
|
"roberta_chinese_clue.cc100-ja": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1774770, |
|
"n_tokens": 339411, |
|
"n_chars": 603065 |
|
}, |
|
"skywork_13b_base.cc100-ja": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1774770, |
|
"n_tokens": 603613, |
|
"n_chars": 603065 |
|
}, |
|
"skywork_13b_math.cc100-ja": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1774770, |
|
"n_tokens": 603613, |
|
"n_chars": 603065 |
|
}, |
|
"solar_10_7b.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 685134, |
|
"n_chars": 603065 |
|
}, |
|
"starchat_alpha.cc100-ja": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1774770, |
|
"n_tokens": 546876, |
|
"n_chars": 603065 |
|
}, |
|
"switch_c_2048.cc100-ja": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 51947, |
|
"n_chars": 603065 |
|
}, |
|
"t5_base.cc100-ja": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 51947, |
|
"n_chars": 603065 |
|
}, |
|
"t5_large.cc100-ja": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 51947, |
|
"n_chars": 603065 |
|
}, |
|
"t5_small.cc100-ja": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1774770, |
|
"n_tokens": 51947, |
|
"n_chars": 603065 |
|
}, |
|
"text_davinci_003.cc100-ja": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1774770, |
|
"n_tokens": 844362, |
|
"n_chars": 603065 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-ja": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1774770, |
|
"n_tokens": 567792, |
|
"n_chars": 603065 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-ja": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1774770, |
|
"n_tokens": 406571, |
|
"n_chars": 603065 |
|
}, |
|
"wizardcoder_15b_v1.cc100-ja": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1774770, |
|
"n_tokens": 546876, |
|
"n_chars": 603065 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-ja": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"wizardlm_7b_v1.cc100-ja": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"wizardmath_70b_v1.cc100-ja": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1774770, |
|
"n_tokens": 728461, |
|
"n_chars": 603065 |
|
}, |
|
"xlm_roberta.cc100-ja": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1774770, |
|
"n_tokens": 344820, |
|
"n_chars": 603065 |
|
}, |
|
"yi_34b.cc100-ja": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 740791, |
|
"n_chars": 603065 |
|
}, |
|
"yi_6b.cc100-ja": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 740791, |
|
"n_chars": 603065 |
|
}, |
|
"yi_vl34b.cc100-ja": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 749927, |
|
"n_chars": 603065 |
|
}, |
|
"zephyr_7b_beta.cc100-ja": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1774770, |
|
"n_tokens": 685134, |
|
"n_chars": 603065 |
|
}, |
|
"llama_3_chinese_8b.cc100-ar": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2813283, |
|
"n_tokens": 625514, |
|
"n_chars": 1560987 |
|
}, |
|
"llama_3_chinese_8b.cc100-de": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1814876, |
|
"n_tokens": 509766, |
|
"n_chars": 1784021 |
|
}, |
|
"llama_3_chinese_8b.cc100-en": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1124813, |
|
"n_tokens": 264944, |
|
"n_chars": 1121360 |
|
}, |
|
"llama_3_chinese_8b.cc100-es": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1664455, |
|
"n_tokens": 443289, |
|
"n_chars": 1630297 |
|
}, |
|
"aya_101.cc100-fa": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 429922, |
|
"n_chars": 1145876 |
|
}, |
|
"baichuan.cc100-fa": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1142057, |
|
"n_chars": 1145876 |
|
}, |
|
"baichuan2.cc100-fa": { |
|
"vocab_size": 125696, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1052077, |
|
"n_chars": 1145876 |
|
}, |
|
"bert_base_cased.cc100-fa": { |
|
"vocab_size": 28996, |
|
"n_bytes": 2054052, |
|
"n_tokens": 903078, |
|
"n_chars": 1145876 |
|
}, |
|
"bert_base_chinese.cc100-fa": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2054052, |
|
"n_tokens": 396414, |
|
"n_chars": 1145876 |
|
}, |
|
"bert_base_uncased.cc100-fa": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2054052, |
|
"n_tokens": 910783, |
|
"n_chars": 1145876 |
|
}, |
|
"bloom.cc100-fa": { |
|
"vocab_size": 250680, |
|
"n_bytes": 2054052, |
|
"n_tokens": 434406, |
|
"n_chars": 1145876 |
|
}, |
|
"byt5_small.cc100-fa": { |
|
"vocab_size": 384, |
|
"n_bytes": 2054052, |
|
"n_tokens": 2064052, |
|
"n_chars": 1145876 |
|
}, |
|
"character_glm_6b.cc100-fa": { |
|
"vocab_size": 64789, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1165051, |
|
"n_chars": 1145876 |
|
}, |
|
"chatglm2_6b.cc100-fa": { |
|
"vocab_size": 64787, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1165051, |
|
"n_chars": 1145876 |
|
}, |
|
"chatglm3_6b.cc100-fa": { |
|
"vocab_size": 64796, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1165051, |
|
"n_chars": 1145876 |
|
}, |
|
"chatglm_6b.cc100-fa": { |
|
"vocab_size": 150344, |
|
"n_bytes": 2054052, |
|
"n_tokens": 910808, |
|
"n_chars": 1145876 |
|
}, |
|
"chatyuan_large_v2.cc100-fa": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2054052, |
|
"n_tokens": 740377, |
|
"n_chars": 1145876 |
|
}, |
|
"chinese_llama.cc100-fa": { |
|
"vocab_size": 49953, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1150750, |
|
"n_chars": 1145876 |
|
}, |
|
"chinese_llama2.cc100-fa": { |
|
"vocab_size": 55296, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155078, |
|
"n_chars": 1145876 |
|
}, |
|
"code_davinci_002.cc100-fa": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1292300, |
|
"n_chars": 1145876 |
|
}, |
|
"crystal_coder.cc100-fa": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1145076, |
|
"n_chars": 1145876 |
|
}, |
|
"dbrx_instruct.cc100-fa": { |
|
"vocab_size": 100280, |
|
"n_bytes": 2054052, |
|
"n_tokens": 818067, |
|
"n_chars": 1145876 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-fa": { |
|
"vocab_size": 32022, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1326109, |
|
"n_chars": 1145876 |
|
}, |
|
"deepseek_llm_7b_base.cc100-fa": { |
|
"vocab_size": 100015, |
|
"n_bytes": 2054052, |
|
"n_tokens": 973451, |
|
"n_chars": 1145876 |
|
}, |
|
"falcon_180b.cc100-fa": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1246580, |
|
"n_chars": 1145876 |
|
}, |
|
"falcon_7b.cc100-fa": { |
|
"vocab_size": 65024, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1246580, |
|
"n_chars": 1145876 |
|
}, |
|
"fastchat_t5_3b.cc100-fa": { |
|
"vocab_size": 32110, |
|
"n_bytes": 2054052, |
|
"n_tokens": 712443, |
|
"n_chars": 1145876 |
|
}, |
|
"flan_t5_base.cc100-fa": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 493779, |
|
"n_chars": 1145876 |
|
}, |
|
"gemma_7b.cc100-fa": { |
|
"vocab_size": 256000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 373762, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt2.cc100-fa": { |
|
"vocab_size": 50257, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1292300, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt2_chinese.cc100-fa": { |
|
"vocab_size": 21128, |
|
"n_bytes": 2054052, |
|
"n_tokens": 406174, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt_35_turbo.cc100-fa": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2054052, |
|
"n_tokens": 818067, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt_4.cc100-fa": { |
|
"vocab_size": 100277, |
|
"n_bytes": 2054052, |
|
"n_tokens": 818067, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 2036715, |
|
"n_chars": 1145876 |
|
}, |
|
"gpt_nexo_20b.cc100-fa": { |
|
"vocab_size": 50277, |
|
"n_bytes": 2054052, |
|
"n_tokens": 866434, |
|
"n_chars": 1145876 |
|
}, |
|
"grok_1.cc100-fa": { |
|
"vocab_size": 131072, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1073281, |
|
"n_chars": 1145876 |
|
}, |
|
"internlm2_chat_7b.cc100-fa": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1195032, |
|
"n_chars": 1145876 |
|
}, |
|
"internlm2_math_7b.cc100-fa": { |
|
"vocab_size": 92544, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1195032, |
|
"n_chars": 1145876 |
|
}, |
|
"internlm_chat_7b.cc100-fa": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2054052, |
|
"n_tokens": 640945, |
|
"n_chars": 1145876 |
|
}, |
|
"internlm_xcomposer_7b.cc100-fa": { |
|
"vocab_size": 103168, |
|
"n_bytes": 2054052, |
|
"n_tokens": 640945, |
|
"n_chars": 1145876 |
|
}, |
|
"jamba_v0_1.cc100-fa": { |
|
"vocab_size": 65536, |
|
"n_bytes": 2054052, |
|
"n_tokens": 732550, |
|
"n_chars": 1145876 |
|
}, |
|
"kplug.cc100-fa": { |
|
"vocab_size": 10261, |
|
"n_bytes": 2054052, |
|
"n_tokens": 274671, |
|
"n_chars": 1145876 |
|
}, |
|
"llama.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"llama2.cc100-fa": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"llama3.cc100-fa": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2054052, |
|
"n_tokens": 387448, |
|
"n_chars": 1145876 |
|
}, |
|
"llama_3_chinese_8b.cc100-fa": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2054052, |
|
"n_tokens": 397448, |
|
"n_chars": 1145876 |
|
}, |
|
"mistral_7b.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1133278, |
|
"n_chars": 1145876 |
|
}, |
|
"mixtral_8_7b.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1133278, |
|
"n_chars": 1145876 |
|
}, |
|
"mobilebert_uncased.cc100-fa": { |
|
"vocab_size": 30522, |
|
"n_bytes": 2054052, |
|
"n_tokens": 910783, |
|
"n_chars": 1145876 |
|
}, |
|
"moss.cc100-fa": { |
|
"vocab_size": 106072, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1285426, |
|
"n_chars": 1145876 |
|
}, |
|
"mt5_large.cc100-fa": { |
|
"vocab_size": 250100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 429922, |
|
"n_chars": 1145876 |
|
}, |
|
"olmo_7b.cc100-fa": { |
|
"vocab_size": 50280, |
|
"n_bytes": 2054052, |
|
"n_tokens": 866434, |
|
"n_chars": 1145876 |
|
}, |
|
"orion_14b_chat.cc100-fa": { |
|
"vocab_size": 84608, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1131108, |
|
"n_chars": 1145876 |
|
}, |
|
"phi_1.cc100-fa": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1292300, |
|
"n_chars": 1145876 |
|
}, |
|
"phi_2.cc100-fa": { |
|
"vocab_size": 50295, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1292300, |
|
"n_chars": 1145876 |
|
}, |
|
"phi_3_mini.cc100-fa": { |
|
"vocab_size": 32011, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"pko_t5_large.cc100-fa": { |
|
"vocab_size": 50358, |
|
"n_bytes": 2054052, |
|
"n_tokens": 2061040, |
|
"n_chars": 1145876 |
|
}, |
|
"prompt_clue.cc100-fa": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2054052, |
|
"n_tokens": 740377, |
|
"n_chars": 1145876 |
|
}, |
|
"qwen1_5_14b_chat.cc100-fa": { |
|
"vocab_size": 151646, |
|
"n_bytes": 2054052, |
|
"n_tokens": 643421, |
|
"n_chars": 1145876 |
|
}, |
|
"qwen_1_8b_chat.cc100-fa": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2054052, |
|
"n_tokens": 643421, |
|
"n_chars": 1145876 |
|
}, |
|
"qwen_72b_chat.cc100-fa": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2054052, |
|
"n_tokens": 643421, |
|
"n_chars": 1145876 |
|
}, |
|
"qwen_7b_chat.cc100-fa": { |
|
"vocab_size": 151851, |
|
"n_bytes": 2054052, |
|
"n_tokens": 643421, |
|
"n_chars": 1145876 |
|
}, |
|
"roberta_chinese_clue.cc100-fa": { |
|
"vocab_size": 8021, |
|
"n_bytes": 2054052, |
|
"n_tokens": 407763, |
|
"n_chars": 1145876 |
|
}, |
|
"skywork_13b_base.cc100-fa": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155072, |
|
"n_chars": 1145876 |
|
}, |
|
"skywork_13b_math.cc100-fa": { |
|
"vocab_size": 65519, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155072, |
|
"n_chars": 1145876 |
|
}, |
|
"solar_10_7b.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1133278, |
|
"n_chars": 1145876 |
|
}, |
|
"starchat_alpha.cc100-fa": { |
|
"vocab_size": 49156, |
|
"n_bytes": 2054052, |
|
"n_tokens": 851630, |
|
"n_chars": 1145876 |
|
}, |
|
"switch_c_2048.cc100-fa": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 493767, |
|
"n_chars": 1145876 |
|
}, |
|
"t5_base.cc100-fa": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 493767, |
|
"n_chars": 1145876 |
|
}, |
|
"t5_large.cc100-fa": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 493767, |
|
"n_chars": 1145876 |
|
}, |
|
"t5_small.cc100-fa": { |
|
"vocab_size": 32100, |
|
"n_bytes": 2054052, |
|
"n_tokens": 493767, |
|
"n_chars": 1145876 |
|
}, |
|
"text_davinci_003.cc100-fa": { |
|
"vocab_size": 50281, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1292300, |
|
"n_chars": 1145876 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-fa": { |
|
"vocab_size": 60515, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1145046, |
|
"n_chars": 1145876 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-fa": { |
|
"vocab_size": 65110, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1145048, |
|
"n_chars": 1145876 |
|
}, |
|
"wizardcoder_15b_v1.cc100-fa": { |
|
"vocab_size": 49153, |
|
"n_bytes": 2054052, |
|
"n_tokens": 851630, |
|
"n_chars": 1145876 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-fa": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"wizardlm_7b_v1.cc100-fa": { |
|
"vocab_size": 32001, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"wizardmath_70b_v1.cc100-fa": { |
|
"vocab_size": 32002, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1155076, |
|
"n_chars": 1145876 |
|
}, |
|
"xlm_roberta.cc100-fa": { |
|
"vocab_size": 250002, |
|
"n_bytes": 2054052, |
|
"n_tokens": 330926, |
|
"n_chars": 1145876 |
|
}, |
|
"yi_34b.cc100-fa": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1337264, |
|
"n_chars": 1145876 |
|
}, |
|
"yi_6b.cc100-fa": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1337264, |
|
"n_chars": 1145876 |
|
}, |
|
"yi_vl34b.cc100-fa": { |
|
"vocab_size": 64000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1346819, |
|
"n_chars": 1145876 |
|
}, |
|
"zephyr_7b_beta.cc100-fa": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2054052, |
|
"n_tokens": 1133278, |
|
"n_chars": 1145876 |
|
}, |
|
"llama_3_chinese_8b.cc100-fr": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1540504, |
|
"n_tokens": 422146, |
|
"n_chars": 1484970 |
|
}, |
|
"llama_3_chinese_8b.cc100-ja": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1774770, |
|
"n_tokens": 424715, |
|
"n_chars": 603065 |
|
}, |
|
"aya_101.cc100-ko": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 434586, |
|
"n_chars": 655190 |
|
}, |
|
"baichuan.cc100-ko": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 639258, |
|
"n_chars": 655190 |
|
}, |
|
"baichuan2.cc100-ko": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1524839, |
|
"n_tokens": 623358, |
|
"n_chars": 655190 |
|
}, |
|
"bert_base_cased.cc100-ko": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1524839, |
|
"n_tokens": 222828, |
|
"n_chars": 655190 |
|
}, |
|
"bert_base_chinese.cc100-ko": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1524839, |
|
"n_tokens": 219752, |
|
"n_chars": 655190 |
|
}, |
|
"bert_base_uncased.cc100-ko": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1524839, |
|
"n_tokens": 904756, |
|
"n_chars": 655190 |
|
}, |
|
"bloom.cc100-ko": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1524839, |
|
"n_tokens": 742111, |
|
"n_chars": 655190 |
|
}, |
|
"byt5_small.cc100-ko": { |
|
"vocab_size": 384, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1534839, |
|
"n_chars": 655190 |
|
}, |
|
"character_glm_6b.cc100-ko": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1524839, |
|
"n_tokens": 672160, |
|
"n_chars": 655190 |
|
}, |
|
"chatglm2_6b.cc100-ko": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1524839, |
|
"n_tokens": 672156, |
|
"n_chars": 655190 |
|
}, |
|
"chatglm3_6b.cc100-ko": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1524839, |
|
"n_tokens": 672160, |
|
"n_chars": 655190 |
|
}, |
|
"chatglm_6b.cc100-ko": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1524839, |
|
"n_tokens": 939630, |
|
"n_chars": 655190 |
|
}, |
|
"chatyuan_large_v2.cc100-ko": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1524839, |
|
"n_tokens": 354411, |
|
"n_chars": 655190 |
|
}, |
|
"chinese_llama.cc100-ko": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1524839, |
|
"n_tokens": 913553, |
|
"n_chars": 655190 |
|
}, |
|
"chinese_llama2.cc100-ko": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1524839, |
|
"n_tokens": 963427, |
|
"n_chars": 655190 |
|
}, |
|
"code_davinci_002.cc100-ko": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1308993, |
|
"n_chars": 655190 |
|
}, |
|
"crystal_coder.cc100-ko": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1524839, |
|
"n_tokens": 954428, |
|
"n_chars": 655190 |
|
}, |
|
"dbrx_instruct.cc100-ko": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1524839, |
|
"n_tokens": 652277, |
|
"n_chars": 655190 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-ko": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1454805, |
|
"n_chars": 655190 |
|
}, |
|
"deepseek_llm_7b_base.cc100-ko": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1081983, |
|
"n_chars": 655190 |
|
}, |
|
"falcon_180b.cc100-ko": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1330568, |
|
"n_chars": 655190 |
|
}, |
|
"falcon_7b.cc100-ko": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1330568, |
|
"n_chars": 655190 |
|
}, |
|
"fastchat_t5_3b.cc100-ko": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1524839, |
|
"n_tokens": 484953, |
|
"n_chars": 655190 |
|
}, |
|
"flan_t5_base.cc100-ko": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 344457, |
|
"n_chars": 655190 |
|
}, |
|
"gemma_7b.cc100-ko": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 464410, |
|
"n_chars": 655190 |
|
}, |
|
"gpt2.cc100-ko": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1309029, |
|
"n_chars": 655190 |
|
}, |
|
"gpt2_chinese.cc100-ko": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1055974, |
|
"n_chars": 655190 |
|
}, |
|
"gpt_35_turbo.cc100-ko": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1524839, |
|
"n_tokens": 652277, |
|
"n_chars": 655190 |
|
}, |
|
"gpt_4.cc100-ko": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1524839, |
|
"n_tokens": 652277, |
|
"n_chars": 655190 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1512832, |
|
"n_chars": 655190 |
|
}, |
|
"gpt_nexo_20b.cc100-ko": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1524839, |
|
"n_tokens": 973288, |
|
"n_chars": 655190 |
|
}, |
|
"grok_1.cc100-ko": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1152005, |
|
"n_chars": 655190 |
|
}, |
|
"internlm2_chat_7b.cc100-ko": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1008524, |
|
"n_chars": 655190 |
|
}, |
|
"internlm2_math_7b.cc100-ko": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1008524, |
|
"n_chars": 655190 |
|
}, |
|
"internlm_chat_7b.cc100-ko": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1524839, |
|
"n_tokens": 839609, |
|
"n_chars": 655190 |
|
}, |
|
"internlm_xcomposer_7b.cc100-ko": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1524839, |
|
"n_tokens": 839609, |
|
"n_chars": 655190 |
|
}, |
|
"jamba_v0_1.cc100-ko": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1524839, |
|
"n_tokens": 715688, |
|
"n_chars": 655190 |
|
}, |
|
"kplug.cc100-ko": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1524839, |
|
"n_tokens": 222771, |
|
"n_chars": 655190 |
|
}, |
|
"llama.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"llama2.cc100-ko": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"llama3.cc100-ko": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1524839, |
|
"n_tokens": 412595, |
|
"n_chars": 655190 |
|
}, |
|
"llama_3_chinese_8b.cc100-ko": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1524839, |
|
"n_tokens": 422595, |
|
"n_chars": 655190 |
|
}, |
|
"mistral_7b.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 728766, |
|
"n_chars": 655190 |
|
}, |
|
"mixtral_8_7b.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 728766, |
|
"n_chars": 655190 |
|
}, |
|
"mobilebert_uncased.cc100-ko": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1524839, |
|
"n_tokens": 904756, |
|
"n_chars": 655190 |
|
}, |
|
"moss.cc100-ko": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1305249, |
|
"n_chars": 655190 |
|
}, |
|
"mt5_large.cc100-ko": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 434586, |
|
"n_chars": 655190 |
|
}, |
|
"olmo_7b.cc100-ko": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1524839, |
|
"n_tokens": 973288, |
|
"n_chars": 655190 |
|
}, |
|
"orion_14b_chat.cc100-ko": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1524839, |
|
"n_tokens": 351149, |
|
"n_chars": 655190 |
|
}, |
|
"phi_1.cc100-ko": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1308988, |
|
"n_chars": 655190 |
|
}, |
|
"phi_2.cc100-ko": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1308988, |
|
"n_chars": 655190 |
|
}, |
|
"phi_3_mini.cc100-ko": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"pko_t5_large.cc100-ko": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1524839, |
|
"n_tokens": 471643, |
|
"n_chars": 655190 |
|
}, |
|
"prompt_clue.cc100-ko": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1524839, |
|
"n_tokens": 354411, |
|
"n_chars": 655190 |
|
}, |
|
"qwen1_5_14b_chat.cc100-ko": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1524839, |
|
"n_tokens": 457492, |
|
"n_chars": 655190 |
|
}, |
|
"qwen_1_8b_chat.cc100-ko": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1524839, |
|
"n_tokens": 457492, |
|
"n_chars": 655190 |
|
}, |
|
"qwen_72b_chat.cc100-ko": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1524839, |
|
"n_tokens": 457492, |
|
"n_chars": 655190 |
|
}, |
|
"qwen_7b_chat.cc100-ko": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1524839, |
|
"n_tokens": 457492, |
|
"n_chars": 655190 |
|
}, |
|
"roberta_chinese_clue.cc100-ko": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1524839, |
|
"n_tokens": 226812, |
|
"n_chars": 655190 |
|
}, |
|
"skywork_13b_base.cc100-ko": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1524839, |
|
"n_tokens": 962744, |
|
"n_chars": 655190 |
|
}, |
|
"skywork_13b_math.cc100-ko": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1524839, |
|
"n_tokens": 962744, |
|
"n_chars": 655190 |
|
}, |
|
"solar_10_7b.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 728766, |
|
"n_chars": 655190 |
|
}, |
|
"starchat_alpha.cc100-ko": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1524839, |
|
"n_tokens": 580873, |
|
"n_chars": 655190 |
|
}, |
|
"switch_c_2048.cc100-ko": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 344457, |
|
"n_chars": 655190 |
|
}, |
|
"t5_base.cc100-ko": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 344457, |
|
"n_chars": 655190 |
|
}, |
|
"t5_large.cc100-ko": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 344457, |
|
"n_chars": 655190 |
|
}, |
|
"t5_small.cc100-ko": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1524839, |
|
"n_tokens": 344457, |
|
"n_chars": 655190 |
|
}, |
|
"text_davinci_003.cc100-ko": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1308993, |
|
"n_chars": 655190 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-ko": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1524839, |
|
"n_tokens": 793053, |
|
"n_chars": 655190 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-ko": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1524839, |
|
"n_tokens": 484082, |
|
"n_chars": 655190 |
|
}, |
|
"wizardcoder_15b_v1.cc100-ko": { |
|
"vocab_size": 49153, |
|
"n_bytes": 1524839, |
|
"n_tokens": 580873, |
|
"n_chars": 655190 |
|
}, |
|
"wizardcoder_python_7b_v1.cc100-ko": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"wizardlm_7b_v1.cc100-ko": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"wizardmath_70b_v1.cc100-ko": { |
|
"vocab_size": 32002, |
|
"n_bytes": 1524839, |
|
"n_tokens": 964428, |
|
"n_chars": 655190 |
|
}, |
|
"xlm_roberta.cc100-ko": { |
|
"vocab_size": 250002, |
|
"n_bytes": 1524839, |
|
"n_tokens": 374571, |
|
"n_chars": 655190 |
|
}, |
|
"yi_34b.cc100-ko": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1203134, |
|
"n_chars": 655190 |
|
}, |
|
"yi_6b.cc100-ko": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1203134, |
|
"n_chars": 655190 |
|
}, |
|
"yi_vl34b.cc100-ko": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 1210021, |
|
"n_chars": 655190 |
|
}, |
|
"zephyr_7b_beta.cc100-ko": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1524839, |
|
"n_tokens": 728766, |
|
"n_chars": 655190 |
|
}, |
|
"llama_3_chinese_8b.cc100-zh-Hans": { |
|
"vocab_size": 128256, |
|
"n_bytes": 2633047, |
|
"n_tokens": 757405, |
|
"n_chars": 927311 |
|
}, |
|
"dutch_llama_tokenizer.cc100-en": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 291975, |
|
"n_chars": 1121360 |
|
}, |
|
"gronlp-gpt2-small-dutch.cc100-en": { |
|
"vocab_size": 40000, |
|
"n_bytes": 1124813, |
|
"n_tokens": 361710, |
|
"n_chars": 1121360 |
|
}, |
|
"yhavinga-gpt2-medium-dutch.cc100-en": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1124813, |
|
"n_tokens": 361847, |
|
"n_chars": 1121360 |
|
}, |
|
"yhavinga-ul2-large-en-nl.cc100-en": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1124813, |
|
"n_tokens": 297641, |
|
"n_chars": 1121360 |
|
}, |
|
"dutch_llama_tokenizer.cc100-zh-Hans": { |
|
"vocab_size": 32000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2621293, |
|
"n_chars": 927311 |
|
}, |
|
"gronlp-gpt2-small-dutch.cc100-zh-Hans": { |
|
"vocab_size": 40000, |
|
"n_bytes": 2633047, |
|
"n_tokens": 1350320, |
|
"n_chars": 927311 |
|
}, |
|
"yhavinga-gpt2-medium-dutch.cc100-zh-Hans": { |
|
"vocab_size": 50257, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2600872, |
|
"n_chars": 927311 |
|
}, |
|
"yhavinga-ul2-large-en-nl.cc100-zh-Hans": { |
|
"vocab_size": 32128, |
|
"n_bytes": 2633047, |
|
"n_tokens": 2519719, |
|
"n_chars": 927311 |
|
}, |
|
"aya_101.cc100-nl": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 423616, |
|
"n_chars": 1508067 |
|
}, |
|
"baichuan.cc100-nl": { |
|
"vocab_size": 64000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 574927, |
|
"n_chars": 1508067 |
|
}, |
|
"baichuan2.cc100-nl": { |
|
"vocab_size": 125696, |
|
"n_bytes": 1513030, |
|
"n_tokens": 540387, |
|
"n_chars": 1508067 |
|
}, |
|
"bert_base_cased.cc100-nl": { |
|
"vocab_size": 28996, |
|
"n_bytes": 1513030, |
|
"n_tokens": 630793, |
|
"n_chars": 1508067 |
|
}, |
|
"bert_base_chinese.cc100-nl": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1513030, |
|
"n_tokens": 626052, |
|
"n_chars": 1508067 |
|
}, |
|
"bert_base_uncased.cc100-nl": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1513030, |
|
"n_tokens": 574651, |
|
"n_chars": 1508067 |
|
}, |
|
"bloom.cc100-nl": { |
|
"vocab_size": 250680, |
|
"n_bytes": 1513030, |
|
"n_tokens": 488924, |
|
"n_chars": 1508067 |
|
}, |
|
"byt5_small.cc100-nl": { |
|
"vocab_size": 384, |
|
"n_bytes": 1513030, |
|
"n_tokens": 1523030, |
|
"n_chars": 1508067 |
|
}, |
|
"character_glm_6b.cc100-nl": { |
|
"vocab_size": 64789, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559014, |
|
"n_chars": 1508067 |
|
}, |
|
"chatglm2_6b.cc100-nl": { |
|
"vocab_size": 64787, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559017, |
|
"n_chars": 1508067 |
|
}, |
|
"chatglm3_6b.cc100-nl": { |
|
"vocab_size": 64796, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559014, |
|
"n_chars": 1508067 |
|
}, |
|
"chatglm_6b.cc100-nl": { |
|
"vocab_size": 150344, |
|
"n_bytes": 1513030, |
|
"n_tokens": 533174, |
|
"n_chars": 1508067 |
|
}, |
|
"chatyuan_large_v2.cc100-nl": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1513030, |
|
"n_tokens": 837963, |
|
"n_chars": 1508067 |
|
}, |
|
"chinese_llama.cc100-nl": { |
|
"vocab_size": 49953, |
|
"n_bytes": 1513030, |
|
"n_tokens": 488766, |
|
"n_chars": 1508067 |
|
}, |
|
"chinese_llama2.cc100-nl": { |
|
"vocab_size": 55296, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495966, |
|
"n_chars": 1508067 |
|
}, |
|
"code_davinci_002.cc100-nl": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559119, |
|
"n_chars": 1508067 |
|
}, |
|
"crystal_coder.cc100-nl": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1513030, |
|
"n_tokens": 485966, |
|
"n_chars": 1508067 |
|
}, |
|
"dbrx_instruct.cc100-nl": { |
|
"vocab_size": 100280, |
|
"n_bytes": 1513030, |
|
"n_tokens": 449343, |
|
"n_chars": 1508067 |
|
}, |
|
"deepseek_coder_33b_instruct.cc100-nl": { |
|
"vocab_size": 32022, |
|
"n_bytes": 1513030, |
|
"n_tokens": 603966, |
|
"n_chars": 1508067 |
|
}, |
|
"deepseek_llm_7b_base.cc100-nl": { |
|
"vocab_size": 100015, |
|
"n_bytes": 1513030, |
|
"n_tokens": 536746, |
|
"n_chars": 1508067 |
|
}, |
|
"dutch_llama_tokenizer.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 366481, |
|
"n_chars": 1508067 |
|
}, |
|
"falcon_180b.cc100-nl": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1513030, |
|
"n_tokens": 438112, |
|
"n_chars": 1508067 |
|
}, |
|
"falcon_7b.cc100-nl": { |
|
"vocab_size": 65024, |
|
"n_bytes": 1513030, |
|
"n_tokens": 438112, |
|
"n_chars": 1508067 |
|
}, |
|
"fastchat_t5_3b.cc100-nl": { |
|
"vocab_size": 32110, |
|
"n_bytes": 1513030, |
|
"n_tokens": 933018, |
|
"n_chars": 1508067 |
|
}, |
|
"flan_t5_base.cc100-nl": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 696337, |
|
"n_chars": 1508067 |
|
}, |
|
"gemma_7b.cc100-nl": { |
|
"vocab_size": 256000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 387522, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt2.cc100-nl": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559119, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt2_chinese.cc100-nl": { |
|
"vocab_size": 21128, |
|
"n_bytes": 1513030, |
|
"n_tokens": 676651, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt_35_turbo.cc100-nl": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1513030, |
|
"n_tokens": 449343, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt_4.cc100-nl": { |
|
"vocab_size": 100277, |
|
"n_bytes": 1513030, |
|
"n_tokens": 449343, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt_neox_japanese_2_7b.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 1509448, |
|
"n_chars": 1508067 |
|
}, |
|
"gpt_nexo_20b.cc100-nl": { |
|
"vocab_size": 50277, |
|
"n_bytes": 1513030, |
|
"n_tokens": 497728, |
|
"n_chars": 1508067 |
|
}, |
|
"grok_1.cc100-nl": { |
|
"vocab_size": 131072, |
|
"n_bytes": 1513030, |
|
"n_tokens": 457359, |
|
"n_chars": 1508067 |
|
}, |
|
"gronlp-gpt2-small-dutch.cc100-nl": { |
|
"vocab_size": 40000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 332376, |
|
"n_chars": 1508067 |
|
}, |
|
"internlm2_chat_7b.cc100-nl": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1513030, |
|
"n_tokens": 494821, |
|
"n_chars": 1508067 |
|
}, |
|
"internlm2_math_7b.cc100-nl": { |
|
"vocab_size": 92544, |
|
"n_bytes": 1513030, |
|
"n_tokens": 494821, |
|
"n_chars": 1508067 |
|
}, |
|
"internlm_chat_7b.cc100-nl": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1513030, |
|
"n_tokens": 494108, |
|
"n_chars": 1508067 |
|
}, |
|
"internlm_xcomposer_7b.cc100-nl": { |
|
"vocab_size": 103168, |
|
"n_bytes": 1513030, |
|
"n_tokens": 494108, |
|
"n_chars": 1508067 |
|
}, |
|
"jamba_v0_1.cc100-nl": { |
|
"vocab_size": 65536, |
|
"n_bytes": 1513030, |
|
"n_tokens": 442176, |
|
"n_chars": 1508067 |
|
}, |
|
"kplug.cc100-nl": { |
|
"vocab_size": 10261, |
|
"n_bytes": 1513030, |
|
"n_tokens": 678131, |
|
"n_chars": 1508067 |
|
}, |
|
"llama.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495966, |
|
"n_chars": 1508067 |
|
}, |
|
"llama2.cc100-nl": { |
|
"vocab_size": 32001, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495966, |
|
"n_chars": 1508067 |
|
}, |
|
"llama3.cc100-nl": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1513030, |
|
"n_tokens": 448173, |
|
"n_chars": 1508067 |
|
}, |
|
"llama_3_chinese_8b.cc100-nl": { |
|
"vocab_size": 128256, |
|
"n_bytes": 1513030, |
|
"n_tokens": 458173, |
|
"n_chars": 1508067 |
|
}, |
|
"mistral_7b.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 515884, |
|
"n_chars": 1508067 |
|
}, |
|
"mixtral_8_7b.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 515884, |
|
"n_chars": 1508067 |
|
}, |
|
"mobilebert_uncased.cc100-nl": { |
|
"vocab_size": 30522, |
|
"n_bytes": 1513030, |
|
"n_tokens": 574651, |
|
"n_chars": 1508067 |
|
}, |
|
"moss.cc100-nl": { |
|
"vocab_size": 106072, |
|
"n_bytes": 1513030, |
|
"n_tokens": 557984, |
|
"n_chars": 1508067 |
|
}, |
|
"mt5_large.cc100-nl": { |
|
"vocab_size": 250100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 423616, |
|
"n_chars": 1508067 |
|
}, |
|
"dutch_llama_tokenizer.cc100-es": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 610314, |
|
"n_chars": 1630297 |
|
}, |
|
"gronlp-gpt2-small-dutch.cc100-es": { |
|
"vocab_size": 40000, |
|
"n_bytes": 1664455, |
|
"n_tokens": 608465, |
|
"n_chars": 1630297 |
|
}, |
|
"yhavinga-gpt2-medium-dutch.cc100-es": { |
|
"vocab_size": 50257, |
|
"n_bytes": 1664455, |
|
"n_tokens": 605886, |
|
"n_chars": 1630297 |
|
}, |
|
"yhavinga-ul2-large-en-nl.cc100-es": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1664455, |
|
"n_tokens": 686255, |
|
"n_chars": 1630297 |
|
}, |
|
"olmo_7b.cc100-nl": { |
|
"vocab_size": 50280, |
|
"n_bytes": 1513030, |
|
"n_tokens": 497728, |
|
"n_chars": 1508067 |
|
}, |
|
"orion_14b_chat.cc100-nl": { |
|
"vocab_size": 84608, |
|
"n_bytes": 1513030, |
|
"n_tokens": 599429, |
|
"n_chars": 1508067 |
|
}, |
|
"phi_1.cc100-nl": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559124, |
|
"n_chars": 1508067 |
|
}, |
|
"phi_2.cc100-nl": { |
|
"vocab_size": 50295, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559124, |
|
"n_chars": 1508067 |
|
}, |
|
"phi_3_mini.cc100-nl": { |
|
"vocab_size": 32011, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495966, |
|
"n_chars": 1508067 |
|
}, |
|
"pko_t5_large.cc100-nl": { |
|
"vocab_size": 50358, |
|
"n_bytes": 1513030, |
|
"n_tokens": 1017288, |
|
"n_chars": 1508067 |
|
}, |
|
"prompt_clue.cc100-nl": { |
|
"vocab_size": 32128, |
|
"n_bytes": 1513030, |
|
"n_tokens": 837963, |
|
"n_chars": 1508067 |
|
}, |
|
"qwen1_5_14b_chat.cc100-nl": { |
|
"vocab_size": 151646, |
|
"n_bytes": 1513030, |
|
"n_tokens": 453342, |
|
"n_chars": 1508067 |
|
}, |
|
"qwen_1_8b_chat.cc100-nl": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1513030, |
|
"n_tokens": 453342, |
|
"n_chars": 1508067 |
|
}, |
|
"qwen_72b_chat.cc100-nl": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1513030, |
|
"n_tokens": 453342, |
|
"n_chars": 1508067 |
|
}, |
|
"qwen_7b_chat.cc100-nl": { |
|
"vocab_size": 151851, |
|
"n_bytes": 1513030, |
|
"n_tokens": 453342, |
|
"n_chars": 1508067 |
|
}, |
|
"roberta_chinese_clue.cc100-nl": { |
|
"vocab_size": 8021, |
|
"n_bytes": 1513030, |
|
"n_tokens": 821246, |
|
"n_chars": 1508067 |
|
}, |
|
"skywork_13b_base.cc100-nl": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495958, |
|
"n_chars": 1508067 |
|
}, |
|
"skywork_13b_math.cc100-nl": { |
|
"vocab_size": 65519, |
|
"n_bytes": 1513030, |
|
"n_tokens": 495958, |
|
"n_chars": 1508067 |
|
}, |
|
"solar_10_7b.cc100-nl": { |
|
"vocab_size": 32000, |
|
"n_bytes": 1513030, |
|
"n_tokens": 515884, |
|
"n_chars": 1508067 |
|
}, |
|
"starchat_alpha.cc100-nl": { |
|
"vocab_size": 49156, |
|
"n_bytes": 1513030, |
|
"n_tokens": 532871, |
|
"n_chars": 1508067 |
|
}, |
|
"switch_c_2048.cc100-nl": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 696333, |
|
"n_chars": 1508067 |
|
}, |
|
"t5_base.cc100-nl": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 696333, |
|
"n_chars": 1508067 |
|
}, |
|
"t5_large.cc100-nl": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 696333, |
|
"n_chars": 1508067 |
|
}, |
|
"t5_small.cc100-nl": { |
|
"vocab_size": 32100, |
|
"n_bytes": 1513030, |
|
"n_tokens": 696333, |
|
"n_chars": 1508067 |
|
}, |
|
"text_davinci_003.cc100-nl": { |
|
"vocab_size": 50281, |
|
"n_bytes": 1513030, |
|
"n_tokens": 559119, |
|
"n_chars": 1508067 |
|
}, |
|
"tigerbot_13b_chat_v2.cc100-nl": { |
|
"vocab_size": 60515, |
|
"n_bytes": 1513030, |
|
"n_tokens": 486271, |
|
"n_chars": 1508067 |
|
}, |
|
"tigerbot_70b_chat_v4_4k.cc100-nl": { |
|
"vocab_size": 65110, |
|
"n_bytes": 1513030, |
|
"n_tokens": 486472, |
|
"n_chars": 1508067 |
|
} |
|
} |