diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fe3f0c0881b1222321cd289c5002c09e3a3a82 --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,70 @@ +{ + "model_type": "rwkv5", + "quantization": "q4f16_1", + "model_config": { + "hidden_size": 4096, + "intermediate_size": 14336, + "num_hidden_layers": 32, + "vocab_size": 65536, + "model_version": "5_2", + "tensor_parallel_shards": 1, + "rescale_every": 6, + "head_size": 64, + "layer_norm_epsilon": 1e-05, + "context_window_size": -1, + "prefill_chunk_size": 4096, + "num_heads": 64, + "max_batch_size": 80 + }, + "vocab_size": 65536, + "context_window_size": -1, + "sliding_window_size": -1, + "prefill_chunk_size": 4096, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.7, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.1, + "conv_template": { + "name": "rwkv_world", + "system_template": "User: hi\n\nAssistant: {system_message}", + "system_message": "Hi. I am your assistant and I will provide expert full response in full details. Please feel free to ask any question and I will always answer it.", + "add_role_after_system_message": true, + "roles": { + "user": "User", + "assistant": "Assistant" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "\n\n" + ], + "role_content_sep": ": ", + "role_empty_sep": ": ", + "stop_str": [ + "\n\n" + ], + "stop_token_ids": [ + 0 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 0, + "bos_token_id": 0, + "eos_token_id": 0, + "tokenizer_files": [ + "tokenizer_config.json" + ], + "token_table_postproc_method": "byte_fallback", + "version": "0.1.0" +} \ No newline at end of file diff --git a/ndarray-cache-b16.json b/ndarray-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..b3d2037363b67bca786adcf6abd4628a94d11c74 --- /dev/null +++ b/ndarray-cache-b16.json @@ -0,0 +1,11709 @@ +{ + "metadata": { + "ParamSize": 968, + "ParamBytes": 4232609792.0, + "BitsPerParam": 4.503947784206684 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.embeddings.q_weight", + "shape": [ + 65536, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "046e30a4a3cfd1f0f164a97940f9e540" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 26312704, + "records": [ + { + "name": "model.embeddings.q_scale", + "shape": [ + 65536, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16777216 + }, + { + "name": "model.blocks.0.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16785408 + }, + { + "name": "model.blocks.0.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.blocks.0.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16801792 + }, + { + "name": "model.blocks.0.pre_ln.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16809984 + }, + { + "name": "model.blocks.0.pre_ln.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16818176 + }, + { + "name": "model.blocks.0.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16826368 + }, + { + "name": "model.blocks.0.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16834560 + }, + { + "name": "model.blocks.0.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16842752 + }, + { + "name": "model.blocks.0.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16850944 + }, + { + "name": "model.blocks.0.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16859136 + }, + { + "name": "model.blocks.0.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16867328 + }, + { + "name": "model.blocks.0.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16875520 + }, + { + "name": "model.blocks.0.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25264128 + } + ], + "md5sum": "0295524fe14816683e43aeed0b74e6f5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.blocks.0.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.0.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.0.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.0.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.0.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27262976 + } + ], + "md5sum": "24520cbea6e3b902b4b9b1c4544a1cde" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.0.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "866cec5e28a0a4c633ed20d40a50965f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.0.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3bea6f61a7605f236f0c1dcef0c6e62c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26329088, + "records": [ + { + "name": "model.blocks.0.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.0.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.0.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9445376 + }, + { + "name": "model.blocks.0.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9453568 + }, + { + "name": "model.blocks.0.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9461760 + }, + { + "name": "model.blocks.0.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9469952 + }, + { + "name": "model.blocks.0.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13139968 + }, + { + "name": "model.blocks.0.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21528576 + }, + { + "name": "model.blocks.0.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.1.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26247168 + }, + { + "name": "model.blocks.1.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26255360 + }, + { + "name": "model.blocks.1.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26263552 + }, + { + "name": "model.blocks.1.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26271744 + }, + { + "name": "model.blocks.1.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26279936 + }, + { + "name": "model.blocks.1.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26288128 + }, + { + "name": "model.blocks.1.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26296320 + }, + { + "name": "model.blocks.1.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26304512 + }, + { + "name": "model.blocks.1.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26312704 + }, + { + "name": "model.blocks.1.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26320896 + } + ], + "md5sum": "661dc4de7e50384e69e9306211766bef" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.blocks.1.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.1.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.1.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.1.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.1.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.1.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27262976 + } + ], + "md5sum": "efc86f517ab68b932cfb79d91b512611" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.1.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b980efdc563407d6c83a75219d76814f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.1.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6c7bcfc161bc84938e904409aecfb72d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.1.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.1.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.1.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.1.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.1.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.1.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.1.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.1.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.1.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.1.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.1.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "34538b55ee6d6de02ae33ac034ca600f" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.1.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.2.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.2.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.2.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.2.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.2.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.2.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.2.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.2.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.2.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.2.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.2.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.2.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.2.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.2.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.2.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.2.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "2315fa1cbbed365f00d6f4b1796cdf0b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.2.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "10ffe073eaaf5c92c632081d5398516d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.2.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "067aea6c57cb395d5a7de73df9584408" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.2.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.2.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.2.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.2.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.2.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.2.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.2.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.2.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.2.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.2.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.2.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "dae027157816803616b9a2661a4cdad0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.2.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.3.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.3.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.3.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.3.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.3.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.3.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.3.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.3.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.3.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.3.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.3.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.3.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.3.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.3.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.3.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.3.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "dacfc1937e8cfe2853f69d93bbdf79e0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.3.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "198d005315cea9d6c48808ff1de7530e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.3.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4b92713f7e18be75736692e73f952c3c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.3.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.3.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.3.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.3.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.3.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.3.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.3.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.3.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.3.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.3.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.3.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7351fc4707e68baeefa0df7c6c623568" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.3.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.4.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.4.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.4.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.4.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.4.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.4.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.4.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.4.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.4.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.4.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.4.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.4.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.4.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.4.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.4.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.4.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "f54e7159751b86752a84b5f1b17a15a2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.4.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb6619f759c798c48d8f305e771e86ad" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.4.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7f54497db79f632a61698b80ad4bffb9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.4.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.4.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.4.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.4.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.4.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.4.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.4.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.4.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.4.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.4.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.4.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9b52086970bb041c8918001bfcbf0d49" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.4.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.5.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.5.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.5.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.5.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.5.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.5.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.5.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.5.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.5.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.5.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.5.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.5.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.5.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.5.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.5.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.5.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9c644d4c89f00b95c4b72a3f3aa83664" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.5.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3d0b583ab74e64d1844babd9939d9b8f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.5.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "88ff61b9e6999528d91bdfeec6a4229a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.5.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.5.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.5.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.5.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.5.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.5.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.5.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.5.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.5.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.5.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.5.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "3ac3b218f29991dbed1b4c9b51f22f8f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.5.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.6.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.6.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.6.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.6.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.6.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.6.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.6.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.6.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.6.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.6.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.6.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.6.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.6.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.6.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.6.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.6.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "dea5a129bd5019f00c655333515108a8" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.6.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1d40f97be2590ed11ec51e9fb630b7ed" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.6.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9314627148ca4905167f1ce61cf8358d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.6.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.6.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.6.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.6.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.6.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.6.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.6.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.6.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.6.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.6.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.6.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "90679f2e25dc8d33900f586a43f7b9e8" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.6.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.7.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.7.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.7.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.7.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.7.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.7.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.7.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.7.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.7.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.7.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.7.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.7.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.7.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.7.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.7.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.7.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "07ed7b601822a7a71fcda4460e3ccc71" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.7.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "edf9527ad19ab0a6cab605607a247c04" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.7.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87e357c8978cceb7d327a21d9904c4b8" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.7.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.7.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.7.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.7.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.7.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.7.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.7.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.7.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.7.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.7.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.7.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f91dc74ad448f1ccb94d5881f4ddd391" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.7.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.8.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.8.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.8.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.8.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.8.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.8.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.8.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.8.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.8.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.8.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.8.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.8.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.8.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.8.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.8.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.8.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "d6b46c0d4a6010838f7f47156045f60d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.8.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "74c066b92485eb4bc3575a6b55ce177d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.8.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b2efef932ff3673f5f1bf9a82703ae53" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.8.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.8.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.8.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.8.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.8.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.8.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.8.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.8.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.8.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.8.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.8.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f2e4fbaafc8b75252519feb17ff0f594" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.8.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.9.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.9.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.9.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.9.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.9.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.9.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.9.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.9.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.9.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.9.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.9.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.9.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.9.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.9.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.9.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.9.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "8416d090ef375f0ecf2e2a946ef3ce7d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.9.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "650c045cdc4b9121eba74dad0b74b928" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.9.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "45bb2237c29ae1c6d3f6ffe2a0f58f1c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.9.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.9.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.9.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.9.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.9.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.9.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.9.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.9.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.9.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.9.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.9.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7a97d793ca53279bcc4b0b9a5eb42d5c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.9.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.10.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.10.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.10.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.10.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.10.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.10.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.10.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.10.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.10.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.10.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.10.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.10.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.10.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.10.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.10.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.10.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9f5528dd7425508274f14c30d1baae2f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.10.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0753a6c451a2bba808743a3fad39ac9c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.10.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "289cf390d5b78283d60cfbe0408e931c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.10.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.10.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.10.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.10.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.10.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.10.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.10.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.10.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.10.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.10.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.10.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "55314819fca430d3be78d7189015bca3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.10.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.11.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.11.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.11.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.11.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.11.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.11.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.11.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.11.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.11.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.11.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.11.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.11.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.11.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.11.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.11.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.11.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b09d4abf7e0dd0a18f2c676f555e3109" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.11.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "aec2c76707cd09171d2c84187541813e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.11.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0ad9ca4a5f1fbb76c348d3f82b8b0d3e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.11.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.11.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.11.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.11.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.11.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.11.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.11.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.11.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.11.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.11.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.11.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "d8b1c3db2a9d81e4e8b601f7389259c6" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.11.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.12.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.12.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.12.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.12.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.12.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.12.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.12.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.12.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.12.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.12.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.12.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.12.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.12.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.12.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.12.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.12.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "7af819acbaac0aa1ff9ea12be38b346d" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.12.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ca9cc3c2806682cb581059d03bad831d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.12.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8be59f7a46352917352ff352156fb034" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.12.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.12.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.12.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.12.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.12.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.12.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.12.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.12.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.12.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.12.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.12.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "c16d4141de11f9e2bf7e5d07606b2266" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.12.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.13.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.13.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.13.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.13.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.13.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.13.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.13.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.13.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.13.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.13.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.13.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.13.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.13.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.13.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.13.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.13.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "cbe7fa3850baf4b3cae63cbf4ec2ee64" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.13.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b700200a9a04b1d97623f8a41645636b" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.13.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "34bc7a88722a31bc4d035d86198618da" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.13.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.13.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.13.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.13.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.13.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.13.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.13.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.13.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.13.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.13.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.13.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "c8c6f5d7e13b7e50a91ea8cd173f49a6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.13.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.14.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.14.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.14.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.14.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.14.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.14.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.14.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.14.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.14.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.14.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.14.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.14.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.14.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.14.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.14.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.14.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "4e47d2fe8528a5dbdc973d41f55c3e89" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.14.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "184996fd57d6f79bb6416b71033c8d47" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.14.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a9a602610807c6aa19fd083a4e2be034" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.14.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.14.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.14.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.14.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.14.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.14.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.14.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.14.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.14.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.14.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.14.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9a719bab5ee0b6b46838ab89bed3e5e0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.14.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.15.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.15.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.15.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.15.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.15.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.15.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.15.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.15.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.15.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.15.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.15.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.15.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.15.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.15.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.15.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.15.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "ae9c852e935eeade224ceb1dc47d6525" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.15.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9d728fcdba1fd7ca1d3969fced4d481f" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.15.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "61e8c6ec3bdf91dcfe5be32564a09961" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.15.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.15.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.15.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.15.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.15.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.15.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.15.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.15.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.15.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.15.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.15.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "46c24894a22cbf694ef973c5161e17b2" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.15.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.16.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.16.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.16.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.16.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.16.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.16.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.16.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.16.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.16.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.16.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.16.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.16.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.16.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.16.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.16.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.16.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "8010343132404299d2034cc787776626" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.16.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8ae651b6522b9e37efd31776ec1470e9" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.16.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4c2b2c76c2ae918452c459ba0dad9166" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.16.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.16.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.16.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.16.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.16.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.16.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.16.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.16.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.16.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.16.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.16.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "902a6ff6acbee8cb0d608f2b0991ce30" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.16.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.17.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.17.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.17.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.17.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.17.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.17.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.17.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.17.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.17.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.17.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.17.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.17.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.17.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.17.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.17.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.17.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b0abcb70ab84011b0396efaa799e7729" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.17.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eed9242afa924e4f855b6c8558fa44fe" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.17.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fa53fd6930fe478d1c7303093d5ac14f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.17.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.17.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.17.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.17.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.17.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.17.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.17.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.17.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.17.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.17.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.17.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "afdb0d5a92c2d71542396ce28e65deb2" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.17.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.18.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.18.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.18.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.18.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.18.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.18.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.18.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.18.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.18.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.18.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.18.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.18.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.18.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.18.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.18.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.18.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "5f84aafe1b29dbaf558c2430977c6180" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.18.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d0004cb8cf869f5d915956582a594fdf" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.18.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e4028f4ef402a15ec87c36a44ed2c9b5" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.18.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.18.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.18.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.18.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.18.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.18.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.18.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.18.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.18.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.18.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.18.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9bd2595dab92f2557e60cc4229a13128" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.18.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.19.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.19.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.19.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.19.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.19.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.19.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.19.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.19.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.19.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.19.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.19.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.19.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.19.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.19.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.19.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.19.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "02f35718e3cda850a04740502a493e32" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.19.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e70a6728478f0b012b492fedeeaa1ade" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.19.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b339eaa20864cc05c6fa4bbeaf9debab" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.19.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.19.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.19.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.19.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.19.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.19.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.19.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.19.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.19.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.19.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.19.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f9ddc201398dfc171f88d8203a68c60e" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.19.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.20.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.20.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.20.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.20.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.20.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.20.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.20.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.20.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.20.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.20.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.20.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.20.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.20.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.20.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.20.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.20.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "92aa8c5dbada8f75c7e6788af36a69bf" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.20.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "22f898049d0d8cf37658854fcde5e8ac" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.20.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "50e1080b1c866efd927ac80cacb7561b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.20.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.20.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.20.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.20.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.20.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.20.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.20.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.20.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.20.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.20.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.20.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "4b2efd10838f1df096b7c0c24db5ab57" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.20.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.21.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.21.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.21.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.21.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.21.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.21.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.21.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.21.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.21.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.21.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.21.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.21.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.21.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.21.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.21.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.21.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b6aa7d975cb9d3353fc18d498ec61d47" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.21.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ccb2fa28ddc235310835d0318f42857d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.21.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3732364bf6482d98280a446c4dddae6b" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.21.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.21.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.21.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.21.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.21.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.21.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.21.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.21.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.21.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.21.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.21.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "72e41c09550151679abeb5a0f9a1922c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.21.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.22.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.22.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.22.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.22.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.22.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.22.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.22.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.22.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.22.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.22.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.22.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.22.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.22.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.22.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.22.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.22.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "74f7c817c52838821ee4de8ebe237197" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.22.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "61966489f2e053a582553f15c6b3047e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.22.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3b75f4a022339becb0c62b49b459726b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.22.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.22.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.22.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.22.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.22.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.22.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.22.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.22.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.22.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.22.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.22.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "72dae72473fbf42e1254c6f1cd83b4da" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.22.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.23.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.23.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.23.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.23.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.23.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.23.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.23.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.23.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.23.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.23.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.23.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.23.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.23.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.23.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.23.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.23.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "5a391c3304b3e770df9001a6920f39bc" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.23.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eae307f40052be1bae2aac1652ac5afd" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.23.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c7cac6e2b8b9d673ec755831c8886f02" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.23.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.23.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.23.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.23.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.23.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.23.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.23.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.23.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.23.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.23.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.23.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "e71060a89c4b7ad8652f8d563f5ed3ef" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.23.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.24.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.24.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.24.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.24.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.24.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.24.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.24.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.24.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.24.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.24.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.24.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.24.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.24.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.24.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.24.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.24.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "293156db377fa0898e5edb08378aafda" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.24.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e12372bb194045d945f1878e0afaffa0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.24.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5bf2793268a541a647dad57404cc5b17" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.24.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.24.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.24.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.24.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.24.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.24.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.24.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.24.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.24.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.24.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.24.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "18e9aa14be867cc0742e65962af884af" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.24.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.25.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.25.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.25.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.25.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.25.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.25.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.25.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.25.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.25.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.25.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.25.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.25.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.25.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.25.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.25.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.25.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "33c0bb0001b3b7bd5c21eb6c1dcd5026" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.25.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "119bc976a5301be7489b85bceb37c0cc" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.25.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "92f2a6f658a9c78f879485b0332d2e18" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.25.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.25.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.25.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.25.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.25.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.25.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.25.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.25.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.25.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.25.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.25.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "d34e73086bbb49771b5b141e0ea1a79e" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.25.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.26.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.26.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.26.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.26.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.26.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.26.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.26.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.26.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.26.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.26.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.26.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.26.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.26.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.26.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.26.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.26.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "64e9aa449c084f487de04108e7ab8743" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.26.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ed09d3de92814b90ade5b8c19e4fb4dc" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.26.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c9e9aa84cf6b2ae962453b3a43934cd8" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.26.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.26.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.26.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.26.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.26.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.26.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.26.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.26.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.26.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.26.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.26.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "4297288819d8924de502621d698a744b" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.26.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.27.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.27.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.27.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.27.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.27.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.27.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.27.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.27.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.27.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.27.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.27.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.27.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.27.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.27.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.27.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.27.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "46b7eb70e9f8b7c51eccc60c5e892987" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.27.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2621bca49d9d8b258a8187abc37db1fd" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.27.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87a4734fc6990803571b6477b4a0e387" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.27.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.27.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.27.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.27.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.27.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.27.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.27.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.27.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.27.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.27.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.27.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "0652d3630b2d7f27de53286f92ad53de" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.27.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.28.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.28.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.28.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.28.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.28.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.28.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.28.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.28.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.28.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.28.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.28.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.28.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.28.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.28.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.28.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.28.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "f1156b137dec77d4e6523e84010b7494" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.28.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a0b22b5d16b013c1f8132fa72962a0b6" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.28.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d536e24d1b00996a947dd16e8a94d6fc" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.28.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.28.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.28.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.28.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.28.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.28.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.28.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.28.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.28.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.28.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.28.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7bbfc5dbe53b79a1b784cf99969e683c" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.28.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.29.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.29.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.29.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.29.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.29.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.29.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.29.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.29.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.29.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.29.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.29.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.29.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.29.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.29.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.29.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.29.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9c5e4f54200a33e3ab10f15f669e4a58" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.29.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "90df27561a1ff4fb2450337feef63e80" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.29.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f954b8ecfe4d19d697db90e1d4a8817d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.29.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.29.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.29.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.29.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.29.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.29.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.29.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.29.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.29.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.29.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.29.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "01ae69a9f9c61843f9901e4de0b0ef7b" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.29.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.30.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.30.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.30.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.30.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.30.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.30.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.30.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.30.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.30.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.30.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.30.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.30.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.30.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.30.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.30.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.30.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "c275ba41fb708efeed2a73a50609aafe" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.30.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5546cc902315c367aa04a8fd264fe273" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.30.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "32cb3e258005840a51652ee2b762b420" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.30.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.30.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.30.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.30.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.30.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.30.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.30.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.30.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.30.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.30.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.30.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "2cc258f7fff62ef3e0c25532d24b3cf9" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.30.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.31.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.31.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.31.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.31.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.31.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.31.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.31.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.31.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.31.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.31.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.31.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.31.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.31.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.31.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.31.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.31.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "16e257d9ef167d48dd55a000506d5e4f" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.31.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "674d28fdeb269d40d9606467e6ba6299" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.31.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ee8460b09579b11467c92510fe5ab04f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.31.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.31.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.31.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.31.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.31.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.31.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.31.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.31.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.31.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.31.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.31.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "dc190cc79413353873caa9190a097b22" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "head.q_weight", + "shape": [ + 65536, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ae12c299d81bd2ccebca8a2ec03e035" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 20463616, + "records": [ + { + "name": "model.blocks.31.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.ln_out.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.ln_out.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "head.q_scale", + "shape": [ + 65536, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 3686400 + } + ], + "md5sum": "0cb368a38d525edab3f07917edb80aff" + } + ] +} \ No newline at end of file diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..a7033f9a8992778dea8443b792f6c20549d3f35e --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,11709 @@ +{ + "metadata": { + "ParamSize": 968, + "ParamBytes": 4232609792.0, + "BitsPerParam": 4.503947784206684 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.embeddings.q_weight", + "shape": [ + 65536, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "046e30a4a3cfd1f0f164a97940f9e540" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 26312704, + "records": [ + { + "name": "model.embeddings.q_scale", + "shape": [ + 65536, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16777216 + }, + { + "name": "model.blocks.0.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16785408 + }, + { + "name": "model.blocks.0.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.blocks.0.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16801792 + }, + { + "name": "model.blocks.0.pre_ln.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16809984 + }, + { + "name": "model.blocks.0.pre_ln.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16818176 + }, + { + "name": "model.blocks.0.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16826368 + }, + { + "name": "model.blocks.0.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16834560 + }, + { + "name": "model.blocks.0.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16842752 + }, + { + "name": "model.blocks.0.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16850944 + }, + { + "name": "model.blocks.0.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16859136 + }, + { + "name": "model.blocks.0.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16867328 + }, + { + "name": "model.blocks.0.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16875520 + }, + { + "name": "model.blocks.0.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25264128 + } + ], + "md5sum": "0295524fe14816683e43aeed0b74e6f5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.blocks.0.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.0.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.0.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.0.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.0.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27262976 + } + ], + "md5sum": "24520cbea6e3b902b4b9b1c4544a1cde" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.0.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "866cec5e28a0a4c633ed20d40a50965f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.0.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3bea6f61a7605f236f0c1dcef0c6e62c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26329088, + "records": [ + { + "name": "model.blocks.0.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.0.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.0.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.0.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9445376 + }, + { + "name": "model.blocks.0.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9453568 + }, + { + "name": "model.blocks.0.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9461760 + }, + { + "name": "model.blocks.0.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9469952 + }, + { + "name": "model.blocks.0.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13139968 + }, + { + "name": "model.blocks.0.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21528576 + }, + { + "name": "model.blocks.0.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.1.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26247168 + }, + { + "name": "model.blocks.1.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26255360 + }, + { + "name": "model.blocks.1.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26263552 + }, + { + "name": "model.blocks.1.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26271744 + }, + { + "name": "model.blocks.1.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26279936 + }, + { + "name": "model.blocks.1.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26288128 + }, + { + "name": "model.blocks.1.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26296320 + }, + { + "name": "model.blocks.1.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26304512 + }, + { + "name": "model.blocks.1.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26312704 + }, + { + "name": "model.blocks.1.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26320896 + } + ], + "md5sum": "661dc4de7e50384e69e9306211766bef" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 28311552, + "records": [ + { + "name": "model.blocks.1.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.1.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.1.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.1.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.1.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.1.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27262976 + } + ], + "md5sum": "efc86f517ab68b932cfb79d91b512611" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.1.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b980efdc563407d6c83a75219d76814f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.1.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6c7bcfc161bc84938e904409aecfb72d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.1.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.1.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.1.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.1.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.1.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.1.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.1.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.1.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.1.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.1.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.1.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "34538b55ee6d6de02ae33ac034ca600f" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.1.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.2.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.2.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.2.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.2.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.2.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.2.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.2.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.2.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.2.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.2.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.2.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.2.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.2.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.2.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.2.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.2.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "2315fa1cbbed365f00d6f4b1796cdf0b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.2.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "10ffe073eaaf5c92c632081d5398516d" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.2.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "067aea6c57cb395d5a7de73df9584408" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.2.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.2.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.2.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.2.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.2.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.2.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.2.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.2.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.2.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.2.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.2.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "dae027157816803616b9a2661a4cdad0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.2.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.3.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.3.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.3.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.3.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.3.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.3.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.3.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.3.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.3.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.3.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.3.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.3.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.3.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.3.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.3.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.3.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "dacfc1937e8cfe2853f69d93bbdf79e0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.3.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "198d005315cea9d6c48808ff1de7530e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.3.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4b92713f7e18be75736692e73f952c3c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.3.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.3.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.3.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.3.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.3.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.3.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.3.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.3.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.3.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.3.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.3.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7351fc4707e68baeefa0df7c6c623568" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.3.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.4.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.4.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.4.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.4.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.4.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.4.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.4.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.4.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.4.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.4.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.4.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.4.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.4.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.4.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.4.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.4.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "f54e7159751b86752a84b5f1b17a15a2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.4.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb6619f759c798c48d8f305e771e86ad" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.4.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7f54497db79f632a61698b80ad4bffb9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.4.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.4.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.4.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.4.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.4.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.4.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.4.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.4.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.4.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.4.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.4.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9b52086970bb041c8918001bfcbf0d49" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.4.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.5.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.5.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.5.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.5.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.5.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.5.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.5.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.5.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.5.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.5.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.5.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.5.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.5.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.5.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.5.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.5.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9c644d4c89f00b95c4b72a3f3aa83664" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.5.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3d0b583ab74e64d1844babd9939d9b8f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.5.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "88ff61b9e6999528d91bdfeec6a4229a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.5.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.5.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.5.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.5.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.5.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.5.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.5.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.5.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.5.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.5.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.5.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "3ac3b218f29991dbed1b4c9b51f22f8f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.5.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.6.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.6.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.6.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.6.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.6.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.6.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.6.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.6.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.6.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.6.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.6.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.6.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.6.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.6.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.6.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.6.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "dea5a129bd5019f00c655333515108a8" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.6.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1d40f97be2590ed11ec51e9fb630b7ed" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.6.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9314627148ca4905167f1ce61cf8358d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.6.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.6.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.6.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.6.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.6.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.6.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.6.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.6.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.6.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.6.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.6.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "90679f2e25dc8d33900f586a43f7b9e8" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.6.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.7.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.7.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.7.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.7.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.7.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.7.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.7.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.7.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.7.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.7.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.7.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.7.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.7.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.7.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.7.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.7.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "07ed7b601822a7a71fcda4460e3ccc71" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.7.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "edf9527ad19ab0a6cab605607a247c04" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.7.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87e357c8978cceb7d327a21d9904c4b8" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.7.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.7.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.7.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.7.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.7.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.7.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.7.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.7.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.7.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.7.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.7.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f91dc74ad448f1ccb94d5881f4ddd391" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.7.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.8.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.8.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.8.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.8.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.8.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.8.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.8.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.8.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.8.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.8.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.8.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.8.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.8.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.8.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.8.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.8.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "d6b46c0d4a6010838f7f47156045f60d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.8.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "74c066b92485eb4bc3575a6b55ce177d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.8.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b2efef932ff3673f5f1bf9a82703ae53" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.8.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.8.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.8.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.8.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.8.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.8.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.8.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.8.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.8.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.8.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.8.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f2e4fbaafc8b75252519feb17ff0f594" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.8.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.9.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.9.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.9.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.9.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.9.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.9.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.9.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.9.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.9.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.9.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.9.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.9.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.9.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.9.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.9.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.9.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "8416d090ef375f0ecf2e2a946ef3ce7d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.9.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "650c045cdc4b9121eba74dad0b74b928" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.9.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "45bb2237c29ae1c6d3f6ffe2a0f58f1c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.9.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.9.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.9.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.9.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.9.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.9.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.9.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.9.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.9.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.9.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.9.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7a97d793ca53279bcc4b0b9a5eb42d5c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.9.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.10.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.10.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.10.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.10.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.10.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.10.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.10.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.10.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.10.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.10.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.10.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.10.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.10.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.10.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.10.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.10.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9f5528dd7425508274f14c30d1baae2f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.10.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0753a6c451a2bba808743a3fad39ac9c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.10.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "289cf390d5b78283d60cfbe0408e931c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.10.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.10.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.10.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.10.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.10.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.10.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.10.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.10.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.10.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.10.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.10.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "55314819fca430d3be78d7189015bca3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.10.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.11.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.11.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.11.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.11.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.11.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.11.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.11.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.11.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.11.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.11.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.11.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.11.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.11.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.11.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.11.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.11.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b09d4abf7e0dd0a18f2c676f555e3109" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.11.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "aec2c76707cd09171d2c84187541813e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.11.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0ad9ca4a5f1fbb76c348d3f82b8b0d3e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.11.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.11.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.11.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.11.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.11.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.11.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.11.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.11.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.11.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.11.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.11.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "d8b1c3db2a9d81e4e8b601f7389259c6" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.11.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.12.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.12.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.12.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.12.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.12.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.12.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.12.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.12.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.12.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.12.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.12.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.12.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.12.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.12.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.12.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.12.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "7af819acbaac0aa1ff9ea12be38b346d" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.12.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ca9cc3c2806682cb581059d03bad831d" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.12.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8be59f7a46352917352ff352156fb034" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.12.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.12.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.12.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.12.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.12.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.12.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.12.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.12.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.12.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.12.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.12.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "c16d4141de11f9e2bf7e5d07606b2266" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.12.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.13.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.13.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.13.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.13.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.13.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.13.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.13.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.13.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.13.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.13.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.13.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.13.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.13.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.13.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.13.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.13.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "cbe7fa3850baf4b3cae63cbf4ec2ee64" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.13.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b700200a9a04b1d97623f8a41645636b" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.13.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "34bc7a88722a31bc4d035d86198618da" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.13.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.13.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.13.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.13.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.13.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.13.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.13.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.13.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.13.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.13.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.13.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "c8c6f5d7e13b7e50a91ea8cd173f49a6" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.13.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.14.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.14.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.14.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.14.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.14.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.14.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.14.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.14.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.14.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.14.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.14.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.14.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.14.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.14.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.14.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.14.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "4e47d2fe8528a5dbdc973d41f55c3e89" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.14.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "184996fd57d6f79bb6416b71033c8d47" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.14.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a9a602610807c6aa19fd083a4e2be034" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.14.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.14.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.14.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.14.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.14.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.14.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.14.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.14.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.14.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.14.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.14.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9a719bab5ee0b6b46838ab89bed3e5e0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.14.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.15.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.15.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.15.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.15.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.15.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.15.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.15.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.15.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.15.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.15.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.15.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.15.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.15.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.15.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.15.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.15.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "ae9c852e935eeade224ceb1dc47d6525" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.15.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9d728fcdba1fd7ca1d3969fced4d481f" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.15.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "61e8c6ec3bdf91dcfe5be32564a09961" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.15.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.15.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.15.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.15.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.15.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.15.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.15.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.15.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.15.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.15.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.15.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "46c24894a22cbf694ef973c5161e17b2" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.15.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.16.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.16.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.16.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.16.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.16.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.16.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.16.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.16.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.16.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.16.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.16.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.16.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.16.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.16.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.16.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.16.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "8010343132404299d2034cc787776626" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.16.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8ae651b6522b9e37efd31776ec1470e9" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.16.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4c2b2c76c2ae918452c459ba0dad9166" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.16.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.16.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.16.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.16.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.16.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.16.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.16.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.16.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.16.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.16.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.16.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "902a6ff6acbee8cb0d608f2b0991ce30" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.16.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.17.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.17.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.17.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.17.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.17.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.17.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.17.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.17.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.17.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.17.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.17.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.17.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.17.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.17.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.17.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.17.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b0abcb70ab84011b0396efaa799e7729" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.17.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eed9242afa924e4f855b6c8558fa44fe" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.17.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fa53fd6930fe478d1c7303093d5ac14f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.17.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.17.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.17.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.17.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.17.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.17.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.17.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.17.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.17.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.17.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.17.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "afdb0d5a92c2d71542396ce28e65deb2" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.17.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.18.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.18.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.18.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.18.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.18.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.18.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.18.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.18.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.18.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.18.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.18.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.18.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.18.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.18.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.18.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.18.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "5f84aafe1b29dbaf558c2430977c6180" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.18.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d0004cb8cf869f5d915956582a594fdf" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.18.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e4028f4ef402a15ec87c36a44ed2c9b5" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.18.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.18.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.18.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.18.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.18.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.18.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.18.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.18.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.18.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.18.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.18.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "9bd2595dab92f2557e60cc4229a13128" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.18.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.19.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.19.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.19.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.19.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.19.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.19.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.19.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.19.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.19.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.19.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.19.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.19.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.19.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.19.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.19.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.19.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "02f35718e3cda850a04740502a493e32" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.19.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e70a6728478f0b012b492fedeeaa1ade" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.19.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b339eaa20864cc05c6fa4bbeaf9debab" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.19.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.19.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.19.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.19.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.19.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.19.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.19.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.19.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.19.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.19.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.19.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "f9ddc201398dfc171f88d8203a68c60e" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.19.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.20.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.20.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.20.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.20.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.20.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.20.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.20.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.20.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.20.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.20.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.20.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.20.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.20.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.20.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.20.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.20.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "92aa8c5dbada8f75c7e6788af36a69bf" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.20.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "22f898049d0d8cf37658854fcde5e8ac" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.20.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "50e1080b1c866efd927ac80cacb7561b" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.20.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.20.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.20.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.20.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.20.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.20.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.20.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.20.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.20.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.20.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.20.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "4b2efd10838f1df096b7c0c24db5ab57" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.20.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.21.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.21.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.21.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.21.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.21.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.21.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.21.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.21.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.21.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.21.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.21.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.21.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.21.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.21.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.21.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.21.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "b6aa7d975cb9d3353fc18d498ec61d47" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.21.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ccb2fa28ddc235310835d0318f42857d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.21.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3732364bf6482d98280a446c4dddae6b" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.21.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.21.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.21.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.21.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.21.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.21.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.21.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.21.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.21.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.21.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.21.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "72e41c09550151679abeb5a0f9a1922c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.21.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.22.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.22.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.22.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.22.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.22.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.22.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.22.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.22.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.22.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.22.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.22.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.22.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.22.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.22.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.22.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.22.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "74f7c817c52838821ee4de8ebe237197" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.22.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "61966489f2e053a582553f15c6b3047e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.22.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3b75f4a022339becb0c62b49b459726b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.22.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.22.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.22.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.22.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.22.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.22.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.22.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.22.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.22.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.22.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.22.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "72dae72473fbf42e1254c6f1cd83b4da" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.22.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.23.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.23.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.23.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.23.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.23.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.23.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.23.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.23.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.23.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.23.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.23.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.23.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.23.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.23.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.23.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.23.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "5a391c3304b3e770df9001a6920f39bc" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.23.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eae307f40052be1bae2aac1652ac5afd" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.23.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c7cac6e2b8b9d673ec755831c8886f02" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.23.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.23.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.23.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.23.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.23.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.23.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.23.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.23.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.23.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.23.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.23.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "e71060a89c4b7ad8652f8d563f5ed3ef" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.23.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.24.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.24.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.24.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.24.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.24.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.24.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.24.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.24.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.24.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.24.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.24.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.24.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.24.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.24.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.24.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.24.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "293156db377fa0898e5edb08378aafda" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.24.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e12372bb194045d945f1878e0afaffa0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.24.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5bf2793268a541a647dad57404cc5b17" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.24.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.24.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.24.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.24.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.24.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.24.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.24.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.24.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.24.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.24.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.24.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "18e9aa14be867cc0742e65962af884af" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.24.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.25.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.25.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.25.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.25.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.25.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.25.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.25.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.25.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.25.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.25.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.25.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.25.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.25.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.25.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.25.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.25.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "33c0bb0001b3b7bd5c21eb6c1dcd5026" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.25.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "119bc976a5301be7489b85bceb37c0cc" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.25.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "92f2a6f658a9c78f879485b0332d2e18" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.25.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.25.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.25.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.25.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.25.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.25.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.25.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.25.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.25.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.25.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.25.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "d34e73086bbb49771b5b141e0ea1a79e" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.25.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.26.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.26.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.26.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.26.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.26.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.26.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.26.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.26.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.26.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.26.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.26.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.26.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.26.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.26.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.26.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.26.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "64e9aa449c084f487de04108e7ab8743" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.26.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ed09d3de92814b90ade5b8c19e4fb4dc" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.26.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c9e9aa84cf6b2ae962453b3a43934cd8" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.26.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.26.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.26.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.26.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.26.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.26.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.26.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.26.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.26.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.26.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.26.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "4297288819d8924de502621d698a744b" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.26.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.27.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.27.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.27.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.27.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.27.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.27.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.27.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.27.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.27.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.27.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.27.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.27.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.27.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.27.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.27.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.27.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "46b7eb70e9f8b7c51eccc60c5e892987" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.27.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2621bca49d9d8b258a8187abc37db1fd" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.27.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87a4734fc6990803571b6477b4a0e387" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.27.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.27.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.27.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.27.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.27.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.27.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.27.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.27.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.27.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.27.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.27.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "0652d3630b2d7f27de53286f92ad53de" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.27.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.28.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.28.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.28.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.28.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.28.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.28.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.28.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.28.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.28.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.28.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.28.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.28.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.28.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.28.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.28.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.28.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "f1156b137dec77d4e6523e84010b7494" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.28.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a0b22b5d16b013c1f8132fa72962a0b6" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.28.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d536e24d1b00996a947dd16e8a94d6fc" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.28.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.28.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.28.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.28.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.28.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.28.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.28.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.28.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.28.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.28.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.28.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "7bbfc5dbe53b79a1b784cf99969e683c" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.28.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.29.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.29.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.29.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.29.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.29.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.29.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.29.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.29.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.29.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.29.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.29.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.29.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.29.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.29.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.29.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.29.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "9c5e4f54200a33e3ab10f15f669e4a58" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.29.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "90df27561a1ff4fb2450337feef63e80" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.29.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f954b8ecfe4d19d697db90e1d4a8817d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.29.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.29.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.29.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.29.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.29.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.29.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.29.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.29.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.29.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.29.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.29.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "01ae69a9f9c61843f9901e4de0b0ef7b" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.29.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.30.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.30.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.30.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.30.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.30.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.30.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.30.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.30.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.30.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.30.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.30.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.30.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.30.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.30.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.30.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.30.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "c275ba41fb708efeed2a73a50609aafe" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.30.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5546cc902315c367aa04a8fd264fe273" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.30.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "32cb3e258005840a51652ee2b762b420" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.30.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.30.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.30.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.30.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.30.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.30.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.30.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.30.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.30.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.30.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.30.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "2cc258f7fff62ef3e0c25532d24b3cf9" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.blocks.30.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.blocks.31.ln1.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.blocks.31.ln1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "model.blocks.31.ln2.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3686400 + }, + { + "name": "model.blocks.31.ln2.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3694592 + }, + { + "name": "model.blocks.31.attention.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3702784 + }, + { + "name": "model.blocks.31.attention.time_mix_value", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3710976 + }, + { + "name": "model.blocks.31.attention.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3719168 + }, + { + "name": "model.blocks.31.attention.time_mix_gate", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3727360 + }, + { + "name": "model.blocks.31.attention.time_decay", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3735552 + }, + { + "name": "model.blocks.31.attention.time_faaaa", + "shape": [ + 64, + 64 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3743744 + }, + { + "name": "model.blocks.31.attention.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3751936 + }, + { + "name": "model.blocks.31.attention.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12140544 + }, + { + "name": "model.blocks.31.attention.key.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 13189120 + }, + { + "name": "model.blocks.31.attention.key.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21577728 + }, + { + "name": "model.blocks.31.attention.value.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22626304 + }, + { + "name": "model.blocks.31.attention.value.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31014912 + } + ], + "md5sum": "16e257d9ef167d48dd55a000506d5e4f" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.31.feed_forward.key.q_weight", + "shape": [ + 14336, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "674d28fdeb269d40d9606467e6ba6299" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.blocks.31.feed_forward.value.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ee8460b09579b11467c92510fe5ab04f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 32014336, + "records": [ + { + "name": "model.blocks.31.attention.output.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.blocks.31.attention.output.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.blocks.31.attention.gate.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.blocks.31.attention.gate.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.blocks.31.attention.ln_x.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.blocks.31.attention.ln_x.bias", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18882560 + }, + { + "name": "model.blocks.31.feed_forward.time_mix_key", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "model.blocks.31.feed_forward.time_mix_receptance", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18898944 + }, + { + "name": "model.blocks.31.feed_forward.key.q_scale", + "shape": [ + 14336, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18907136 + }, + { + "name": "model.blocks.31.feed_forward.receptance.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22577152 + }, + { + "name": "model.blocks.31.feed_forward.receptance.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30965760 + } + ], + "md5sum": "dc190cc79413353873caa9190a097b22" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "head.q_weight", + "shape": [ + 65536, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2ae12c299d81bd2ccebca8a2ec03e035" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 20463616, + "records": [ + { + "name": "model.blocks.31.feed_forward.value.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.ln_out.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.ln_out.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3678208 + }, + { + "name": "head.q_scale", + "shape": [ + 65536, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 3686400 + } + ], + "md5sum": "0cb368a38d525edab3f07917edb80aff" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..17f2f753ef00ac3db320067a73f8efd2225ef061 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4720c7596d2d0a81afbad2b0c397f18df3e9ccbbf810ba7bb34f223ad069ed94 +size 134217728 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..5007988adfbc153f77f1661e8f75e2f966edf43c --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d35e0d66f8f443c37804e7aa075f03c5d601d2635ab82b30a0c6ad2903a21e1 +size 26312704 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..98328028f3e2c0330be0afc5f2a5c0833418b204 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35ea0eb6714e9035dbe9dc664b8bdf8135f8881aee14745720211b203fa20f54 +size 32063488 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e5ca568569c02164aa87ad701558b7023959c43 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92839fa846c7842fa0c7874eded60e04fd26173dcd6133e74cc2eef58baba5eb +size 29360128 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bd452eca796a9b74444f3d6e80b751c784e6a70 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34ada4b7016880318728777a018144e636195873a093964415d73043f1d55235 +size 32014336 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe4048829f7e51efc44a4cca8f4aa813f2e6daaa --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd492a8d190b809c0ee1fab5404125fe5747ceea7a0ed1fce1c66a1dff759994 +size 32063488 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e0341958dd198603b3fea4b50b40ba745113c4e --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cb5e653169f111417a19bb9dda94ed71adb89bc304e743c8094dd3749ca784 +size 29360128 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..b96df7fcb55e1ae40c18664e1c60ac9df2a479b1 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dcad979ec60d5b1dc7e3f5e24feadf2a658c0d0bfed4edb7040de974b3b9575 +size 29360128 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..de70ea14f3d901e83cb9d5b3c98c0ad564859f2a --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bce6deff5d6423c2bf824c6cff380adf0dad9964212b18a104981353c329aa +size 32014336 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..9082782860d43a9419b326077db575977e08f85d --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a471a26874a9276763cb2c96d8f9af33699d78e1c1a16066068a0cd81c4292 +size 32063488 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..caf7381eb0f7a5f265001a959e8fa6b176406e62 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e5fbd47fa3f3827d8e7293977b91300e19371c47e7c903c541d02a8881e2d8 +size 29360128 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..866d867b7e6b82873982ce584aa1779f2cf0d20e --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04705d84220e2c47be141f1e8103e3712996f557ac7b3ad02cdf6f2d2d29514 +size 29360128 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..93c5ab1127e0bb6eacfabf5c1cece28738c9df72 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c09fcd9d6b985d9d659d21f1c5d407612cff9351bf279ac6958857e7524f93 +size 32014336 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..771d48f03ca0d2bdf966f192efc7bd695a040d42 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f23e1a2794ad96cd1748c92bcd36de7924fe8549ebe2f6353274502d283085a +size 29360128 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdfd84151aa40f1c3947a845a5914b3999b7a9ad --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8ee7577808961516194b43c30fe8cc73be5d8a2f99f69c3c3b7418751abf00 +size 32063488 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcaec3356d294362900456fd22031b7dc9b00b4a --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b003c046f6f60fbdcd0c77987c34cbb75f5c0e7ce171138fb992122cf10708 +size 29360128 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..007817590608de6b4b485106a1a67e7c9c37a51d --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b0d663edab5b0ff3b3b67a76c521c7c1cf40c28ffb74bd4f6231d9e90eb1e17 +size 29360128 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..08c11f0557c6ad9ebf8d8e533a526b87a0e591f8 --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea458d818ab6c213102749dbab15814aa8d6ae2d4e7ba242246b5e895faca865 +size 32014336 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ee30647796bd3c5dd7742f42c26b2667c741fd7 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab252844fe29c9c7bad2e65a3e3d457284c2347910cb0394f725df99fd1cd658 +size 32063488 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..907549999832e146478710cc8ac5a7f3e438a9f7 --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05fbd26b7bbd9f4cb4de80652973e589086a775e71f66b420048ff14f105fc80 +size 29360128 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..e26ea635adc5e2f17b99f39dd6f103e5d33eb24e --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4902ced5ccb24028f1cd89ca3153b2ab6436313ccdae38926093a9e0b8d88d +size 29360128 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec3092837aea896bc31c4b8f354b1598fbf8b396 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c4f2b5d6c4bb841b3bfe4eb0323d88410ba4c6ea87324371d4601ae47493e20 +size 32014336 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..89a172eb5078d4159c076c01e33c57caad3ec7f3 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0ce58199a6c083a0d1bd96fcb5004ca1d86b815ba4309fbd6cfaee4b659ad3 +size 32063488 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffc5c1666d83286cc7d723004ff4564fe5969349 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999a860fa35637eee76b47675b78a85801b1c4fcde03aa4224e772c83d99a0b4 +size 29360128 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f6d576d398335c68d0cdca5c82d152527ef2451 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771fa3b30deaa7aaf75028836c42b3b12c1d3e6a80b8e41363bb3ec770ffb4a8 +size 29360128 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..ead49d653a11dc2b3d39303210e7f7dc8123dd4d --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb05c7fa201ef073fee96d6b9b11562ef7e553156664d95f4f16052069098c65 +size 29360128 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..51cbb9055133ff978bcb28d1ed969d741a8e555b --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94ff0507f2be6507fe51ee653027f3f2eb4619e30883b7112711324057d08912 +size 32014336 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..6194d384cf60668e20bd22ff0f31a27a4cfa85b6 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbb4c789cdc182dd8f502e284abf4bc55cd99b3d4b4daa1ac303a9488b32a669 +size 32063488 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..4399a49804b3df1e10beeb1c87667ea249a1a955 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac2c8ccdeb8a265dc9c48bcf52bff5ee2a2a3a374ecce55f6f18fd09f16ee86b +size 29360128 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..9b1be9927259ef93ce9195b2e15ea4fb8aaed272 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57555a20bf9262b991e96a699365e9da694b2c03972a55b09375589294de435a +size 29360128 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7cb180b857038a915faa0ea92a493321959806b --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da2c888312c15e69fa8c18066b011efaad1786ec8fda91380fd88b8be5b1e64 +size 32014336 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..26fd7d5d65c62b4ca7b3eb627f909805f18f1832 --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b4a60640fc444c092d53cc46c6200f57009f7911c499aa94c425d42b17c26f +size 32063488 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..5d34c1ba8ff292ffb98f0179f358eaef408d4474 --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95dd6233ccc1daec891dc85f8b6c541141fb3c61199f97b929f9e5dbd8c3eb65 +size 29360128 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..38caea832c7e8f54df1aae4e2235fa3cbc7448d8 --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cf33b88cad332ecb75ad8894aaf8e4391e9ad9dafdb08e4876c45e97bd710a9 +size 29360128 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..6702fd698e35986d2979d27dc34294f728ac1eb2 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb509176d5c5f65ec00531cc65a035ac049d7806c07036695efa4b8d6f7c9a80 +size 32014336 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..c031765adec3d0ccdaf920320ffe1d8d2bae8bac --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695694f38be166e9e8a7796733014d1a9eae01f3b885087f0d0a812c16311a20 +size 32014336 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..669c73601804d927c5f355395129d66b1dfacdc4 --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b7bc9585ce10fcb44f6996d68d2211d7c0737985c6273b608db5d997920d00 +size 134217728 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..69feac5c2c5dac31341e3edba83f9a2ba379818f --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8dbe7ed0491c2d1d8502ee8a7d73a62bd8d9b803bbda52c4ad0c4ec12a18a7a +size 20463616 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..d288a2567792e0ef832929412c71874ee24adac0 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a552d6c16eebb539624f3dac68e4075b840cd51a4cc7fac5b2bf45c3385607ee +size 32063488 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc03a1919b68387699c73a6105c4e6c9bad7e2bf --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43fbe8c37557e0c08906ad5901364ad790dbd75ec1ecfc7aefeaccdea6a2d651 +size 29360128 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..e18a9c5bca84bf2dc121b3164e5cc7b59d76f399 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbd59dfc4f04e111a395af131224fe8ca6a717f2fedc3175479b54750a0bf68 +size 29360128 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..282fce2213d82aed9394242c87680f617193bd53 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d06d1fbc5e6bc96a241b119d41fe06effa54b95456500cb2b0f66fb6ae47b0a7 +size 32014336 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..7259d6c05288f56d395bd66d897cd8ab81f8ac6e --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09613e401508c62dc53aaa03ea238be4852aef587a7cafe5c048bbff06c4b471 +size 32063488 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..dc43c6281a39347c75b41dc868a35a00f2ae172c --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11fe2883f7e010fd925af15807035aa95170bd32bf8c596a3c537f5d6d18de86 +size 29360128 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f108eafaf9bcc36f1f054409a45a54c7310f1e9 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20fdfb55181315b63f9f5882831e248348077258fa98383f5820c4b2c9843869 +size 28311552 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e0808ab9d94d8456975b22d7529ef47c238396b --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6b5a372ede6b46dd70bf11b3d738303c1126034b39efd1e632b2d6cb6bfff3 +size 29360128 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..fd8f801b0182cd9c2e05d0cf5714bc8c1ba2ad76 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4216a3ec5e03a8711e38984cae174ce6218dd3d6be14406f98b23f5b7adc6f8 +size 32014336 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea99412188c547cda79d18cb693983e623018b1b --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49312193b1341acc1d2931b1625eca9464a32113c9c49a68b91b0967d766009 +size 32063488 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..53da2647209b7417b05b1c196bb4659fd4f96b2f --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0ae242de7dbb97faa1564ec2b5b6208e23654a5bbb88e2643d72a2b7ffdf6d +size 29360128 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4528465cba4e13d7255fcedcd1ee6415c974d98 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ec93cf33d7a811574166e5cbe9ea77d6c55863c02348e4ec44216698872f70 +size 29360128 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..f580fa6eff0911df576900c486e9014e8e925cbc --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:561876d066f72dd67a8c80dc9064d46cdd01927de2de69962fdd6dfcfa951e3d +size 32014336 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f0a1b53c6b50eae37d9ba87d936307c6bff88d1 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f067d90eb516a0a19708687e5c750e3abe7d2e70f129019a2b76d8a6f5d458f7 +size 32063488 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..7525cdd125a9fd8944ea09f9587d1f0f6dfc9611 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6df08679e2c865c84f44a60245a0318fced85648aefa41a5ded9044e1c0ff075 +size 29360128 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..62a385ba67bbcd05b5b17ef9648fc395ade00181 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d71019c2c0bf3eaf2e08973514536567ed946a2d9baaa3d103cb88b619ea8d52 +size 29360128 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..37bf2deca6db3616a8197a9ba09be1b4b981cc91 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48d9428589244fc16acdd132c47d13ab04124a6bec6dca11ab094cfb316b8ac2 +size 32014336 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..49ae84be2a3f0f310f3c80596e0828c16778843a --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e5d5c47a0eca68c1b1b678a4dffe63da5e7f062aab5003e28272b72440125c +size 29360128 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6a03f121564325b9d4faeeb2f14625d574f8eac --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61becef25f83ad4dce799b663c49676af8d991ebd1faf646723113aae84b74ea +size 32063488 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b6d2f9c414d2b06e3bcaf002584c99d3ee79a6f --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:374c499406ba928ca49c36372d0f5e17ba8caf1a4bef3caf43356871bacb9f15 +size 29360128 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c53116749661dc9fbacd770b19489623059351b --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da0c8d08b27c4aa482e28d0c3e65d3407cbfe466ce803c09d7cae9edd7b9bd04 +size 29360128 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..83f35417aba0d6b68b089370ee6d3e66fc3cbe2d --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee8a22f432cf14c08c4eaeb7b578c3a6207b2411e52cfc4f1d930472dc7b2af4 +size 32014336 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..4caf0db909ef15affe687f798c62eea6a1d82413 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dfc12211b8e5c398fda91f2203b6402fe346737bd4e5836195949b8dcc6328a +size 32063488 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..31a950fc2fab4439096692c2487d8a1be680f68f --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8226ecb7dbfa5692fa6283b484aac84bc4b02fcf7d2e98cc0d23f71596da9b5d +size 29360128 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..33dc0c477bf6d9d0434ab82b9888b096beaa2045 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2db002c6c7ec3ff6066901508e0c5454db43a1f55e89d64eee39dbb0e74d12d6 +size 29360128 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ed570edf91bd30f933199e81d98cf611f308b29 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ca70be7549a12d98d1178393e7d5530d322162b83292639df3b3009df0ea06d +size 32014336 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b17aded90ef28ea64b47b2e20d27608514dfc74 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6efa5efe01cf5ba0f2e29b0d8b9b882144959233d9c2ff6ad7f30b03399d9384 +size 32063488 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad3160ad0792fc431e48447cbc09b9b2be78d0d9 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11235d7012137272a6d23b84cff60efc232b32da4b6e445793ca7b0d2f80c64b +size 29360128 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..908c6d1f2145360aba214bfeece69a39c7f86ebf --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab0f5e330c56425777c7a50fedcb8e257df74b83924665a184426330451881f3 +size 29360128 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..e73c41255652cfac445011d633b877d8fa038835 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090b980b6b04403bcfd94233751df3df5718969c8c11504f00ee12ce9ed8501c +size 29360128 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..1981699ec4ff90548c078523cbb9f22de295a833 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ec95af2aa175663fcad155f22bcf9cc18e69e3a19a5d7debe925498e0a91b9f +size 32014336 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..9309e2062791c13734fbcf917a395cfe745dfc9b --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c06c41743437a875206a417973f5c870e8a00e32c10ce8f176ab1fa5e8a85d21 +size 32063488 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..818c33e28bed492f5f851b6131a816c91cd3fc2b --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9a93f5c3800440fe386a1c189f2510f2d82224480b3cc27307192171da08a6 +size 29360128 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e69fa7dab56fa28a342e33ddaeb143d2d621c86 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c828ee5c981530d532ce24525cc2049bab41a584620f62d874550772084b1a95 +size 29360128 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ebccb40bdfa6ee16bd834dd82b63deeb7fd6378 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83b552ce23bb86ac555704c61c8638d3dff7a2c319c20932bc250f1e936708e3 +size 32014336 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ec4102b2f8188e488918e8947aa0cb96ed87b36 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f46aad3857388a6fa957b0bf08111829aee15293488dc5747046a5d1e09d81 +size 32063488 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..4321e4911e3e3b61d5a20c8fc87faec8cec8865a --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1a511374c16cf9f999987f8322d959347e934908d9a9da14cbd5cd25c972f0 +size 29360128 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..8bf7e179ae3b959e6f60048b8147ca874fd2919a --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf5118b36e47d8261c4f2c362547719ca91035067a5c2dcb53491031cf1a61a0 +size 29360128 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..e77c93b32c5ffe321255addf921c3812a77663e6 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2ca4b20c132e487f8e6ff15525328fcd7b39067d97d594d8298619520c1e97c +size 32014336 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e0227e9b39fda450373363fd0d2a8c8e3bad1ef --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1475ce73de11b2ce2365bafb861d91f7ac5a9bed5b120a84d5a588cdb12766 +size 26329088 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..af3d6b9037f2f90f47a979c6209653979755664c --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d982f82cfec02db183a6033ff390e2c63e3980417035dd0f2dedf5dce463bc0 +size 32063488 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..b93858bac4cc3570d8904abb9d705eae7b325b03 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf27e5afef3486ee6ef36fed5791f98b7d296b9765ce3b3a7f4f693cc7d776f +size 29360128 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..62a30324a7077cae04fb431f44a6321bcfaf5137 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068468a6cb1aaeb44e33750d4a86b743954a18a0b7ec6643d1a871615b3493ca +size 29360128 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..152cae21f340113a2c8ea892be04a0a0b44fac82 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1550b366940a45992251f3a0e65b76149893ad719de0bd88f43bee1f68512c67 +size 32014336 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e2e617c12c96f3a23cc1c85578eb2c3fa1c5be0 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5739093c52f2872c30c1b8d1e85ffd4715e0efddad58b2e9c6ff14657de69dec +size 32063488 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..1daf229a3b80432320166fe36bdfbacb82f4ea3d --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec848b3c399b0517ac9e898dcd8462aeae0253912f72f11a67fa0c02bae515e +size 29360128 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..18e1da2bbaf7caa30a6e336468bc675f8c47d1e5 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719b8c6bfe9950b8dd6ac4c2b08dafc200b7d851ab64ac6675017c80fc81835a +size 29360128 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9c0037d9f2eeb8efabcc72504cea7d259f5cce7 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed8eaecec3f3400cb08f1a99a3be5bbcf0842c461262676c67b0ab3b32943b1 +size 32014336 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..f5facd836d3ba8d5495292adc376e1a492d465b2 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a27ebd24aed13083b67fa614489ec71f30d42b79cd173241b17dd706e907423 +size 32063488 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..731eb4faa767262b653b323359d77e48d5eba9c6 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b34cd52bf0b63bf855ed42d90458ddf7ac2139a1d0e5196f29a0d98d2d78761 +size 29360128 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..dcffa54c74a656c638272e671bc06779fed7072e --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:319e538dd3ba23fc794563801861ad1202437a49e1086d69fbc2ed2dc2423446 +size 28311552 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..9318319c11eeafe9ba7bc790e0ffa1ea0801817d --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c303644501b6031d2f313d9d9b59594c261e60a86242c15bdd5bb6a571c85f +size 29360128 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d21a849f046e1acef4d38b680c68eeb52cef815 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63208eed0e932ef09ee77f46eba1caf8db64640ab64b83fef4657b7c3baea6df +size 32014336 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..e614cc2aace6a475732a4bf67dfadc698d4f38e3 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c7f10abbe71935d042459700559ad98fc2fc85e7323d5c24a2ac1afef8bc7fb +size 32063488 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..86ac7de1e581a0f7b3895149ad152c266b4787c2 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aecd2ed7b9c1837d3fd3822b489b88f39a9b0cc8cba345dcafddeaf88b7de99 +size 29360128 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..81053c2fb192bbb2e70d8c0fe4c08a1783c4d853 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d129ca199d5127516cdc196a6da3f6016e04df6e3ba83f9b77f2a9adad2a98 +size 29360128 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..2865190bc8345475032ad3e7c8e8973575e83d69 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07e8241543679669bc5e941efac01ec5a3b6212ee796fc7ac4f8d60d328a591 +size 32014336 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..67df50088e4f6e9c6201e599d07bd919c4d2fe6b --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b97ff0404050a97a953ee801923c957e832d64dde994e7e55c755b469d40d7e +size 32063488 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..dddb1e85b119b83630b6eff4c7d1f4cab13d3559 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d685ac16ba874e28443f5893ed1086729c97b4651edd772a901c225ff53a2e1 +size 29360128 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..b9055438b4e92eec26d2a9c862fa68d72a5f6be1 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8931bf7c6bb09ebe6c3a8ee34f6ddc0b4bab43ad965cfc99f8926b49442e96 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..9832344e6004448110fa2510ed0d77fa37b02997 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35f35829fb50aa4de809a5c9afdf8f3725426896fc3f5b7abeb8e4576a53bf61 +size 32014336 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c455d0cb2cc6a07355c9413651c49b8a27e3964 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86b9e06c7e0aacaa0a6be8fffc95b304d23331a10586917c83e72a718ff46c4b +size 29360128 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b67be6eabfff8da21e09a16f0bba366e9a3d5ed --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9403da426940191871a2be0a41624404b748d26e586333f791c7022d209df26e +size 32063488 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d557241520d7ff0dbb97c3746a3f2f9f5c67a8a --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e1a5e2449801c31c18ca7bf5795ad0d0af94c9e407fd5e931d0a967ab70e9e +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..53ebceb16784de2c2faa5b6c04da3f9b4ef8f2db --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90d65b6801bd96c97f8cd711edc9ac5fa61d9e572a80e5d1d9a94e88c51804b +size 29360128 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fedac2104a837da5c452a332d05037560144e6a --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5719c3cf83397685f7e224d24667eb57c42a8e209724f9e4dc7acde452439f8e +size 32014336 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..6cdd466b3376096dc28726467134432e60a33539 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:983b3f9a58cd548b72092dcaadb421c7af4560eb671088ff2c3d79be6368a0de +size 32063488 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..58a6bfaf3caf0f4abe818d41d7d2073f97ceafbb --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f41e3460e625101e1d7a1874c80e34b4efb63c71d1b8fe4dc481c3563c348c +size 29360128 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..915751c05587160cdb7499bce36160b97bdd6a0f --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bf6f0297fe1e7714d6ceb97568d21b6b92f8f7c395f04313f1473e00330b33 +size 29360128 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a259d925071a1fdeaeeba18d534256851f05f79 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38564cca213865957d92484e942b9d590bc47a141ec1fc9a88adc1cfaa386bab +size 32014336 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..2de07e6180e326ffe7b809a203c393d933f9e85f --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4014b77c34b311137e6a9088f50c89dce90012026f9cae4b906ba57b14922c8 +size 32063488 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..bedfd5be461cee65a901fcc155f296d77c5260c6 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ee40a5805ace24f43dc62d359a0e66c23ff71d7ba47e270694ffd873086d4d1 +size 29360128 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b31350c66caf47e4f231ef538d2aed001482372 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ce94be786248c33dd188dd726da85a282adf76660125170cb599aad8d646e9 +size 29360128 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..64ed573d0c05b364d63c282856bd202da676d48c --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee67399cf724c01252a3ed92a43a1e383aa740a7f218ed01864a9e3d71de9e6 +size 29360128 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2a5dcbe19cde5211b4a5eca3fa3549d9d4d3e3c --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aba2628e0800dbd41f201245d04595a51b7b15d6c6fbf7256a769905653206f +size 32014336 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdd9ce1954a460664d24ddec69231e3c351f3c33 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:409cf0489f51a30c423a4dddbbff583e4eff7bfec88db8fc7e231751df154ef4 +size 32063488 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..4c3f86315cab848f34bc8133dc9bee4631c0812f --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf26fd530af3287d1d9612719b7d6ab0fc2e2a99f38e469d405123716063ac0 +size 29360128 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef4ea6226bb3cf9f66cec363baf3af7973220f75 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aec78c2335b86ff42bd83ac73f8aaaaf1c8348ecc11a1df36c9d0c492255427 +size 29360128 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d0a57055b26a12797b2ea417137e3fe202a6ca1 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db23355f8811dbe80599722b79e0749cd36decd562d0da2a27f7bea2733f7272 +size 32014336 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..3caa4ad855365c37e55f0fe27cde0cc116ad278d --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3e3991a873152d5f444d9f15ebae7d9d44c330c87e25284758b099a7a59a22 +size 32063488 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbf3d50da6074e728b8a84ac762c6da5d0a2ece0 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ddef29f492dc213b70f904982fe8028513482e73495dc6426a8008cfffcdb0 +size 29360128 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..9dab7bed0f1d48b5f208f1b92790632ad89d56c1 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b26e24c336181d1a06d6fa82e135926da6d3ba8d892622e1af5caa20eada0cc +size 29360128 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..a706d2f193489cc36e8e8d405e737ca88c199caf --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89683c2a001bd07016f189ebe0b8cfbd8610fbb29372918b1ccc5bff617f969 +size 32014336 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..cb55e2dd00081b00ee79d01db0e4db6bce0696e3 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5014cbac2d5bd07f7c71d214dbc16a82c8922f98c1700e662ce5a1f292aed2cd +size 32014336 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..0396f89974a0bb7db273cd8e272735689774e5ef --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2af39199b1aaa36ce4b56f73a3812e40eb69b8c12e219833a05e742f3fb6701 +size 32063488 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..ccacf8882ee6c84ef57edb2c444db32bbba5a5e8 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eeb9c5540904471639a3c7c0858aa1c961b45e635341ed8c934d813fab18cab +size 29360128 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..601cb31406b5c957795dcca291bef88e7f3a30b8 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c7cbfd25f459e34ee5f47279868017bb8cae8b04d529e514ad3612793fbced +size 29360128 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..c77c159bad001bce73cd5ad148b0a52e40883458 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380297f86b88459ef871fb90198ff8b6f7690fadbf5c90c8315ab52affaa9ba5 +size 32014336 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..c601cf51d24502669f904463d2c4d89803bb22db --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6694f08ff99318661d35f24dd4c6c71278fbe27cf104db1e6022ae66843c7513 +size 32063488 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce9e124022cc176f5baf7be320161d536612f287 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ad461862fa5799abf51057f4d5b4ade7b8fec709185bb9c4afab70f9be21e9d +size 29360128 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..1f7394fee9a006158ac02c27675d52e9d64cf67f --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d4b3d6cbb48d4e97ef5f392c83677d4bd15f3dc08dcaa119f79e4851dc7fcc +size 29360128 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdbdadf511b6979c26e3a3b4639cbc9e61950127 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72234281326ceef27a967a9386a20b8bc55eb4aca6469e4c77f0d70702ed1001 +size 32014336 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..78537a6e854efca54f629fce8e514bcac8d1c866 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a0239d33efc1d52093210b74fe4732a82b73983c7c9ce1a19644b7f1005c491 +size 32063488 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..eeecd8ac265657470398a5c66bd8c1dbbafc4014 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc057dfa513b7d5872447b358f0196ee8484d7531b265525806ab0879b561371 +size 29360128 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d19b0e5a67e1ff9906b89748ace8eb466feac74c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "name_or_path": "rwkv-world", + "add_prefix_space": false, + "tokenizer_class": "RWKVWorldTokenizer", + "use_fast": false, + "auto_map": { + "AutoTokenizer": [ + "tokenization_rwkv_world.RWKVWorldTokenizer", + null + ] + } +} \ No newline at end of file diff --git a/tokenizer_model b/tokenizer_model new file mode 100644 index 0000000000000000000000000000000000000000..8a607ac2fd88ff6fa236e0ae58a5477b1a6c6dd3 Binary files /dev/null and b/tokenizer_model differ