Commit 2d64b65 by yessenzhar (parent: 03200fe)

add weights compiled by 0.6.1 trtllm

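Serialized TRT-LLM engines are generally tied to the version that built them, so these files should be served with a 0.6.1 runtime. A minimal guard, assuming the standard tensorrt_llm Python package is importable where the model is loaded:

import tensorrt_llm

# Serialized TRT-LLM engines are generally not portable across versions;
# refuse to load these weights under a runtime other than a 0.6.1 build.
assert tensorrt_llm.__version__.startswith("0.6.1"), (
    f"engines built with TRT-LLM 0.6.1, runtime is {tensorrt_llm.__version__}"
)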
tensorrt_llm/1/config.json CHANGED
@@ -1,14 +1,17 @@
 {
   "builder_config": {
-    "fp8": true,
+    "gather_all_token_logits": false,
     "hidden_act": "silu",
     "hidden_size": 8192,
     "int8": false,
+    "lora_target_modules": [],
     "max_batch_size": 64,
+    "max_beam_width": 1,
     "max_input_len": 4096,
     "max_num_tokens": null,
     "max_output_len": 4096,
     "max_position_embeddings": 4096,
+    "max_prompt_embedding_table_size": 0,
     "name": "llama",
     "num_heads": 64,
     "num_kv_heads": 8,
@@ -25,12 +28,14 @@
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
     "context_fmha_type": 1,
-    "gemm_plugin": "float16",
+    "gemm_plugin": false,
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
     "layernorm_plugin": false,
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
     "nccl_plugin": "float16",
     "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
@@ -39,8 +44,9 @@
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block": 64,
+    "tokens_per_block": 128,
     "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
     "weight_only_groupwise_quant_matmul_plugin": false,
     "weight_only_quant_matmul_plugin": false
   }
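The added fields (gather_all_token_logits, lora_target_modules, max_beam_width, max_prompt_embedding_table_size, lora_plugin, multi_block_mode, use_paged_context_fmha) follow the 0.6.1-era builder config schema, tokens_per_block doubles from 64 to 128 for the paged KV cache, and the fp8 flag is gone. A small pre-deployment sanity check — a sketch only; the recursive lookup avoids assuming which config section holds each key:

import json

def find(node, key):
    # Look the key up anywhere in the nested config, so we don't have to
    # assume whether it lives under builder_config or the plugin section.
    if key in node:
        return node[key]
    for value in node.values():
        if isinstance(value, dict):
            try:
                return find(value, key)
            except KeyError:
                pass
    raise KeyError(key)

with open("tensorrt_llm/1/config.json") as f:
    config = json.load(f)

# Fields introduced on the new side of the diff above.
for key in ("gather_all_token_logits", "lora_target_modules", "max_beam_width",
            "max_prompt_embedding_table_size", "use_paged_context_fmha"):
    find(config, key)  # raises KeyError if the rebuild is missing a field

assert find(config, "tokens_per_block") == 128   # was 64 before this commit
assert find(config, "paged_kv_cache") is True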
tensorrt_llm/1/llama_float16_tp4_rank0.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8ae291b22b15f930c663ff010cc9b9f6af3212d3226dcbd0d9010c5c9c6e1eeb
-size 17804138844
+oid sha256:e8f3c8e892fb4ef0dd5a9d63d0e8d0b8dca3dd83418d1cfeac3aaf068fc156fa
+size 17800444308
tensorrt_llm/1/llama_float16_tp4_rank1.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a9deaf774e7becdf49addbb01410eb04dfbbc54be0d02bdc8aa8990fde175d9
-size 17807935668
+oid sha256:9a934c247f067afa75475d3f931f4e19833df79b60ddb8d992ecf766b1515600
+size 17804224852
tensorrt_llm/1/llama_float16_tp4_rank2.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73f77f3c25a629e74815dfb815f911a6153c788b54b5eaeea893f0967c93a39f
-size 17807909644
+oid sha256:3c65f50d9d4e88f814747a43946f88b22d29ead7921afeb71febd0630b0f5e19
+size 17800453380
tensorrt_llm/1/llama_float16_tp4_rank3.engine CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59ae35d43e0dc3ea0d02765e2ea83da11240e0f4602da688791b60207a16a1c8
-size 17807909644
+oid sha256:39e668582a984b449279db1a962270c1a9e45dbfe0f15d0cb782196a6e85a494
+size 17800444948
tensorrt_llm/1/model.cache CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cae99d957bf241fc690c3026770efa761db026b78c1747dea214dccd6365e0c6
-size 22623370
+oid sha256:61d6231f5c2697f27d5cddb6556ea9d545968275a6ae336f1540b6511317d226
+size 13821175
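The engine files and model.cache are stored as Git LFS pointers, so the repo itself only holds the version/oid/size triples shown above. A quick way to confirm a downloaded blob matches its pointer — a sketch; the blob path is wherever your client materialized the file:

import hashlib

def read_pointer(pointer_path):
    # Parse the three "key value" lines of a git-lfs pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

def verify(pointer_path, blob_path):
    # Compare a downloaded blob against the pointer's sha256 oid and size.
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# e.g. verify("tensorrt_llm/1/llama_float16_tp4_rank0.engine",  # pointer in git
#             "/tmp/llama_float16_tp4_rank0.engine")            # downloaded blob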