yessenzhar committed
Commit 2d64b65
Parent(s): 03200fe
add weights compiled by 0.6.1 trtllm
tensorrt_llm/1/config.json CHANGED

@@ -1,14 +1,17 @@
 {
   "builder_config": {
-    "
+    "gather_all_token_logits": false,
     "hidden_act": "silu",
     "hidden_size": 8192,
     "int8": false,
+    "lora_target_modules": [],
     "max_batch_size": 64,
+    "max_beam_width": 1,
     "max_input_len": 4096,
     "max_num_tokens": null,
     "max_output_len": 4096,
     "max_position_embeddings": 4096,
+    "max_prompt_embedding_table_size": 0,
     "name": "llama",
     "num_heads": 64,
     "num_kv_heads": 8,
@@ -25,12 +28,14 @@
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
     "context_fmha_type": 1,
-    "gemm_plugin":
+    "gemm_plugin": false,
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
     "layernorm_plugin": false,
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
+    "lora_plugin": false,
+    "multi_block_mode": false,
     "nccl_plugin": "float16",
     "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
@@ -39,8 +44,9 @@
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block":
+    "tokens_per_block": 128,
     "use_custom_all_reduce": false,
+    "use_paged_context_fmha": false,
     "weight_only_groupwise_quant_matmul_plugin": false,
     "weight_only_quant_matmul_plugin": false
 }
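A quick way to confirm a checkout actually carries the 0.6.x rebuild is to load the JSON and assert the fields this commit touches. A minimal sketch, Python standard library only; the path is this repository's Triton layout and the expected values are taken from the diff above:

import json

# Sanity-check the rebuilt TensorRT-LLM config before deploying it.
with open("tensorrt_llm/1/config.json") as f:
    cfg = json.load(f)

builder = cfg["builder_config"]
assert builder["name"] == "llama"
assert builder["max_batch_size"] == 64
assert builder["max_input_len"] == 4096
assert builder["max_output_len"] == 4096

# Fields added by the 0.6.x builder; their presence is a cheap signal
# that the engines were not produced by an older release.
for key in ("max_beam_width", "lora_target_modules",
            "max_prompt_embedding_table_size"):
    assert key in builder, f"missing 0.6.x field: {key}"

print("config.json looks like a TensorRT-LLM 0.6.x build")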
tensorrt_llm/1/llama_float16_tp4_rank0.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e8f3c8e892fb4ef0dd5a9d63d0e8d0b8dca3dd83418d1cfeac3aaf068fc156fa
+size 17800444308
tensorrt_llm/1/llama_float16_tp4_rank1.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9a934c247f067afa75475d3f931f4e19833df79b60ddb8d992ecf766b1515600
+size 17804224852
tensorrt_llm/1/llama_float16_tp4_rank2.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:3c65f50d9d4e88f814747a43946f88b22d29ead7921afeb71febd0630b0f5e19
+size 17800453380
tensorrt_llm/1/llama_float16_tp4_rank3.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:39e668582a984b449279db1a962270c1a9e45dbfe0f15d0cb782196a6e85a494
+size 17800444948
tensorrt_llm/1/model.cache CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:61d6231f5c2697f27d5cddb6556ea9d545968275a6ae336f1540b6511317d226
+size 13821175
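Because the engines are stored as Git LFS objects, a checkout can be verified against the pointers above once `git lfs pull` has replaced the pointer files with the real blobs. A minimal sketch in Python, standard library only; the oid and size are rank0's values from this commit, so substitute the other ranks' values as needed:

import hashlib

EXPECTED_OID = "e8f3c8e892fb4ef0dd5a9d63d0e8d0b8dca3dd83418d1cfeac3aaf068fc156fa"
EXPECTED_SIZE = 17800444308

h = hashlib.sha256()
size = 0
# Stream in 1 MiB chunks: the engine is ~17.8 GB, too large to read at once.
with open("tensorrt_llm/1/llama_float16_tp4_rank0.engine", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("rank0 engine matches its LFS pointer")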