yuekai committed on
Commit
7e28c75
1 Parent(s): 9500f83

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. build.sh +31 -0
build.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# Build the encoder and decoder engines with trtllm-build.
#
# Usage: build.sh <checkpoint_dir> <output_dir>
#
#   checkpoint_dir  directory holding the `encoder/` and `decoder/`
#                   checkpoint subdirectories passed to --checkpoint_dir
#   output_dir      directory under which `encoder/` and `decoder/` are
#                   passed to --output_dir
#
# Exit status: 2 on a usage error, 1 if a checkpoint subdirectory is
# missing, otherwise the status of the first trtllm-build that fails
# (0 when both succeed).

# Stop at the first failing command so a broken encoder build is not
# followed (and masked) by the decoder build; also trap unset variables.
set -euo pipefail

# Build-time settings shared by both invocations.
readonly INFERENCE_PRECISION=float16
readonly MAX_BEAM_WIDTH=4
readonly MAX_BATCH_SIZE=64

# Without this check a missing argument would expand to "/encoder" and
# "/decoder" at the filesystem root. Extra arguments are still ignored.
if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <checkpoint_dir> <output_dir>\n' "${0##*/}" >&2
  exit 2
fi

checkpoint_dir=$1
output_dir=$2

# Fail fast with a clear message instead of deferring to the build tool.
for component in encoder decoder; do
  if [[ ! -d "${checkpoint_dir}/${component}" ]]; then
    printf 'error: checkpoint directory not found: %s\n' \
      "${checkpoint_dir}/${component}" >&2
    exit 1
  fi
done

# Encoder engine. The GEMM plugin is disabled here, while the decoder
# build below enables it at ${INFERENCE_PRECISION}.
trtllm-build --checkpoint_dir "${checkpoint_dir}/encoder" \
  --output_dir "${output_dir}/encoder" \
  --moe_plugin disable \
  --enable_xqa disable \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --gemm_plugin disable \
  --bert_attention_plugin "${INFERENCE_PRECISION}" \
  --max_input_len 3000 --max_seq_len=3000

# Decoder engine. --max_encoder_input_len matches the encoder's
# --max_input_len (3000) above; keep the two in sync when changing either.
trtllm-build --checkpoint_dir "${checkpoint_dir}/decoder" \
  --output_dir "${output_dir}/decoder" \
  --moe_plugin disable \
  --enable_xqa disable \
  --max_beam_width "${MAX_BEAM_WIDTH}" \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --max_seq_len 114 \
  --max_input_len 14 \
  --max_encoder_input_len 3000 \
  --gemm_plugin "${INFERENCE_PRECISION}" \
  --bert_attention_plugin "${INFERENCE_PRECISION}" \
  --gpt_attention_plugin "${INFERENCE_PRECISION}"