#!/usr/bin/env bash
#
# Build TensorRT-LLM engines for an encoder/decoder checkpoint pair.
#
# Usage: <script> <checkpoint_dir> <output_dir>
#   checkpoint_dir  Directory whose `encoder/` and `decoder/` subdirectories
#                   are passed to `trtllm-build --checkpoint_dir`.
#   output_dir      Directory whose `encoder/` and `decoder/` subdirectories
#                   are passed to `trtllm-build --output_dir`.
#
# Requires `trtllm-build` to be available on PATH.
#
# Exit status: 0 when both builds succeed, 2 on a usage error, 1 when
# `trtllm-build` is missing, otherwise the status of the failing build.

# Fail fast: abort on the first failing command or unset variable so the
# decoder build is never attempted after a failed encoder build.
set -euo pipefail

# Build-time settings shared by both trtllm-build invocations.
readonly INFERENCE_PRECISION=float16
readonly MAX_BEAM_WIDTH=4
readonly MAX_BATCH_SIZE=64

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print a usage message to stderr and exit with status 2.
usage() {
  printf 'Usage: %s <checkpoint_dir> <output_dir>\n' "${0##*/}" >&2
  exit 2
}

# Run trtllm-build on the encoder checkpoint.
#   $1 - checkpoint root (its encoder/ subdirectory is read)
#   $2 - output root (its encoder/ subdirectory is the build target)
build_encoder() {
  local checkpoint_dir=$1
  local output_dir=$2

  trtllm-build --checkpoint_dir "${checkpoint_dir}/encoder" \
    --output_dir "${output_dir}/encoder" \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_batch_size "${MAX_BATCH_SIZE}" \
    --gemm_plugin disable \
    --bert_attention_plugin "${INFERENCE_PRECISION}" \
    --max_input_len 3000 --max_seq_len=3000
}

# Run trtllm-build on the decoder checkpoint.
#   $1 - checkpoint root (its decoder/ subdirectory is read)
#   $2 - output root (its decoder/ subdirectory is the build target)
build_decoder() {
  local checkpoint_dir=$1
  local output_dir=$2

  trtllm-build --checkpoint_dir "${checkpoint_dir}/decoder" \
    --output_dir "${output_dir}/decoder" \
    --moe_plugin disable \
    --enable_xqa disable \
    --max_beam_width "${MAX_BEAM_WIDTH}" \
    --max_batch_size "${MAX_BATCH_SIZE}" \
    --max_seq_len 114 \
    --max_input_len 14 \
    --max_encoder_input_len 3000 \
    --gemm_plugin "${INFERENCE_PRECISION}" \
    --bert_attention_plugin "${INFERENCE_PRECISION}" \
    --gpt_attention_plugin "${INFERENCE_PRECISION}"
}

# Validate arguments and tooling, then build the encoder followed by the
# decoder.
main() {
  # Extra positional arguments are tolerated (and ignored) so existing
  # callers keep working; only missing or empty paths are rejected, because
  # an empty path would silently resolve to /encoder and /decoder.
  (( $# >= 2 )) || usage
  [[ -n "$1" && -n "$2" ]] || usage

  command -v trtllm-build >/dev/null \
    || die "error: trtllm-build not found on PATH"

  build_encoder "$1" "$2"
  build_decoder "$1" "$2"
}

main "$@"