#!/usr/bin/env bash
#
# Build the encoder and decoder engines from a converted checkpoint by
# invoking `trtllm-build` once per sub-model.
#
# Usage: <script> <checkpoint_dir> <output_dir>
#   checkpoint_dir  directory holding the `encoder/` and `decoder/` checkpoints
#   output_dir      directory that receives the `encoder/` and `decoder/` engines
#
# Exit status: 2 when an argument is missing or empty; otherwise the status of
# the first failing `trtllm-build` call, or 0 when both builds succeed.

# Abort on the first failure so the decoder is never built after a failed
# encoder build, and treat unset variables as errors.
set -euo pipefail

if (( $# < 2 )) || [[ -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <checkpoint_dir> <output_dir>\n' "${0##*/}" >&2
  exit 2
fi

# Build-time settings shared by both invocations.
INFERENCE_PRECISION=float16
MAX_BEAM_WIDTH=4
MAX_BATCH_SIZE=64

checkpoint_dir=$1
output_dir=$2

# Encoder engine: GEMM plugin disabled, BERT attention plugin enabled at the
# inference precision.
trtllm-build \
  --checkpoint_dir "${checkpoint_dir}/encoder" \
  --output_dir "${output_dir}/encoder" \
  --moe_plugin disable \
  --enable_xqa disable \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --gemm_plugin disable \
  --bert_attention_plugin "${INFERENCE_PRECISION}" \
  --max_input_len 3000 \
  --max_seq_len 3000

# Decoder engine: GEMM, BERT attention and GPT attention plugins all enabled
# at the inference precision; beam search width bounded by MAX_BEAM_WIDTH.
# NOTE(review): --max_encoder_input_len mirrors the encoder's 3000 limit above;
# keep the two values in sync if either changes.
trtllm-build \
  --checkpoint_dir "${checkpoint_dir}/decoder" \
  --output_dir "${output_dir}/decoder" \
  --moe_plugin disable \
  --enable_xqa disable \
  --max_beam_width "${MAX_BEAM_WIDTH}" \
  --max_batch_size "${MAX_BATCH_SIZE}" \
  --max_seq_len 114 \
  --max_input_len 14 \
  --max_encoder_input_len 3000 \
  --gemm_plugin "${INFERENCE_PRECISION}" \
  --bert_attention_plugin "${INFERENCE_PRECISION}" \
  --gpt_attention_plugin "${INFERENCE_PRECISION}"