File size: 1,466 Bytes
f605912 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# Runs run.py against the engine directory ${output_dir} on the
# hf-internal-testing/librispeech_asr_dummy dataset.
#
# The two trtllm-build invocations (encoder and decoder) are kept commented
# out. NOTE(review): presumably the engines under ${output_dir} have already
# been built; uncomment both commands to rebuild them from ${checkpoint_dir}.

# Stop at the first failing command or unset variable, so that a failed build
# step (once re-enabled) never falls through to the next step or to the run.
set -eu

# Make only GPU index 1 visible to the processes started below.
export CUDA_VISIBLE_DEVICES=1

# Build settings. These are referenced only by the commented-out trtllm-build
# commands further down.
INFERENCE_PRECISION=float16
MAX_BEAM_WIDTH=4
MAX_BATCH_SIZE=8

# Input checkpoint directory (read by the build commands) and engine output
# directory (written by the build commands, read by run.py).
checkpoint_dir=tllm_checkpoint
output_dir=distill_whisper_large_v3

# --- Encoder engine build (disabled) ----------------------------------------
# trtllm-build --checkpoint_dir "${checkpoint_dir}/encoder" \
#   --output_dir "${output_dir}/encoder" \
#   --paged_kv_cache disable \
#   --moe_plugin disable \
#   --enable_xqa disable \
#   --max_batch_size "${MAX_BATCH_SIZE}" \
#   --gemm_plugin disable \
#   --bert_attention_plugin "${INFERENCE_PRECISION}" \
#   --remove_input_padding disable

# --- Decoder engine build (disabled) ----------------------------------------
# trtllm-build --checkpoint_dir "${checkpoint_dir}/decoder" \
#   --output_dir "${output_dir}/decoder" \
#   --paged_kv_cache disable \
#   --moe_plugin disable \
#   --enable_xqa disable \
#   --max_beam_width "${MAX_BEAM_WIDTH}" \
#   --max_batch_size "${MAX_BATCH_SIZE}" \
#   --max_seq_len 100 \
#   --max_input_len 14 \
#   --max_encoder_input_len 1500 \
#   --gemm_plugin "${INFERENCE_PRECISION}" \
#   --bert_attention_plugin "${INFERENCE_PRECISION}" \
#   --gpt_attention_plugin "${INFERENCE_PRECISION}" \
#   --remove_input_padding disable

# Run run.py on the dummy LibriSpeech dataset. The run name embeds the engine
# directory name. Expansions are quoted so the values are passed as single
# arguments even if the directory name is later changed to contain spaces or
# glob characters.
python3 run.py \
  --engine_dir "${output_dir}" \
  --dataset hf-internal-testing/librispeech_asr_dummy \
  --name "librispeech_dummy_${output_dir}"
|