File size: 730 Bytes
40afe12
 
 
 
 
 
 
 
 
 
 
 
81d6e3d
 
40afe12
 
 
 
81d6e3d
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
#
# Launch `vllm serve` with settings taken from environment variables.
#
# Each variable below may be overridden from the environment; when it is
# unset or empty the default shown here is used:
#   MODEL                   model passed as the positional argument
#   DTYPE                   value for --dtype
#   MAX_NUM_BATCHED_TOKENS  value for --max-num-batched-tokens
#   MAX_NUM_SEQS            value for --max-num-seqs
#   GPU_MEMORY_UTILIZATION  value for --gpu-memory-utilization
#   MAX_MODEL_LEN           value for --max-model-len
#   ENFORCE_EAGER           "true" adds --enforce-eager; anything else omits it

# Fail fast on errors and on typos in variable names.
set -euo pipefail

# Default values
MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
DTYPE=${DTYPE:-"half"}
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
ENFORCE_EAGER=${ENFORCE_EAGER:-true}

# Construct the command as an array so that every value stays exactly one
# argument. Building a flat string and expanding it unquoted would re-split
# values on whitespace and expand glob characters (e.g. a local model path
# containing spaces or '*').
cmd=(
  vllm serve "$MODEL"
  --dtype "$DTYPE"
  --max-num-batched-tokens "$MAX_NUM_BATCHED_TOKENS"
  --max-num-seqs "$MAX_NUM_SEQS"
  --gpu-memory-utilization "$GPU_MEMORY_UTILIZATION"
  --max-model-len "$MAX_MODEL_LEN"
)

# Add enforce-eager only if it's set to exactly "true"
if [[ "$ENFORCE_EAGER" == "true" ]]; then
  cmd+=(--enforce-eager)
fi

# Replace this shell with the server process so that signals sent to the
# script's PID are delivered directly to vllm.
exec "${cmd[@]}"