Damien Benveniste committed on
Commit 9ed86a1 · 1 Parent(s): 1bdf708
Files changed (1)
  1. entrypoint.sh +4 -2
entrypoint.sh CHANGED
@@ -9,6 +9,9 @@ GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
 MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
 ENFORCE_EAGER=${ENFORCE_EAGER:-true}
 
+# Disable usage stats via environment variable
+export VLLM_DISABLE_USAGE_STATS=true
+
 # Print environment for debugging
 echo "Environment variables:"
 env
@@ -31,8 +34,7 @@ CMD="vllm serve $MODEL \
     --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
     --max-num-seqs $MAX_NUM_SEQS \
     --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
-    --max-model-len $MAX_MODEL_LEN \
-    --disable-usage-stats"
+    --max-model-len $MAX_MODEL_LEN"
 
 # Add enforce-eager only if it's set to true
 if [ "$ENFORCE_EAGER" = "true" ]; then