Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
9ed86a1
1
Parent(s):
1bdf708
modified
Browse files - entrypoint.sh +4 -2
entrypoint.sh
CHANGED
@@ -9,6 +9,9 @@ GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
|
9 |
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
|
|
|
|
|
|
12 |
# Print environment for debugging
|
13 |
echo "Environment variables:"
|
14 |
env
|
@@ -31,8 +34,7 @@ CMD="vllm serve $MODEL \
|
|
31 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
32 |
--max-num-seqs $MAX_NUM_SEQS \
|
33 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
34 |
-
--max-model-len $MAX_MODEL_LEN
|
35 |
-
--disable-usage-stats"
|
36 |
|
37 |
# Add enforce-eager only if it's set to true
|
38 |
if [ "$ENFORCE_EAGER" = "true" ]; then
|
|
|
9 |
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
12 |
+
# Disable usage stats via environment variable
|
13 |
+
export VLLM_DISABLE_USAGE_STATS=true
|
14 |
+
|
15 |
# Print environment for debugging
|
16 |
echo "Environment variables:"
|
17 |
env
|
|
|
34 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
35 |
--max-num-seqs $MAX_NUM_SEQS \
|
36 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
37 |
+
--max-model-len $MAX_MODEL_LEN
|
|
|
38 |
|
39 |
# Add enforce-eager only if it's set to true
|
40 |
if [ "$ENFORCE_EAGER" = "true" ]; then
|