Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
1bdf708
1
Parent(s):
9aaf11d
modified
Browse files
- Dockerfile +6 -3
- entrypoint.sh +8 -13
Dockerfile
CHANGED
@@ -2,18 +2,21 @@
|
|
2 |
FROM vllm/vllm-openai:latest
|
3 |
|
4 |
# Set environment variables
|
5 |
-
|
6 |
ENV HF_HOME="/tmp/huggingface"
|
7 |
ENV XDG_CACHE_HOME="/tmp/cache"
|
8 |
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
|
9 |
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
|
|
|
|
|
|
|
10 |
|
11 |
# Ensure PATH includes common Python locations
|
12 |
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
|
13 |
|
14 |
# Create necessary directories and set permissions
|
15 |
-
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
16 |
-
chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
17 |
|
18 |
# Set the working directory
|
19 |
WORKDIR /app
|
|
|
2 |
FROM vllm/vllm-openai:latest

# Set environment variables.
# SECURITY: do NOT bake a Hugging Face token into the image — anyone who can
# pull the image can read every ENV value. Supply the token at runtime instead,
# e.g.:  docker run -e HUGGING_FACE_HUB_TOKEN=hf_xxx <image>
# (the original hardcoded a "your_hf_token_here" placeholder here, which
# invites committing a real token).
ENV HF_HOME="/tmp/huggingface"
ENV XDG_CACHE_HOME="/tmp/cache"
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
ENV XDG_CONFIG_HOME="/.config"
ENV VLLM_USE_MODELSCOPE="false"
ENV VLLM_DISABLE_USAGE_STATS="true"

# Ensure PATH includes common Python locations
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"

# Create the cache/config directories up front and make them world-writable
# (0777) — presumably so an arbitrary non-root runtime user can write to
# them; TODO confirm the required UID against the deployment platform.
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config && \
    chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config

# Set the working directory
WORKDIR /app
|
entrypoint.sh
CHANGED
@@ -1,20 +1,20 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
# Default values
|
4 |
-
MODEL=${MODEL:-"microsoft/
|
5 |
DTYPE=${DTYPE:-"half"}
|
6 |
-
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
|
7 |
-
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
8 |
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
9 |
-
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
-
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
12 |
# Print environment for debugging
|
13 |
echo "Environment variables:"
|
14 |
env
|
15 |
|
16 |
# Check and set permissions for directories
|
17 |
-
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
18 |
if [ ! -d "$dir" ]; then
|
19 |
mkdir -p "$dir"
|
20 |
fi
|
@@ -23,7 +23,6 @@ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp
|
|
23 |
ls -la "$dir"
|
24 |
done
|
25 |
|
26 |
-
|
27 |
# Construct the command
|
28 |
CMD="vllm serve $MODEL \
|
29 |
--host 0.0.0.0 \
|
@@ -32,18 +31,14 @@ CMD="vllm serve $MODEL \
|
|
32 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
33 |
--max-num-seqs $MAX_NUM_SEQS \
|
34 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
35 |
-
--max-model-len $MAX_MODEL_LEN
|
|
|
36 |
|
37 |
# Add enforce-eager only if it's set to true
|
38 |
if [ "$ENFORCE_EAGER" = "true" ]; then
|
39 |
CMD="$CMD --enforce-eager"
|
40 |
fi
|
41 |
|
42 |
-
# Disable usage reporting if set
|
43 |
-
if [ "$DISABLE_USAGE_REPORTING" = "true" ]; then
|
44 |
-
CMD="$CMD --disable-usage-reporting"
|
45 |
-
fi
|
46 |
-
|
47 |
# Execute the command
|
48 |
echo "Running command: $CMD"
|
49 |
exec $CMD
|
|
|
1 |
#!/bin/bash
# Entrypoint: build and exec a `vllm serve` command from environment-supplied
# settings, after making sure the cache directories exist.

# Default values — every setting is overridable via the environment.
MODEL=${MODEL:-"microsoft/phi-3-mini"}
DTYPE=${DTYPE:-"half"}
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
ENFORCE_EAGER=${ENFORCE_EAGER:-true}

# Print environment for debugging.
# NOTE(review): `env` dumps ALL variables, including any secrets such as
# HUGGING_FACE_HUB_TOKEN, into the logs — consider filtering.
echo "Environment variables:"
env

# Ensure each cache/config directory exists, then list it for debugging.
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
  if [ ! -d "$dir" ]; then
    mkdir -p "$dir"
  fi
  # NOTE(review): the diff this was recovered from elides the file's lines
  # 21-22 here (plausibly a chmod on "$dir") — TODO restore from the full file.
  ls -la "$dir"
done

# Construct the command as a single string; `exec $CMD` below relies on
# word splitting, so no argument (e.g. $MODEL) may contain whitespace.
# NOTE(review): the source diff elides two continuation lines (the file's
# lines 29-30) between --host and --max-num-batched-tokens — plausibly
# `--port` and `--dtype $DTYPE` ($DTYPE is defaulted above but otherwise
# unused here). TODO restore from the full file.
CMD="vllm serve $MODEL \
    --host 0.0.0.0 \
    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
    --max-num-seqs $MAX_NUM_SEQS \
    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
    --max-model-len $MAX_MODEL_LEN \
    --disable-usage-stats"

# Add enforce-eager only if it's set to true
if [ "$ENFORCE_EAGER" = "true" ]; then
  CMD="$CMD --enforce-eager"
fi

# Execute the command, replacing this shell so vllm receives signals directly.
echo "Running command: $CMD"
exec $CMD
|