Damien Benveniste committed on
Commit 1bdf708 · 1 Parent(s): 9aaf11d
Files changed (2)
  1. Dockerfile +6 -3
  2. entrypoint.sh +8 -13
Dockerfile CHANGED
@@ -2,18 +2,21 @@
 FROM vllm/vllm-openai:latest
 
 # Set environment variables
-# ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
+ENV HUGGING_FACE_HUB_TOKEN="your_hf_token_here"
 ENV HF_HOME="/tmp/huggingface"
 ENV XDG_CACHE_HOME="/tmp/cache"
 ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
 ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
+ENV XDG_CONFIG_HOME="/.config"
+ENV VLLM_USE_MODELSCOPE="false"
+ENV VLLM_DISABLE_USAGE_STATS="true"
 
 # Ensure PATH includes common Python locations
 ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
 
 # Create necessary directories and set permissions
-RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config && \
-    chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config
+RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config && \
+    chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config
 
 # Set the working directory
 WORKDIR /app
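
For reference, a minimal sketch of building and running this image locally. The image tag, port mapping, and runtime overrides are assumptions for illustration, not part of the commit; since HUGGING_FACE_HUB_TOKEN is now baked in as a placeholder ENV, it would normally be overridden with -e at run time:

  # hypothetical tag; build from the directory containing this Dockerfile
  docker build -t vllm-openai-serverless .

  # run with GPU access; override the placeholder token and model at runtime
  docker run --gpus all -p 8000:8000 \
    -e HUGGING_FACE_HUB_TOKEN=<your_hf_token> \
    -e MODEL=microsoft/Phi-3-mini-4k-instruct \
    vllm-openai-serverless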
entrypoint.sh CHANGED
@@ -1,20 +1,20 @@
 #!/bin/bash
 
 # Default values
-MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
+MODEL=${MODEL:-"microsoft/phi-3-mini"}
 DTYPE=${DTYPE:-"half"}
-MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
-MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
+MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
+MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
 GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
-MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
-ENFORCE_EAGER=${ENFORCE_EAGER:-true}
+MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
+ENFORCE_EAGER=${ENFORCE_EAGER:-true}
 
 # Print environment for debugging
 echo "Environment variables:"
 env
 
 # Check and set permissions for directories
-for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp/config; do
+for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
 if [ ! -d "$dir" ]; then
 mkdir -p "$dir"
 fi
@@ -23,7 +23,6 @@ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp
 ls -la "$dir"
 done
 
-
 # Construct the command
 CMD="vllm serve $MODEL \
 --host 0.0.0.0 \
@@ -32,18 +31,14 @@ CMD="vllm serve $MODEL \
 --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
 --max-num-seqs $MAX_NUM_SEQS \
 --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
---max-model-len $MAX_MODEL_LEN"
+--max-model-len $MAX_MODEL_LEN \
+--disable-usage-stats"
 
 # Add enforce-eager only if it's set to true
 if [ "$ENFORCE_EAGER" = "true" ]; then
 CMD="$CMD --enforce-eager"
 fi
 
-# Disable usage reporting if set
-if [ "$DISABLE_USAGE_REPORTING" = "true" ]; then
-CMD="$CMD --disable-usage-reporting"
-fi
-
 # Execute the command
 echo "Running command: $CMD"
 exec $CMD
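
Assuming the vLLM OpenAI-compatible server comes up on its default port 8000 (the --port flag sits outside the visible hunks), a quick smoke test from the host might look like this; the model name must match whatever MODEL resolved to at startup:

  curl http://localhost:8000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{"model": "microsoft/phi-3-mini", "prompt": "Hello, my name is", "max_tokens": 32}'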