Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
1bdf708
1
Parent(s):
9aaf11d
modified
Browse files
- Dockerfile +6 -3
- entrypoint.sh +8 -13
Dockerfile
CHANGED
@@ -2,18 +2,21 @@
|
|
2 |
FROM vllm/vllm-openai:latest
|
3 |
|
4 |
# Set environment variables
|
5 |
-
|
6 |
ENV HF_HOME="/tmp/huggingface"
|
7 |
ENV XDG_CACHE_HOME="/tmp/cache"
|
8 |
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
|
9 |
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
|
|
|
|
|
|
|
10 |
|
11 |
# Ensure PATH includes common Python locations
|
12 |
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
|
13 |
|
14 |
# Create necessary directories and set permissions
|
15 |
-
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
16 |
-
chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
17 |
|
18 |
# Set the working directory
|
19 |
WORKDIR /app
|
|
|
2 |
FROM vllm/vllm-openai:latest

# Set environment variables.
# SECURITY: do NOT bake a Hugging Face token into the image — anyone who can
# pull the image can read every ENV value. Supply the token at runtime instead,
# e.g.:  docker run -e HUGGING_FACE_HUB_TOKEN=hf_xxx <image>
# (the original hardcoded a "your_hf_token_here" placeholder here, which
# invites committing a real token).
ENV HF_HOME="/tmp/huggingface"
ENV XDG_CACHE_HOME="/tmp/cache"
ENV NUMBA_CACHE_DIR="/tmp/numba_cache"
ENV OUTLINES_CACHE_DIR="/tmp/outlines_cache"
ENV XDG_CONFIG_HOME="/.config"
ENV VLLM_USE_MODELSCOPE="false"
ENV VLLM_DISABLE_USAGE_STATS="true"

# Ensure PATH includes common Python locations
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"

# Create the cache/config directories up front and make them world-writable
# (0777) — presumably so an arbitrary non-root runtime user can write to
# them; TODO confirm the required UID against the deployment platform.
RUN mkdir -p /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config && \
    chmod -R 777 /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config

# Set the working directory
WORKDIR /app
|
entrypoint.sh
CHANGED
@@ -1,20 +1,20 @@
|
|
1 |
#!/bin/bash
|
2 |
|
3 |
# Default values
|
4 |
-
MODEL=${MODEL:-"microsoft/
|
5 |
DTYPE=${DTYPE:-"half"}
|
6 |
-
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
|
7 |
-
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
8 |
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
9 |
-
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
10 |
-
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
12 |
# Print environment for debugging
|
13 |
echo "Environment variables:"
|
14 |
env
|
15 |
|
16 |
# Check and set permissions for directories
|
17 |
-
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache
|
18 |
if [ ! -d "$dir" ]; then
|
19 |
mkdir -p "$dir"
|
20 |
fi
|
@@ -23,7 +23,6 @@ for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /tmp
|
|
23 |
ls -la "$dir"
|
24 |
done
|
25 |
|
26 |
-
|
27 |
# Construct the command
|
28 |
CMD="vllm serve $MODEL \
|
29 |
--host 0.0.0.0 \
|
@@ -32,18 +31,14 @@ CMD="vllm serve $MODEL \
|
|
32 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
33 |
--max-num-seqs $MAX_NUM_SEQS \
|
34 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
35 |
-
--max-model-len $MAX_MODEL_LEN
|
|
|
36 |
|
37 |
# Add enforce-eager only if it's set to true
|
38 |
if [ "$ENFORCE_EAGER" = "true" ]; then
|
39 |
CMD="$CMD --enforce-eager"
|
40 |
fi
|
41 |
|
42 |
-
# Disable usage reporting if set
|
43 |
-
if [ "$DISABLE_USAGE_REPORTING" = "true" ]; then
|
44 |
-
CMD="$CMD --disable-usage-reporting"
|
45 |
-
fi
|
46 |
-
|
47 |
# Execute the command
|
48 |
echo "Running command: $CMD"
|
49 |
exec $CMD
|
|
|
1 |
#!/bin/bash
# Entrypoint: build and exec a `vllm serve` command from environment-supplied
# settings, after making sure the cache directories exist.

# Default values — every setting is overridable via the environment.
MODEL=${MODEL:-"microsoft/phi-3-mini"}
DTYPE=${DTYPE:-"half"}
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
ENFORCE_EAGER=${ENFORCE_EAGER:-true}

# Print environment for debugging.
# NOTE(review): `env` dumps ALL variables, including any secrets such as
# HUGGING_FACE_HUB_TOKEN, into the logs — consider filtering.
echo "Environment variables:"
env

# Ensure each cache/config directory exists, then list it for debugging.
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
  if [ ! -d "$dir" ]; then
    mkdir -p "$dir"
  fi
  # NOTE(review): the diff this was recovered from elides the file's lines
  # 21-22 here (plausibly a chmod on "$dir") — TODO restore from the full file.
  ls -la "$dir"
done

# Construct the command as a single string; `exec $CMD` below relies on
# word splitting, so no argument (e.g. $MODEL) may contain whitespace.
# NOTE(review): the source diff elides two continuation lines (the file's
# lines 29-30) between --host and --max-num-batched-tokens — plausibly
# `--port` and `--dtype $DTYPE` ($DTYPE is defaulted above but otherwise
# unused here). TODO restore from the full file.
CMD="vllm serve $MODEL \
    --host 0.0.0.0 \
    --max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
    --max-num-seqs $MAX_NUM_SEQS \
    --gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
    --max-model-len $MAX_MODEL_LEN \
    --disable-usage-stats"

# Add enforce-eager only if it's set to true
if [ "$ENFORCE_EAGER" = "true" ]; then
  CMD="$CMD --enforce-eager"
fi

# Execute the command, replacing this shell so vllm receives signals directly.
echo "Running command: $CMD"
exec $CMD
|