yusufs committed on
Commit
8132d1f
·
1 Parent(s): 22ac900

fix(using sail/Sailor2-3B-Chat): sail/Sailor2-3B-Chat

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -2
  2. runner.sh +3 -11
Dockerfile CHANGED
@@ -38,7 +38,5 @@ EXPOSE 7860
38
  #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
39
 
40
  RUN chmod +x /app/runner.sh
41
- RUN chmod +x /app/run-llama.sh
42
- RUN chmod +x /app/run-sailor.sh
43
 
44
  CMD ["/app/runner.sh"]
 
38
  #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
39
 
40
  RUN chmod +x /app/runner.sh
 
 
41
 
42
  CMD ["/app/runner.sh"]
runner.sh CHANGED
@@ -13,16 +13,8 @@ case "$MODEL_ID" in
13
  MODEL_REV="0cb88a4f764b7a12671c53f0838cd831a0843b95"
14
  ;;
15
  2)
16
- MODEL_NAME="sail/Sailor-4B-Chat"
17
- MODEL_REV="89a866a7041e6ec023dd462adeca8e28dd53c83e"
18
- ;;
19
- 3)
20
- MODEL_NAME="DeepSeek-R1-Distill-Qwen-32B"
21
- MODEL_REV="d66bcfc2f3fd52799f95943264f32ba15ca0003d"
22
- ;;
23
- 4)
24
- MODEL_NAME="deepseek-ai/DeepSeek-V3"
25
- MODEL_REV="1d044fd82b15f1cedb197a288e50cc96a2c27205"
26
  ;;
27
  *)
28
  echo "Error: Invalid MODEL_ID. Valid values are 1 or 2."
@@ -67,7 +59,7 @@ python -u /app/openai_compatible_api_server.py \
67
  --port 7860 \
68
  --max-num-batched-tokens 32768 \
69
  --max-model-len 32768 \
70
- --dtype float16 \
71
  --enforce-eager \
72
  --gpu-memory-utilization 0.9 \
73
  --enable-prefix-caching \
 
13
  MODEL_REV="0cb88a4f764b7a12671c53f0838cd831a0843b95"
14
  ;;
15
  2)
16
+ MODEL_NAME="sail/Sailor2-3B-Chat"
17
+ MODEL_REV="d60722644e700133576489719dcbc288036628d5"
 
 
 
 
 
 
 
 
18
  ;;
19
  *)
20
  echo "Error: Invalid MODEL_ID. Valid values are 1 or 2."
 
59
  --port 7860 \
60
  --max-num-batched-tokens 32768 \
61
  --max-model-len 32768 \
62
+ --dtype bfloat16 \
63
  --enforce-eager \
64
  --gpu-memory-utilization 0.9 \
65
  --enable-prefix-caching \