Spaces:

yusufs
/

sailor2-3b-chat

Paused

yusufs committed on Apr 16

Commit

8132d1f

1 Parent(s): 22ac900

fix(using sail/Sailor2-3B-Chat): sail/Sailor2-3B-Chat

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -38,7 +38,5 @@ EXPOSE 7860
 #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 RUN chmod +x /app/runner.sh
-RUN chmod +x /app/run-llama.sh
-RUN chmod +x /app/run-sailor.sh
 CMD ["/app/runner.sh"]

 #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
 RUN chmod +x /app/runner.sh
 CMD ["/app/runner.sh"]

runner.sh CHANGED Viewed

@@ -13,16 +13,8 @@ case "$MODEL_ID" in
     MODEL_REV="0cb88a4f764b7a12671c53f0838cd831a0843b95"
     ;;
   2)
-    MODEL_NAME="sail/Sailor-4B-Chat"
-    MODEL_REV="89a866a7041e6ec023dd462adeca8e28dd53c83e"
-    ;;
-  3)
-    MODEL_NAME="DeepSeek-R1-Distill-Qwen-32B"
-    MODEL_REV="d66bcfc2f3fd52799f95943264f32ba15ca0003d"
-    ;;
-  4)
-    MODEL_NAME="deepseek-ai/DeepSeek-V3"
-    MODEL_REV="1d044fd82b15f1cedb197a288e50cc96a2c27205"
     ;;
   *)
     echo "Error: Invalid MODEL_ID. Valid values are 1 or 2."
@@ -67,7 +59,7 @@ python -u /app/openai_compatible_api_server.py \
     --port 7860 \
     --max-num-batched-tokens 32768 \
     --max-model-len 32768 \
-    --dtype float16 \
     --enforce-eager \
     --gpu-memory-utilization 0.9 \
     --enable-prefix-caching \

     MODEL_REV="0cb88a4f764b7a12671c53f0838cd831a0843b95"
     ;;
   2)
+    MODEL_NAME="sail/Sailor2-3B-Chat"
+    MODEL_REV="d60722644e700133576489719dcbc288036628d5"
     ;;
   *)
     echo "Error: Invalid MODEL_ID. Valid values are 1 or 2."
     --port 7860 \
     --max-num-batched-tokens 32768 \
     --max-model-len 32768 \
+    --dtype bfloat16 \
     --enforce-eager \
     --gpu-memory-utilization 0.9 \
     --enable-prefix-caching \