File size: 351 Bytes
cfeaf3b 63c0baf 27ddfde 63c0baf |
1 2 3 4 5 6 7 8 9 10 11 12 |
# Start `vllm serve` for the model stored at ~/models/gemma2-2b, listening on
# port 6002 and exposed to clients under the served model name "gemma".
# LoRA support is enabled and one adapter, named "gpqa", is registered at
# startup.
#
# Environment variables set for this invocation only:
#   VLLM_ALLOW_RUNTIME_LORA_UPDATING=1  presumably permits adding/removing LoRA
#                                       adapters while the server is running;
#                                       confirm against the vLLM docs.
#   VLLM_ALLOW_LONG_MAX_MODEL_LEN=1     presumably lets --max-model-len exceed
#                                       the length derived from the model
#                                       config; confirm against the vLLM docs.
#
# NOTE(review): --gpu-memory-utilization=1 asks for the whole GPU; verify this
#   leaves enough headroom on the target machine.
# NOTE(review): `gpqa=./` is a relative path, so the adapter is resolved
#   against the directory this script is launched from, not the script's own
#   location. Run it from the adapter directory.
#
# The final option deliberately has no trailing backslash: a continuation that
# runs into a commented-out line only works by accident (the `#` must be
# preceded by whitespace) and a backslash inside a comment never continues
# the line.
VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve ~/models/gemma2-2b \
  --gpu-memory-utilization=1 \
  --port 6002 \
  --served-model-name="gemma" \
  --trust-remote-code \
  --max-model-len 8192 \
  --disable-log-requests \
  --enable-lora \
  --lora-modules gpqa=./
# Disabled option. To re-enable it, append ` \` to the last option line above
# and add this flag as a new final line of the command:
#   --guided-decoding-backend lm-format-enforcer
|