File size: 351 Bytes

cfeaf3b
 
 
 
 
 
 
 
63c0baf
27ddfde
63c0baf

# Start `vllm serve` for the local checkpoint at ~/models/gemma2-2b, exposed
# under the served model name "gemma" on port 6002, with LoRA enabled.
#
# Environment variables (set for this one command only):
#   VLLM_ALLOW_RUNTIME_LORA_UPDATING=1
#       Per the vLLM docs, allows LoRA adapters to be loaded/unloaded while
#       the server is running.
#   VLLM_ALLOW_LONG_MAX_MODEL_LEN=1
#       Per the vLLM docs, allows --max-model-len to exceed the length derived
#       from the model's own config.
#
# NOTE: `--lora-modules gpqa=./` registers an adapter named "gpqa" from the
# current working directory, so run this script from the adapter directory.
# NOTE(review): --gpu-memory-utilization=1 requests the whole GPU and leaves
# no headroom for other processes -- confirm this is intended.
#
# Optional flag, currently disabled. To enable it, add a trailing backslash to
# the last line of the command below and append this as a new final line:
#   --guided-decoding-backend lm-format-enforcer
#
# The final argument deliberately has NO trailing backslash: a continuation
# into a blank or commented line only terminates the command by accident.
VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 vllm serve ~/models/gemma2-2b \
  --gpu-memory-utilization=1 \
  --port 6002 \
  --served-model-name="gemma" \
  --trust-remote-code \
  --max-model-len 8192 \
  --disable-log-requests \
  --enable-lora \
  --lora-modules gpqa=./