#!/usr/bin/env bash
#
# Launch a vLLM server for the model stored at ~/models/gemma2-2b, exposed
# under the name "gemma" on port 6002, with LoRA enabled and one adapter
# ("gpqa") loaded from the current working directory.
#
# Environment variables set for the server process only:
#   VLLM_ALLOW_RUNTIME_LORA_UPDATING=1  - presumably permits loading/unloading
#                                         LoRA adapters while the server runs
#                                         (confirm against the vLLM docs).
#   VLLM_ALLOW_LONG_MAX_MODEL_LEN=1     - presumably lets --max-model-len
#                                         exceed the length derived from the
#                                         model config (confirm against docs).
#
# NOTE: each backslash must be the LAST character on its line. A backslash
# followed by a space is an escaped space, not a line continuation, and
# silently glues a leading blank onto the next option.
#
# NOTE(review): --gpu-memory-utilization=1 asks for the whole GPU; lower it
# if other processes share the device.
# NOTE(review): "gpqa=./" is resolved relative to the directory the script is
# started from - run it from the adapter directory.
VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 \
VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
vllm serve ~/models/gemma2-2b \
  --gpu-memory-utilization=1 \
  --port 6002 \
  --served-model-name="gemma" \
  --trust-remote-code \
  --max-model-len 8192 \
  --disable-log-requests \
  --enable-lora \
  --lora-modules gpqa=./

# Currently disabled option. It is kept outside the command because a comment
# in the middle of a backslash-continued command ends the command there.
# To enable it, append " \" to the --lora-modules line above and add:
#   --guided-decoding-backend lm-format-enforcer