Upload folder using huggingface_hub

80e252f verified 17 days ago

343 Bytes

	# Launch `vllm serve` for the model stored at ~/models/gemma2-2b.
	#   - listens on port 6002 and exposes the model under the name "gemma"
	#   - LoRA is enabled; the adapter at ./output/unsloth/lora_model is
	#     registered at start-up under the name "lora" (max LoRA rank 64)
	#   - the two VLLM_* variables are prefix assignments, so they apply to
	#     this one process only
	#     NOTE(review): presumably they permit runtime adapter (un)loading and
	#     a --max-model-len above the model's own limit; confirm in vLLM docs.
	# Option order is not significant, except that --lora-modules takes its
	# name=path value(s) immediately after it.
	VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
	VLLM_ALLOW_RUNTIME_LORA_UPDATING=1 \
	vllm serve ~/models/gemma2-2b \
		--served-model-name="gemma" \
		--port 6002 \
		--trust-remote-code \
		--disable-log-requests \
		--gpu-memory-utilization=1 \
		--max-model-len 8192 \
		--enable-lora \
		--max-lora-rank 64 \
		--lora-modules lora=./output/unsloth/lora_model