fullstack committed on
Commit
80e252f
1 Parent(s): 5a0a55a

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. run_gemma2-2b.sh +9 -0
run_gemma2-2b.sh ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# run_gemma2-2b.sh - start a vLLM server (`vllm serve`) for a local
# Gemma 2 2B checkpoint with LoRA support enabled and one adapter
# registered at startup under the name "lora".
#
# Usage:
#   ./run_gemma2-2b.sh
#
# Run it from the directory that contains ./output, because the LoRA
# adapter path below is relative to the current working directory.

set -euo pipefail

# Local checkpoint directory passed to `vllm serve` as the model.
readonly MODEL_DIR="${HOME}/models/gemma2-2b"
# LoRA adapter directory; relative to the caller's working directory.
readonly LORA_DIR="./output/unsloth/lora_model"

# Fail early with a clear message instead of the shell's generic error.
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm executable not found in PATH\n' >&2
  exit 127
fi

# vLLM feature switches, read by the server process from its environment.
# Per the vLLM docs these permit loading/unloading LoRA adapters at runtime
# and a --max-model-len above the model's own configured limit, respectively.
export VLLM_ALLOW_RUNTIME_LORA_UPDATING=1
export VLLM_ALLOW_LONG_MAX_MODEL_LEN=1

# The argument list is kept in an array so each flag stays a separate word
# and can be commented individually.
serve_args=(
  "${MODEL_DIR}"
  # NOTE(review): 1 asks vLLM to use the whole GPU and leaves no headroom
  # for anything else on the device; vLLM's documented default is 0.9.
  # Kept as in the original; confirm this is intended.
  --gpu-memory-utilization=1
  --port 6002
  # Name clients must send in the "model" field of API requests.
  --served-model-name=gemma
  --trust-remote-code
  --max-model-len 8192
  --disable-log-requests
  --enable-lora
  # name=path pair: the adapter is exposed to clients as "lora".
  --lora-modules "lora=${LORA_DIR}"
  --max-lora-rank 64
)

# Replace this shell with the server so signals and the exit status
# go straight to/from vLLM.
exec vllm serve "${serve_args[@]}"