"""Test command line interface for model inference."""
import os

from fastchat.utils import run_cmd
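
# Each CLI test pipes the prompts in test_cli_inputs.txt into the serving CLI;
# every test bails out at the first model whose command exits with a non-zero status.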


def test_single_gpu():
    models = [
        "lmsys/vicuna-7b-v1.5",
        "lmsys/longchat-7b-16k",
        "lmsys/fastchat-t5-3b-v1.0",
        "meta-llama/Llama-2-7b-chat-hf",
        "THUDM/chatglm-6b",
        "THUDM/chatglm2-6b",
        "mosaicml/mpt-7b-chat",
        "tiiuae/falcon-7b-instruct",
        "~/model_weights/alpaca-7b",
        "~/model_weights/RWKV-4-Raven-7B-v11x-Eng99%-Other1%-20230429-ctx8192.pth",
    ]

    for model_path in models:
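        # Skip local checkpoints that are not present on this machine.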
if "model_weights" in model_path and not os.path.exists( |
|
os.path.expanduser(model_path) |
|
): |
|
continue |
|
cmd = ( |
|
f"python3 -m fastchat.serve.cli --model-path {model_path} " |
|
f"--style programmatic < test_cli_inputs.txt" |
|
) |
|
ret = run_cmd(cmd) |
|
if ret != 0: |
|
return |
|
|
|
print("") |


def test_multi_gpu():
    models = [
        "lmsys/vicuna-13b-v1.3",
    ]

    for model_path in models:
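        # Shard the model across two GPUs and cap the memory used on each at 14 GiB.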
        cmd = (
            f"python3 -m fastchat.serve.cli --model-path {model_path} "
            f"--style programmatic --num-gpus 2 --max-gpu-memory 14GiB < test_cli_inputs.txt"
        )
        ret = run_cmd(cmd)
        if ret != 0:
            return
        print("")


def test_8bit():
    models = [
        "lmsys/vicuna-13b-v1.3",
    ]

    for model_path in models:
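        # Load the weights with 8-bit quantization to reduce GPU memory usage.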
        cmd = (
            f"python3 -m fastchat.serve.cli --model-path {model_path} "
            f"--style programmatic --load-8bit < test_cli_inputs.txt"
        )
        ret = run_cmd(cmd)
        if ret != 0:
            return
        print("")


def test_hf_api():
    models = [
        "lmsys/vicuna-7b-v1.5",
        "lmsys/fastchat-t5-3b-v1.0",
    ]

    for model_path in models:
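        # Query the model through the Hugging Face transformers API instead of the interactive CLI.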
cmd = f"python3 -m fastchat.serve.huggingface_api --model-path {model_path}" |
|
ret = run_cmd(cmd) |
|
if ret != 0: |
|
return |
|
print("") |


if __name__ == "__main__":
    test_single_gpu()
    test_multi_gpu()
    test_8bit()
    test_hf_api()