Running this model on an inf2.8xlarge instance throws an assertion error

#164
by Arun63 - opened

vllm serve mistralai/Mistral-7B-Instruct-v0.2 --device neuron --chat-template ./tool_chat_template_mistral.jinja --tokenizer-mode mistral --load-format mistral --config-format mistral --tensor-parallel-size 2 --max-model-len 16384 --max-num-seqs 8

INFO 01-27 14:25:19 init.py:179] Automatically detected platform neuron.
INFO 01-27 14:25:19 api_server.py:195] Started engine process with PID 4216
Traceback (most recent call last):
File "/opt/conda/bin/vllm", line 8, in
sys.exit(main())
File "/workspace/vllm/vllm/scripts.py", line 201, in main
args.dispatch_function(args)
File "/workspace/vllm/vllm/scripts.py", line 42, in serve
uvloop.run(run_server(args))
File "/opt/conda/lib/python3.10/site-packages/uvloop/init.py", line 82, in run
return loop.run_until_complete(wrapper())
File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
File "/opt/conda/lib/python3.10/site-packages/uvloop/init.py", line 61, in wrapper
return await main
File "/workspace/vllm/vllm/entrypoints/openai/api_server.py", line 793, in run_server
async with build_async_engine_client(args) as engine_client:
File "/opt/conda/lib/python3.10/contextlib.py", line 199, in aenter
return await anext(self.gen)
File "/workspace/vllm/vllm/entrypoints/openai/api_server.py", line 125, in build_async_engine_client
async with build_async_engine_client_from_engine_args(
File "/opt/conda/lib/python3.10/contextlib.py", line 199, in aenter
return await anext(self.gen)
File "/workspace/vllm/vllm/entrypoints/openai/api_server.py", line 206, in build_async_engine_client_from_engine_args
engine_config = engine_args.create_engine_config()
File "/workspace/vllm/vllm/engine/arg_utils.py", line 1043, in create_engine_config
model_config = self.create_model_config()
File "/workspace/vllm/vllm/engine/arg_utils.py", line 969, in create_model_config
return ModelConfig(
File "/workspace/vllm/vllm/config.py", line 283, in init
hf_config = get_config(self.model, trust_remote_code, revision,
File "/workspace/vllm/vllm/transformers_utils/config.py", line 243, in get_config
config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
File "/workspace/vllm/vllm/transformers_utils/config.py", line 483, in load_params_config
assert isinstance(config_dict, dict)
AssertionError
INFO 01-27 14:25:25 init.py:179] Automatically detected platform neuron.
ERROR 01-27 14:25:25 engine.py:366]
Traceback (most recent call last):
File "/workspace/vllm/vllm/engine/multiprocessing/engine.py", line 357, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
File "/workspace/vllm/vllm/engine/multiprocessing/engine.py", line 114, in from_engine_args
engine_config = engine_args.create_engine_config(usage_context)
File "/workspace/vllm/vllm/engine/arg_utils.py", line 1043, in create_engine_config
model_config = self.create_model_config()
File "/workspace/vllm/vllm/engine/arg_utils.py", line 969, in create_model_config
return ModelConfig(
File "/workspace/vllm/vllm/config.py", line 283, in init
hf_config = get_config(self.model, trust_remote_code, revision,
File "/workspace/vllm/vllm/transformers_utils/config.py", line 243, in get_config
config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
File "/workspace/vllm/vllm/transformers_utils/config.py", line 483, in load_params_config
assert isinstance(config_dict, dict)
AssertionError
Process SpawnProcess-1:
Traceback (most recent call last):
File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
self.run()
File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/workspace/vllm/vllm/engine/multiprocessing/engine.py", line 368, in run_mp_engine
raise e
File "/workspace/vllm/vllm/engine/multiprocessing/engine.py", line 357, in run_mp_engine
engine = MQLLMEngine.from_engine_args(engine_args=engine_args,
File "/workspace/vllm/vllm/engine/multiprocessing/engine.py", line 114, in from_engine_args
engine_config = engine_args.create_engine_config(usage_context)
File "/workspace/vllm/vllm/engine/arg_utils.py", line 1043, in create_engine_config
model_config = self.create_model_config()
File "/workspace/vllm/vllm/engine/arg_utils.py", line 969, in create_model_config
return ModelConfig(
File "/workspace/vllm/vllm/config.py", line 283, in init
hf_config = get_config(self.model, trust_remote_code, revision,
File "/workspace/vllm/vllm/transformers_utils/config.py", line 243, in get_config
config = load_params_config(model, revision, token=HF_TOKEN, **kwargs)
File "/workspace/vllm/vllm/transformers_utils/config.py", line 483, in load_params_config
assert isinstance(config_dict, dict)
AssertionError

Sign up or log in to comment