Hugging Face code to run inference with this model

#1
by TahirC - opened

I am trying to run this model with Hugging Face Transformers, but I am getting an error. I'm running on Kaggle with 2x T4 GPUs.

#################### CODE

from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
from qwen_vl_utils import process_vision_info
import torch

model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", load_in_4bit=True, torch_dtype=torch.float16, device_map="auto"
)

processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")

#################### ERROR

The load_in_4bit and load_in_8bit arguments are deprecated and will be removed in the future versions. Please, pass a BitsAndBytesConfig object in quantization_config argument instead.
You are using a model of type qwen2_vl to instantiate a model of type qwen2_5_vl. This is not supported for all configurations of models and can yield errors.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
/opt/conda/lib/python3.10/site-packages/transformers/quantizers/auto.py:195: UserWarning: You passed quantization_config or equivalent parameters to from_pretrained but the model you're loading already has a quantization_config attribute. The quantization_config from the model will be used.
warnings.warn(warning_msg)

ValueError Traceback (most recent call last)
Cell In[1], line 11
4 import torch
6 # default: Load the model on the available device(s)
7 # model = Qwen2VLForConditionalGeneration.from_pretrained(
8 # "Qwen/Qwen2-VL-7B-Instruct", torch_dtype=torch.float16, device_map="auto"
9 # )
---> 11 model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
12 "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit", load_in_4bit = True,torch_dtype=torch.float16, device_map="auto"
13 )
15 # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
16 # model = Qwen2VLForConditionalGeneration.from_pretrained(
17 # "Qwen/Qwen2-VL-7B-Instruct",
(...)
23 # default processor
24 # processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", min_pixels = 256*28*28, max_pixels = 1280*28*28)
26 processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:4270, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)
4260 if dtype_orig is not None:
4261 torch.set_default_dtype(dtype_orig)
4263 (
4264 model,
4265 missing_keys,
4266 unexpected_keys,
4267 mismatched_keys,
4268 offload_index,
4269 error_msgs,
-> 4270 ) = cls._load_pretrained_model(
4271 model,
4272 state_dict,
4273 loaded_state_dict_keys, # XXX: rename?
4274 resolved_archive_file,
4275 pretrained_model_name_or_path,
4276 ignore_mismatched_sizes=ignore_mismatched_sizes,
4277 sharded_metadata=sharded_metadata,
4278 _fast_init=_fast_init,
4279 low_cpu_mem_usage=low_cpu_mem_usage,
4280 device_map=device_map,
4281 offload_folder=offload_folder,
4282 offload_state_dict=offload_state_dict,
4283 dtype=torch_dtype,
4284 hf_quantizer=hf_quantizer,
4285 keep_in_fp32_modules=keep_in_fp32_modules,
4286 gguf_path=gguf_path,
4287 weights_only=weights_only,
4288 )
4290 # make sure token embedding weights are still tied if needed
4291 model.tie_weights()

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:4848, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path, weights_only)
4846 else:
4847 fixed_state_dict = cls._fix_state_dict_keys_on_load(state_dict)
-> 4848 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
4849 model_to_load,
4850 fixed_state_dict,
4851 start_prefix,
4852 expected_keys,
4853 device_map=device_map,
4854 offload_folder=offload_folder,
4855 offload_index=offload_index,
4856 state_dict_folder=state_dict_folder,
4857 state_dict_index=state_dict_index,
4858 dtype=dtype,
4859 hf_quantizer=hf_quantizer,
4860 is_safetensors=is_safetensors,
4861 keep_in_fp32_modules=keep_in_fp32_modules,
4862 unexpected_keys=unexpected_keys,
4863 )
4864 error_msgs += new_error_msgs
4865 else:
4866 # Sharded checkpoint or whole but low_cpu_mem_usage==True

File /opt/conda/lib/python3.10/site-packages/transformers/modeling_utils.py:876, in _load_state_dict_into_meta_model(model, state_dict, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)
873 param_device = "cpu" if is_local_dist_rank_0() else "meta"
875 # For backward compatibility with older versions of accelerate and for non-quantized params
--> 876 set_module_tensor_to_device(model, param_name, param_device, **set_module_kwargs)
877 else:
878 hf_quantizer.create_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)

File /opt/conda/lib/python3.10/site-packages/accelerate/utils/modeling.py:373, in set_module_tensor_to_device(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)
369 if value is not None:
370 # We can expect mismatches when using bnb 4bit since Params4bit will reshape and pack the weights.
371 # In other cases, we want to make sure we're not loading checkpoints that do not match the config.
372 if old_value.shape != value.shape and param_cls.name != "Params4bit":
--> 373 raise ValueError(
374 f'Trying to set a tensor of shape {value.shape} in "{tensor_name}" (which has shape {old_value.shape}), this looks incorrect.'
375 )
377 if dtype is None:
378 # For compatibility with PyTorch load_state_dict which converts state dict dtype to existing dtype in model
379 value = value.to(old_value.dtype)

ValueError: Trying to set a tensor of shape torch.Size([1280]) in "weight" (which has shape torch.Size([3584])), this looks incorrect.
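
Reading the shape error together with the earlier warning ("model of type qwen2_vl to instantiate a model of type qwen2_5_vl"), I suspect the real problem is that I am loading a Qwen2-VL checkpoint with the Qwen2.5-VL class, and the two architectures have different layer shapes. A minimal sketch of what I think the matching load should look like (my guess, not verified; the -bnb-4bit repo should already carry its own quantization_config, so the deprecated load_in_4bit flag can be dropped):

from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
import torch

# Match the class to the checkpoint: unsloth/Qwen2-VL-7B-Instruct-bnb-4bit is a Qwen2-VL model.
# The -bnb-4bit repo already ships a quantization_config, so no load_in_4bit flag is needed.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    torch_dtype=torch.float16,
    device_map="auto",
)
processor = AutoProcessor.from_pretrained("unsloth/Qwen2-VL-7B-Instruct-bnb-4bit")

Conversely, if I actually want Qwen2.5-VL, I would keep Qwen2_5_VLForConditionalGeneration but point it at a Qwen2.5-VL checkpoint such as unsloth/Qwen2.5-VL-7B-Instruct-bnb-4bit.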

Also, when I run the Qwen2.5-VL Instruct model in the shared Colab notebook, I get this error:

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!

RuntimeError Traceback (most recent call last)
in <cell line: 0>()
20 ] # More models at https://huggingface.co/unsloth
21
---> 22 model, tokenizer = FastVisionModel.from_pretrained(
23 "unsloth/Qwen2.5-VL-7B-Instruct-unsloth-bnb-4bit",
24 load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.

/usr/local/lib/python3.11/dist-packages/unsloth/models/loader.py in from_pretrained(model_name, max_seq_length, dtype, load_in_4bit, token, device_map, rope_scaling, fix_tokenizer, trust_remote_code, use_gradient_checkpointing, resize_model_vocab, revision, return_logits, fullgraph, use_exact_model_name, *args, **kwargs)
446 f"to obtain the latest transformers build, then restart this session."
447 )
--> 448 raise RuntimeError(autoconfig_error or peft_error)
449 pass
450

RuntimeError: The checkpoint you are trying to load has model type qwen2_5_vl but Transformers does not recognize this architecture. This could be because of an issue with the checkpoint, or because your version of Transformers is out of date.
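
The loader's message suggests my installed Transformers is too old to know the qwen2_5_vl model type. My guess is that upgrading transformers in the Colab and then restarting the session would fix it (assuming qwen2_5_vl support has landed in a recent release; if not, installing from the GitHub main branch should work):

!pip install --upgrade transformers
# If the latest release still lacks qwen2_5_vl, install from source instead:
# !pip install git+https://github.com/huggingface/transformers
# Then restart the runtime before re-running FastVisionModel.from_pretrained.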
