Keep getting AssertionError: Flash Attention is not available when loading the model
#7 · opened by Complete-your-profile
I followed the instructions in the model card and confirmed I'm using transformers 4.40.2.
Any ideas?
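For context, this is roughly the load snippet that triggers the traceback below. It is a minimal sketch in the style of the model card; the torch_dtype and device_map arguments are my assumptions, but trust_remote_code=True is needed because the model ships its own modeling_phi3_small.py:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "microsoft/Phi-3-small-8k-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # assumption: bf16 on a CUDA device
    device_map="cuda",           # assumption: single-GPU placement
    trust_remote_code=True,      # required: Phi-3-small uses remote custom code
)
# The AssertionError is raised while the decoder layers are being constructed.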
File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:558, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
556 else:
557 cls.register(config.__class__, model_class, exist_ok=True)
--> 558 return model_class.from_pretrained(
559 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
560 )
561 elif type(config) in cls._model_mapping.keys():
562 model_class = _get_model_class(config, cls._model_mapping)
File ~/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/transformers/modeling_utils.py:3550, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3544 config = cls._autoset_attn_implementation(
3545 config, use_flash_attention_2=use_flash_attention_2, torch_dtype=torch_dtype, device_map=device_map
3546 )
3548 with ContextManagers(init_contexts):
3549 # Let's make sure we don't run the init function of buffer modules
-> 3550 model = cls(config, *model_args, **model_kwargs)
3552 # make sure we use the model's config since the __init__ call might have copied it
3553 config = model.config
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-small-8k-instruct/1adb635233ffce9e13385862a4111606d4382762/modeling_phi3_small.py:903, in Phi3SmallForCausalLM.__init__(self, config)
901 def __init__(self, config):
902 super().__init__(config)
--> 903 self.model = Phi3SmallModel(config)
904 self.vocab_size = config.vocab_size
905 self.lm_head = nn.Linear(config.hidden_size, self.vocab_size, bias=False)
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-small-8k-instruct/1adb635233ffce9e13385862a4111606d4382762/modeling_phi3_small.py:745, in Phi3SmallModel.__init__(self, config)
742 # MuP Embedding scaling
743 self.mup_embedding_multiplier = config.mup_embedding_multiplier
--> 745 self.layers = nn.ModuleList([Phi3SmallDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)])
747 self.final_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_epsilon)
749 self.gradient_checkpointing = False
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-small-8k-instruct/1adb635233ffce9e13385862a4111606d4382762/modeling_phi3_small.py:745, in <listcomp>(.0)
742 # MuP Embedding scaling
743 self.mup_embedding_multiplier = config.mup_embedding_multiplier
--> 745 self.layers = nn.ModuleList([Phi3SmallDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)])
747 self.final_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_epsilon)
749 self.gradient_checkpointing = False
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-small-8k-instruct/1adb635233ffce9e13385862a4111606d4382762/modeling_phi3_small.py:651, in Phi3SmallDecoderLayer.__init__(self, config, layer_idx)
649 super().__init__()
650 self.hidden_size = config.hidden_size
--> 651 self.self_attn = Phi3SmallSelfAttention(config, layer_idx)
652 self.mlp = Phi3SmallMLP(config)
654 self.input_layernorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_epsilon)
File ~/.cache/huggingface/modules/transformers_modules/microsoft/Phi-3-small-8k-instruct/1adb635233ffce9e13385862a4111606d4382762/modeling_phi3_small.py:218, in Phi3SmallSelfAttention.__init__(self, config, layer_idx)
213 if self.config.dense_attention_every_n_layers and ((self.layer_idx + 1) % self.config.dense_attention_every_n_layers == 0):
214 logger.info(
215 f"Layer {layer_idx + 1} is using dense attention since it is divisible by "
216 f"{self.config.dense_attention_every_n_layers}"
217 )
--> 218 assert is_flash_attention_available, "Flash Attention is not available, but is needed for dense attention"
219 else:
220 # BlockSparse related Parameters
221 self.blocksparse_params = BlockSparseParams.from_config(config)
AssertionError: Flash Attention is not available, but is needed for dense attention
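In case it helps anyone hitting the same error: the assertion fires when a dense-attention layer is built and the flash_attn package cannot be imported. A quick environment check (a sketch only; the actual detection logic lives in modeling_phi3_small.py and may differ) looks like this:

import importlib.util
import torch

# The guard in the traceback effectively requires the flash_attn package.
print("CUDA available:   ", torch.cuda.is_available())
print("flash_attn found: ", importlib.util.find_spec("flash_attn") is not None)

# Typical fix, assuming an Ampere-or-newer GPU and a matching CUDA toolchain:
#   pip install flash-attn --no-build-isolation
# then retry loading the model.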
bapatra changed discussion status to closed