NEOX / configs /sparse.yml
akswelh's picture
Upload 251 files
d90b3a8 verified
# Add this to your config to use sparse (local) attention on every other layer, alternating with global attention.
{
"attention_config": [[["local", "global"], "all"]],
# Sparsity config:
# (These are the defaults for local sliding-window sparsity. Training will work without this block,
# but it is left in for illustrative purposes.)
# See https://www.deepspeed.ai/tutorials/sparse-attention/#how-to-config-sparsity-structures
# for more detailed configuration instructions and the available parameters.
"sparsity_config": {
"block": 16, # block size
"num_local_blocks": 32,
}
}