[general]
name = "flash_mla"

[torch]
src = [
    "torch-ext/torch_binding.cpp",
    "torch-ext/torch_binding.h",
]

# NOTE(review): this kernel table is named `activation` although every source
# below lives under `flash_mla/` -- possibly a leftover from a template. Confirm
# whether the build tooling depends on this name before renaming it.
[kernel.activation]
cuda-capabilities = [
    # Capabilities intentionally left disabled:
    # "7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9",

    # Only available on H100 and H200
    "9.0", # (Hopper)
]
src = [
    "flash_mla/flash_mla_api.cu",
    "flash_mla/flash_fwd_mla_bf16_sm90.cu",
    "flash_mla/flash_fwd_mla_fp16_sm90.cu",
    "flash_mla/flash_fwd_mla_kernel.h",
    "flash_mla/flash_fwd_mla_metadata.cu",
    "flash_mla/flash_mla.h",
    "flash_mla/named_barrier.h",
    "flash_mla/softmax.h",
    "flash_mla/static_switch.h",
    "flash_mla/utils.h",
]
depends = ["torch", "cutlass_3_6"]