drbh committed
Commit · 8acf152 · 1 Parent(s): 1f83cde

feat: add inital build output
- .gitattributes +1 -0
- build/torch25-cxx11-cu118-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx11-cu118-x86_64-linux/flash_mla/_flash_mla_y6bdeh54o26h6.abi3.so +3 -0
- build/torch25-cxx11-cu118-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch25-cxx11-cu121-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx11-cu121-x86_64-linux/flash_mla/_flash_mla_mytbuokq46mgm.abi3.so +3 -0
- build/torch25-cxx11-cu121-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch25-cxx11-cu124-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx11-cu124-x86_64-linux/flash_mla/_flash_mla_iohry4qbuggqa.abi3.so +3 -0
- build/torch25-cxx11-cu124-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch25-cxx98-cu118-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx98-cu118-x86_64-linux/flash_mla/_flash_mla_ks7izefym4ha2.abi3.so +3 -0
- build/torch25-cxx98-cu118-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch25-cxx98-cu121-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx98-cu121-x86_64-linux/flash_mla/_flash_mla_cd6mvrbov7aye.abi3.so +3 -0
- build/torch25-cxx98-cu121-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch25-cxx98-cu124-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch25-cxx98-cu124-x86_64-linux/flash_mla/_flash_mla_muvqsop7ydtdg.abi3.so +3 -0
- build/torch25-cxx98-cu124-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx11-cu118-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx11-cu118-x86_64-linux/flash_mla/_flash_mla_4gt6haj2eaeve.abi3.so +3 -0
- build/torch26-cxx11-cu118-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx11-cu124-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx11-cu124-x86_64-linux/flash_mla/_flash_mla_fqlv2wdc4vwcs.abi3.so +3 -0
- build/torch26-cxx11-cu124-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx11-cu126-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx11-cu126-x86_64-linux/flash_mla/_flash_mla_b4xwzd3vzzbdy.abi3.so +3 -0
- build/torch26-cxx11-cu126-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx98-cu118-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx98-cu118-x86_64-linux/flash_mla/_flash_mla_iyfleyrgf34la.abi3.so +3 -0
- build/torch26-cxx98-cu118-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx98-cu124-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx98-cu124-x86_64-linux/flash_mla/_flash_mla_fywd4tw2yn6ew.abi3.so +3 -0
- build/torch26-cxx98-cu124-x86_64-linux/flash_mla/_ops.py +9 -0
- build/torch26-cxx98-cu126-x86_64-linux/flash_mla/__init__.py +36 -0
- build/torch26-cxx98-cu126-x86_64-linux/flash_mla/_flash_mla_duoktlzcb6q5w.abi3.so +3 -0
- build/torch26-cxx98-cu126-x86_64-linux/flash_mla/_ops.py +9 -0
.gitattributes ADDED
@@ -0,0 +1 @@
+*.so filter=lfs diff=lfs merge=lfs -text
build/torch25-cxx11-cu118-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
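For orientation, here is a minimal, hypothetical decode-step call sequence against this wrapper. Everything not in the diff is an assumption for illustration: the tensor shapes and dtypes, reusing `kcache` as the value cache, the `(tile_scheduler_metadata, num_splits)` unpacking of `get_mla_metadata`'s result, and the `d ** -0.5` scale. The actual contracts are defined by the compiled kernel, not by this sketch.

```python
import torch

from flash_mla import get_mla_metadata, mha_fwd_kvcache_mla

# All shapes below are illustrative assumptions, not documented constraints.
b, s_q, h_q, h_kv = 4, 1, 128, 1          # single-token decode step
d, d_v = 576, 512                          # assumed MLA head dims
block_size, blocks_per_seq = 64, 16        # assumed paged-KV layout

q = torch.randn(b, s_q, h_q, d, dtype=torch.bfloat16, device="cuda")
kcache = torch.randn(b * blocks_per_seq, block_size, h_kv, d,
                     dtype=torch.bfloat16, device="cuda")
seqlens_k = torch.full((b,), 512, dtype=torch.int32, device="cuda")
block_table = torch.arange(b * blocks_per_seq, dtype=torch.int32,
                           device="cuda").reshape(b, blocks_per_seq)

# Assumed: the op returns (tile_scheduler_metadata, num_splits) for the
# scheduler, computed once per batch of cache sequence lengths.
tile_scheduler_metadata, num_splits = get_mla_metadata(
    seqlens_k, s_q * h_q // h_kv, h_kv
)

out = mha_fwd_kvcache_mla(
    q,
    kcache,
    kcache,              # assumed: MLA reads V from the same cache as K
    d_v,
    seqlens_k,
    block_table,
    d ** -0.5,           # conventional softmax scaling, assumed default
    True,                # causal decode
    tile_scheduler_metadata,
    num_splits,
)
```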
build/torch25-cxx11-cu118-x86_64-linux/flash_mla/_flash_mla_y6bdeh54o26h6.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c90743918a3a5a7e3f21c3de54c524af2d04d5bf112b9b638ac8b05c5009538
+size 2558728
build/torch25-cxx11-cu118-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_y6bdeh54o26h6
+ops = torch.ops._flash_mla_y6bdeh54o26h6
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_y6bdeh54o26h6::{op_name}"
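The `_ops.py` shim binds the package to the build-specific shared object (the hash suffix keeps each ABI variant's operator namespace unique), and `add_op_namespace_prefix` just produces the fully qualified operator name. A small illustrative use, with the op name chosen as an example:

```python
from flash_mla._ops import add_op_namespace_prefix

# Yields "_flash_mla_y6bdeh54o26h6::get_mla_metadata" for this build — the
# name under which the op is registered by the loaded shared library.
qualified_name = add_op_namespace_prefix("get_mla_metadata")
```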
build/torch25-cxx11-cu121-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch25-cxx11-cu121-x86_64-linux/flash_mla/_flash_mla_mytbuokq46mgm.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d20c322601136e544cd1995cee8b21098f6cbc4279b7ebd8e007c59150d5df9
+size 2593856
build/torch25-cxx11-cu121-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_mytbuokq46mgm
+ops = torch.ops._flash_mla_mytbuokq46mgm
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_mytbuokq46mgm::{op_name}"
build/torch25-cxx11-cu124-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch25-cxx11-cu124-x86_64-linux/flash_mla/_flash_mla_iohry4qbuggqa.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccb2bc37dac026a6d0aff1fa3069d07fb3e90d3a0d88bb985508549918e1b454
+size 2574832
build/torch25-cxx11-cu124-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_iohry4qbuggqa
+ops = torch.ops._flash_mla_iohry4qbuggqa
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_iohry4qbuggqa::{op_name}"
build/torch25-cxx98-cu118-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch25-cxx98-cu118-x86_64-linux/flash_mla/_flash_mla_ks7izefym4ha2.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:770a95aee0d760d66c11cee67284473199f49ed0a37264c9e98911e520653e5d
+size 2551384
build/torch25-cxx98-cu118-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_ks7izefym4ha2
+ops = torch.ops._flash_mla_ks7izefym4ha2
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_ks7izefym4ha2::{op_name}"
build/torch25-cxx98-cu121-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch25-cxx98-cu121-x86_64-linux/flash_mla/_flash_mla_cd6mvrbov7aye.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dc0f7b58d315d7f713b383732f4866df52131833afeb02fb8c8c0290b867ec9
+size 2590688
build/torch25-cxx98-cu121-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_cd6mvrbov7aye
+ops = torch.ops._flash_mla_cd6mvrbov7aye
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_cd6mvrbov7aye::{op_name}"
build/torch25-cxx98-cu124-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch25-cxx98-cu124-x86_64-linux/flash_mla/_flash_mla_muvqsop7ydtdg.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10bbc09993e05062a2e257ea388e3aeaee1c1c54960ac352857bb921973f5dc8
+size 2571656
build/torch25-cxx98-cu124-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_muvqsop7ydtdg
+ops = torch.ops._flash_mla_muvqsop7ydtdg
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_muvqsop7ydtdg::{op_name}"
build/torch26-cxx11-cu118-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx11-cu118-x86_64-linux/flash_mla/_flash_mla_4gt6haj2eaeve.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7b1010f156fb14c68680155d12b6415ebaf818eb8f8a162b8b1cd0f89f085a4
+size 2563176
build/torch26-cxx11-cu118-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_4gt6haj2eaeve
+ops = torch.ops._flash_mla_4gt6haj2eaeve
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_4gt6haj2eaeve::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx11-cu124-x86_64-linux/flash_mla/_flash_mla_fqlv2wdc4vwcs.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13d1bfb8d6fa769168838204b8bcc62bc4c48ed81275e8a75d18f467dab352a0
+size 2575176
build/torch26-cxx11-cu124-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_fqlv2wdc4vwcs
+ops = torch.ops._flash_mla_fqlv2wdc4vwcs
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_fqlv2wdc4vwcs::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx11-cu126-x86_64-linux/flash_mla/_flash_mla_b4xwzd3vzzbdy.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c6cfeb75b5f9c8b88624f573fe80afc3f4d9f08828630c64c84e8100afe1101
+size 2555136
build/torch26-cxx11-cu126-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_b4xwzd3vzzbdy
+ops = torch.ops._flash_mla_b4xwzd3vzzbdy
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_b4xwzd3vzzbdy::{op_name}"
build/torch26-cxx98-cu118-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx98-cu118-x86_64-linux/flash_mla/_flash_mla_iyfleyrgf34la.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25a5dc0c023db2b535161f494470ebbf0cc0c261028906259db65cfa396fd6a5
+size 2551728
build/torch26-cxx98-cu118-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_iyfleyrgf34la
+ops = torch.ops._flash_mla_iyfleyrgf34la
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_iyfleyrgf34la::{op_name}"
build/torch26-cxx98-cu124-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx98-cu124-x86_64-linux/flash_mla/_flash_mla_fywd4tw2yn6ew.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59ea97594e0550f64aa3b5ad0358e68306f284344acef250e0293ccf50e87273
+size 2572008
build/torch26-cxx98-cu124-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_fywd4tw2yn6ew
+ops = torch.ops._flash_mla_fywd4tw2yn6ew
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_fywd4tw2yn6ew::{op_name}"
build/torch26-cxx98-cu126-x86_64-linux/flash_mla/__init__.py ADDED
@@ -0,0 +1,36 @@
+import torch
+
+from ._ops import ops
+
+
+def get_mla_metadata(seqlens_k: torch.Tensor, s_q: int, h_kv: int):
+    return ops.get_mla_metadata(seqlens_k, s_q, h_kv)
+
+
+def mha_fwd_kvcache_mla(
+    q: torch.Tensor,
+    kcache: torch.Tensor,
+    vcache_: torch.Tensor,
+    head_size_v: int,
+    seqlens_k: torch.Tensor,
+    block_table: torch.Tensor,
+    softmax_scale: float,
+    is_causal_: bool,
+    tile_scheduler_metadata: torch.Tensor,
+    num_splits: torch.Tensor,
+) -> torch.Tensor:
+    # TODO: remove when resolved
+    unknown_param = 0
+    return ops.mha_fwd_kvcache_mla(
+        q,
+        kcache,
+        vcache_,
+        head_size_v,
+        seqlens_k,
+        block_table,
+        softmax_scale,
+        is_causal_,
+        tile_scheduler_metadata,
+        num_splits,
+        unknown_param,
+    )
build/torch26-cxx98-cu126-x86_64-linux/flash_mla/_flash_mla_duoktlzcb6q5w.abi3.so ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e85fa15e0103badddb1cf6f894e06cf42ce1300431066af499feae6e5df4c66b
+size 2551968
build/torch26-cxx98-cu126-x86_64-linux/flash_mla/_ops.py ADDED
@@ -0,0 +1,9 @@
+import torch
+from . import _flash_mla_duoktlzcb6q5w
+ops = torch.ops._flash_mla_duoktlzcb6q5w
+
+def add_op_namespace_prefix(op_name: str):
+    """
+    Prefix op by namespace.
+    """
+    return f"_flash_mla_duoktlzcb6q5w::{op_name}"