Update llama_xformers_attention.py
Browse files
llama_xformers_attention.py
CHANGED
@@ -5,6 +5,7 @@ from typing import Optional, Tuple
 
 from transformers.models.llama.modeling_llama import LlamaAttention, apply_rotary_pos_emb, repeat_kv
 
+import xformers
 from xformers.ops.fmha import (
     memory_efficient_attention,
 )