anakin87 committed on
Commit 7613560 · 1 Parent(s): 1219a4a

try flash attention

Files changed (1)
  1. app.py +1 -0
app.py CHANGED
@@ -36,6 +36,7 @@ model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
     torch_dtype=torch.bfloat16,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
 )
 model.config.sliding_window = 4096
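
For context, a minimal sketch of the full loading call in app.py after this change. Only the keyword arguments come from the diff above; the model_id value is a hypothetical placeholder, since the actual value defined earlier in app.py is not shown here. Note that attn_implementation="flash_attention_2" requires the flash-attn package to be installed and a supported CUDA GPU.

# Sketch of the post-change model loading code (model_id is a placeholder, not from the diff)
import torch
from transformers import AutoModelForCausalLM

model_id = "example/model-id"  # hypothetical; app.py defines the real value above this hunk

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",  # added by this commit; needs flash-attn installed
    trust_remote_code=True,
)
model.config.sliding_window = 4096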