lord-reso committed on
Commit
9ad7760
·
1 Parent(s): b5c39fd

Flash attention test

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -21,8 +21,9 @@ app.add_middleware(
21
  model_id = "vikhyatk/moondream2"
22
  revision = "2024-08-26"
23
  model = AutoModelForCausalLM.from_pretrained(
24
- model_id, trust_remote_code=True, revision=revision
25
- )
 
26
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
27
 
28
  @app.get("/")
 
21
  model_id = "vikhyatk/moondream2"
22
  revision = "2024-08-26"
23
  model = AutoModelForCausalLM.from_pretrained(
24
+ model_id, trust_remote_code=True, revision=revision,
25
+ torch_dtype=torch.float16, attn_implementation="flash_attention_2"
26
+ ).to("cuda")
27
  tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
28
 
29
  @app.get("/")