Spaces:
Sleeping
Sleeping
Flash attention test
Browse files
app.py
CHANGED
@@ -21,8 +21,9 @@ app.add_middleware(
|
|
21 |
model_id = "vikhyatk/moondream2"
|
22 |
revision = "2024-08-26"
|
23 |
model = AutoModelForCausalLM.from_pretrained(
|
24 |
-
model_id, trust_remote_code=True, revision=revision
|
25 |
-
)
|
|
|
26 |
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
|
27 |
|
28 |
@app.get("/")
|
|
|
21 |
model_id = "vikhyatk/moondream2"
|
22 |
revision = "2024-08-26"
|
23 |
model = AutoModelForCausalLM.from_pretrained(
|
24 |
+
model_id, trust_remote_code=True, revision=revision,
|
25 |
+
torch_dtype=torch.float16, attn_implementation="flash_attention_2"
|
26 |
+
).to("cuda")
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
|
28 |
|
29 |
@app.get("/")
|