gmastrapas
committed on
Merge branch 'fix-flash-attn-xformers-config'
Browse files- README.md +8 -0
- config.json +3 -2
README.md
CHANGED
@@ -389,6 +389,14 @@ _, _, text_embeddings, image_embeddings = output
|
|
389 |
|
390 |
</details>
|
391 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
392 |
|
393 |
## License
|
394 |
|
|
|
389 |
|
390 |
</details>
|
391 |
|
392 |
+
### On CUDA devices
|
393 |
+
|
394 |
+
On a CUDA enabled torch environment, the model comes in `torch.bfloat16`
|
395 |
+
precision by default. When running on CUDA, it is recommended to install
|
396 |
+
[FlashAttention](https://github.com/Dao-AILab/flash-attention?tab=readme-ov-file#installation-and-features)
|
397 |
+
and [xFormers](https://github.com/facebookresearch/xformers?tab=readme-ov-file#installing-xformers)
|
398 |
+
to make use of their efficient attention mechanism implementations.
|
399 |
+
|
400 |
|
401 |
## License
|
402 |
|
config.json
CHANGED
@@ -28,7 +28,7 @@
|
|
28 |
"task_instructions": {
|
29 |
"retrieval.query": "Represent the query for retrieving evidence documents: "
|
30 |
},
|
31 |
-
"use_flash_attn":
|
32 |
},
|
33 |
"hf_model_name_or_path": "jinaai/jina-embeddings-v3",
|
34 |
"model_type": "jina_clip_text",
|
@@ -36,6 +36,7 @@
|
|
36 |
"proj_bias": false,
|
37 |
"proj_type": null
|
38 |
},
|
|
|
39 |
"truncate_dim": null,
|
40 |
"use_text_flash_attn": null,
|
41 |
"use_vision_xformers": null,
|
@@ -59,6 +60,6 @@
|
|
59 |
"rope_embeddings": true,
|
60 |
"subln": true,
|
61 |
"width": 1024,
|
62 |
-
"x_attention":
|
63 |
}
|
64 |
}
|
|
|
28 |
"task_instructions": {
|
29 |
"retrieval.query": "Represent the query for retrieving evidence documents: "
|
30 |
},
|
31 |
+
"use_flash_attn": true
|
32 |
},
|
33 |
"hf_model_name_or_path": "jinaai/jina-embeddings-v3",
|
34 |
"model_type": "jina_clip_text",
|
|
|
36 |
"proj_bias": false,
|
37 |
"proj_type": null
|
38 |
},
|
39 |
+
"torch_dtype": "bfloat16",
|
40 |
"truncate_dim": null,
|
41 |
"use_text_flash_attn": null,
|
42 |
"use_vision_xformers": null,
|
|
|
60 |
"rope_embeddings": true,
|
61 |
"subln": true,
|
62 |
"width": 1024,
|
63 |
+
"x_attention": true
|
64 |
}
|
65 |
}
|