nada5 tomaarsen HF staff committed on
Commit
2c0dadb
·
verified ·
0 Parent(s):

Super-squash branch 'main' using huggingface_hub

Browse files

Co-authored-by: tomaarsen <[email protected]>

.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 4096,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": false
10
+ }
README.md ADDED
@@ -0,0 +1,2186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - mteb
4
+ model-index:
5
+ - name: NV-Embed-v2
6
+ results:
7
+ - dataset:
8
+ config: en
9
+ name: MTEB AmazonCounterfactualClassification (en)
10
+ revision: e8379541af4e31359cca9fbcf4b00f2671dba205
11
+ split: test
12
+ type: mteb/amazon_counterfactual
13
+ metrics:
14
+ - type: accuracy
15
+ value: 94.28358208955224
16
+ - type: accuracy_stderr
17
+ value: 0.40076780842082305
18
+ - type: ap
19
+ value: 76.49097318319616
20
+ - type: ap_stderr
21
+ value: 1.2418692675183929
22
+ - type: f1
23
+ value: 91.41982003001168
24
+ - type: f1_stderr
25
+ value: 0.5043921413093579
26
+ - type: main_score
27
+ value: 94.28358208955224
28
+ task:
29
+ type: Classification
30
+ - dataset:
31
+ config: default
32
+ name: MTEB AmazonPolarityClassification
33
+ revision: e2d317d38cd51312af73b3d32a06d1a08b442046
34
+ split: test
35
+ type: mteb/amazon_polarity
36
+ metrics:
37
+ - type: accuracy
38
+ value: 97.74185000000001
39
+ - type: accuracy_stderr
40
+ value: 0.07420471683120942
41
+ - type: ap
42
+ value: 96.4737144875525
43
+ - type: ap_stderr
44
+ value: 0.2977518241541558
45
+ - type: f1
46
+ value: 97.7417581594921
47
+ - type: f1_stderr
48
+ value: 0.07428763617010377
49
+ - type: main_score
50
+ value: 97.74185000000001
51
+ task:
52
+ type: Classification
53
+ - dataset:
54
+ config: en
55
+ name: MTEB AmazonReviewsClassification (en)
56
+ revision: 1399c76144fd37290681b995c656ef9b2e06e26d
57
+ split: test
58
+ type: mteb/amazon_reviews_multi
59
+ metrics:
60
+ - type: accuracy
61
+ value: 63.96000000000001
62
+ - type: accuracy_stderr
63
+ value: 1.815555011559825
64
+ - type: f1
65
+ value: 62.49361841640459
66
+ - type: f1_stderr
67
+ value: 2.829339314126457
68
+ - type: main_score
69
+ value: 63.96000000000001
70
+ task:
71
+ type: Classification
72
+ - dataset:
73
+ config: default
74
+ name: MTEB ArguAna
75
+ revision: c22ab2a51041ffd869aaddef7af8d8215647e41a
76
+ split: test
77
+ type: mteb/arguana
78
+ metrics:
79
+ - type: map_at_1
80
+ value: 46.515
81
+ - type: map_at_10
82
+ value: 62.392
83
+ - type: map_at_100
84
+ value: 62.732
85
+ - type: map_at_1000
86
+ value: 62.733000000000004
87
+ - type: map_at_3
88
+ value: 58.701
89
+ - type: map_at_5
90
+ value: 61.027
91
+ - type: mrr_at_1
92
+ value: 0.0
93
+ - type: mrr_at_10
94
+ value: 0.0
95
+ - type: mrr_at_100
96
+ value: 0.0
97
+ - type: mrr_at_1000
98
+ value: 0.0
99
+ - type: mrr_at_3
100
+ value: 0.0
101
+ - type: mrr_at_5
102
+ value: 0.0
103
+ - type: ndcg_at_1
104
+ value: 46.515
105
+ - type: ndcg_at_10
106
+ value: 70.074
107
+ - type: ndcg_at_100
108
+ value: 71.395
109
+ - type: ndcg_at_1000
110
+ value: 71.405
111
+ - type: ndcg_at_3
112
+ value: 62.643
113
+ - type: ndcg_at_5
114
+ value: 66.803
115
+ - type: precision_at_1
116
+ value: 46.515
117
+ - type: precision_at_10
118
+ value: 9.41
119
+ - type: precision_at_100
120
+ value: 0.996
121
+ - type: precision_at_1000
122
+ value: 0.1
123
+ - type: precision_at_3
124
+ value: 24.68
125
+ - type: precision_at_5
126
+ value: 16.814
127
+ - type: recall_at_1
128
+ value: 46.515
129
+ - type: recall_at_10
130
+ value: 94.097
131
+ - type: recall_at_100
132
+ value: 99.57300000000001
133
+ - type: recall_at_1000
134
+ value: 99.644
135
+ - type: recall_at_3
136
+ value: 74.03999999999999
137
+ - type: recall_at_5
138
+ value: 84.068
139
+ - type: main_score
140
+ value: 70.074
141
+ task:
142
+ type: Retrieval
143
+ - dataset:
144
+ config: default
145
+ name: MTEB ArxivClusteringP2P
146
+ revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
147
+ split: test
148
+ type: mteb/arxiv-clustering-p2p
149
+ metrics:
150
+ - type: main_score
151
+ value: 55.79933795955242
152
+ - type: v_measure
153
+ value: 55.79933795955242
154
+ - type: v_measure_std
155
+ value: 14.575108141916148
156
+ task:
157
+ type: Clustering
158
+ - dataset:
159
+ config: default
160
+ name: MTEB ArxivClusteringS2S
161
+ revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
162
+ split: test
163
+ type: mteb/arxiv-clustering-s2s
164
+ metrics:
165
+ - type: main_score
166
+ value: 51.262845995850334
167
+ - type: v_measure
168
+ value: 51.262845995850334
169
+ - type: v_measure_std
170
+ value: 14.727824473104173
171
+ task:
172
+ type: Clustering
173
+ - dataset:
174
+ config: default
175
+ name: MTEB AskUbuntuDupQuestions
176
+ revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
177
+ split: test
178
+ type: mteb/askubuntudupquestions-reranking
179
+ metrics:
180
+ - type: map
181
+ value: 67.46477327480808
182
+ - type: mrr
183
+ value: 79.50160488941653
184
+ - type: main_score
185
+ value: 67.46477327480808
186
+ task:
187
+ type: Reranking
188
+ - dataset:
189
+ config: default
190
+ name: MTEB BIOSSES
191
+ revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
192
+ split: test
193
+ type: mteb/biosses-sts
194
+ metrics:
195
+ - type: cosine_pearson
196
+ value: 89.74311007980987
197
+ - type: cosine_spearman
198
+ value: 87.41644967443246
199
+ - type: manhattan_pearson
200
+ value: 88.57457108347744
201
+ - type: manhattan_spearman
202
+ value: 87.59295972042997
203
+ - type: euclidean_pearson
204
+ value: 88.27108977118459
205
+ - type: euclidean_spearman
206
+ value: 87.41644967443246
207
+ - type: main_score
208
+ value: 87.41644967443246
209
+ task:
210
+ type: STS
211
+ - dataset:
212
+ config: default
213
+ name: MTEB Banking77Classification
214
+ revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
215
+ split: test
216
+ type: mteb/banking77
217
+ metrics:
218
+ - type: accuracy
219
+ value: 92.41558441558443
220
+ - type: accuracy_stderr
221
+ value: 0.37701502251934443
222
+ - type: f1
223
+ value: 92.38130170447671
224
+ - type: f1_stderr
225
+ value: 0.39115151225617767
226
+ - type: main_score
227
+ value: 92.41558441558443
228
+ task:
229
+ type: Classification
230
+ - dataset:
231
+ config: default
232
+ name: MTEB BiorxivClusteringP2P
233
+ revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
234
+ split: test
235
+ type: mteb/biorxiv-clustering-p2p
236
+ metrics:
237
+ - type: main_score
238
+ value: 54.08649516394218
239
+ - type: v_measure
240
+ value: 54.08649516394218
241
+ - type: v_measure_std
242
+ value: 0.5303233693045373
243
+ task:
244
+ type: Clustering
245
+ - dataset:
246
+ config: default
247
+ name: MTEB BiorxivClusteringS2S
248
+ revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
249
+ split: test
250
+ type: mteb/biorxiv-clustering-s2s
251
+ metrics:
252
+ - type: main_score
253
+ value: 49.60352214167779
254
+ - type: v_measure
255
+ value: 49.60352214167779
256
+ - type: v_measure_std
257
+ value: 0.7176198612516721
258
+ task:
259
+ type: Clustering
260
+ - dataset:
261
+ config: default
262
+ name: MTEB CQADupstackRetrieval
263
+ revision: 46989137a86843e03a6195de44b09deda022eec7
264
+ split: test
265
+ type: CQADupstackRetrieval_is_a_combined_dataset
266
+ metrics:
267
+ - type: map_at_1
268
+ value: 31.913249999999998
269
+ - type: map_at_10
270
+ value: 43.87733333333334
271
+ - type: map_at_100
272
+ value: 45.249916666666664
273
+ - type: map_at_1000
274
+ value: 45.350583333333326
275
+ - type: map_at_3
276
+ value: 40.316833333333335
277
+ - type: map_at_5
278
+ value: 42.317083333333336
279
+ - type: mrr_at_1
280
+ value: 0.0
281
+ - type: mrr_at_10
282
+ value: 0.0
283
+ - type: mrr_at_100
284
+ value: 0.0
285
+ - type: mrr_at_1000
286
+ value: 0.0
287
+ - type: mrr_at_3
288
+ value: 0.0
289
+ - type: mrr_at_5
290
+ value: 0.0
291
+ - type: ndcg_at_1
292
+ value: 38.30616666666667
293
+ - type: ndcg_at_10
294
+ value: 50.24175000000001
295
+ - type: ndcg_at_100
296
+ value: 55.345333333333336
297
+ - type: ndcg_at_1000
298
+ value: 56.91225000000001
299
+ - type: ndcg_at_3
300
+ value: 44.67558333333333
301
+ - type: ndcg_at_5
302
+ value: 47.32333333333334
303
+ - type: precision_at_1
304
+ value: 38.30616666666667
305
+ - type: precision_at_10
306
+ value: 9.007416666666666
307
+ - type: precision_at_100
308
+ value: 1.3633333333333333
309
+ - type: precision_at_1000
310
+ value: 0.16691666666666666
311
+ - type: precision_at_3
312
+ value: 20.895666666666667
313
+ - type: precision_at_5
314
+ value: 14.871666666666666
315
+ - type: recall_at_1
316
+ value: 31.913249999999998
317
+ - type: recall_at_10
318
+ value: 64.11891666666666
319
+ - type: recall_at_100
320
+ value: 85.91133333333333
321
+ - type: recall_at_1000
322
+ value: 96.28225
323
+ - type: recall_at_3
324
+ value: 48.54749999999999
325
+ - type: recall_at_5
326
+ value: 55.44283333333334
327
+ - type: main_score
328
+ value: 50.24175000000001
329
+ task:
330
+ type: Retrieval
331
+ - dataset:
332
+ config: default
333
+ name: MTEB ClimateFEVER
334
+ revision: 47f2ac6acb640fc46020b02a5b59fdda04d39380
335
+ split: test
336
+ type: mteb/climate-fever
337
+ metrics:
338
+ - type: map_at_1
339
+ value: 19.556
340
+ - type: map_at_10
341
+ value: 34.623
342
+ - type: map_at_100
343
+ value: 36.97
344
+ - type: map_at_1000
345
+ value: 37.123
346
+ - type: map_at_3
347
+ value: 28.904999999999998
348
+ - type: map_at_5
349
+ value: 31.955
350
+ - type: mrr_at_1
351
+ value: 0.0
352
+ - type: mrr_at_10
353
+ value: 0.0
354
+ - type: mrr_at_100
355
+ value: 0.0
356
+ - type: mrr_at_1000
357
+ value: 0.0
358
+ - type: mrr_at_3
359
+ value: 0.0
360
+ - type: mrr_at_5
361
+ value: 0.0
362
+ - type: ndcg_at_1
363
+ value: 44.104
364
+ - type: ndcg_at_10
365
+ value: 45.388
366
+ - type: ndcg_at_100
367
+ value: 52.793
368
+ - type: ndcg_at_1000
369
+ value: 55.108999999999995
370
+ - type: ndcg_at_3
371
+ value: 38.604
372
+ - type: ndcg_at_5
373
+ value: 40.806
374
+ - type: precision_at_1
375
+ value: 44.104
376
+ - type: precision_at_10
377
+ value: 14.143
378
+ - type: precision_at_100
379
+ value: 2.2190000000000003
380
+ - type: precision_at_1000
381
+ value: 0.266
382
+ - type: precision_at_3
383
+ value: 29.316
384
+ - type: precision_at_5
385
+ value: 21.98
386
+ - type: recall_at_1
387
+ value: 19.556
388
+ - type: recall_at_10
389
+ value: 52.120999999999995
390
+ - type: recall_at_100
391
+ value: 76.509
392
+ - type: recall_at_1000
393
+ value: 89.029
394
+ - type: recall_at_3
395
+ value: 34.919
396
+ - type: recall_at_5
397
+ value: 42.18
398
+ - type: main_score
399
+ value: 45.388
400
+ task:
401
+ type: Retrieval
402
+ - dataset:
403
+ config: default
404
+ name: MTEB DBPedia
405
+ revision: c0f706b76e590d620bd6618b3ca8efdd34e2d659
406
+ split: test
407
+ type: mteb/dbpedia
408
+ metrics:
409
+ - type: map_at_1
410
+ value: 10.714
411
+ - type: map_at_10
412
+ value: 25.814999999999998
413
+ - type: map_at_100
414
+ value: 37.845
415
+ - type: map_at_1000
416
+ value: 39.974
417
+ - type: map_at_3
418
+ value: 17.201
419
+ - type: map_at_5
420
+ value: 21.062
421
+ - type: mrr_at_1
422
+ value: 0.0
423
+ - type: mrr_at_10
424
+ value: 0.0
425
+ - type: mrr_at_100
426
+ value: 0.0
427
+ - type: mrr_at_1000
428
+ value: 0.0
429
+ - type: mrr_at_3
430
+ value: 0.0
431
+ - type: mrr_at_5
432
+ value: 0.0
433
+ - type: ndcg_at_1
434
+ value: 66.0
435
+ - type: ndcg_at_10
436
+ value: 53.496
437
+ - type: ndcg_at_100
438
+ value: 58.053
439
+ - type: ndcg_at_1000
440
+ value: 64.886
441
+ - type: ndcg_at_3
442
+ value: 57.656
443
+ - type: ndcg_at_5
444
+ value: 55.900000000000006
445
+ - type: precision_at_1
446
+ value: 77.25
447
+ - type: precision_at_10
448
+ value: 43.65
449
+ - type: precision_at_100
450
+ value: 13.76
451
+ - type: precision_at_1000
452
+ value: 2.5940000000000003
453
+ - type: precision_at_3
454
+ value: 61.0
455
+ - type: precision_at_5
456
+ value: 54.65
457
+ - type: recall_at_1
458
+ value: 10.714
459
+ - type: recall_at_10
460
+ value: 31.173000000000002
461
+ - type: recall_at_100
462
+ value: 63.404
463
+ - type: recall_at_1000
464
+ value: 85.874
465
+ - type: recall_at_3
466
+ value: 18.249000000000002
467
+ - type: recall_at_5
468
+ value: 23.69
469
+ - type: main_score
470
+ value: 53.496
471
+ task:
472
+ type: Retrieval
473
+ - dataset:
474
+ config: default
475
+ name: MTEB EmotionClassification
476
+ revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
477
+ split: test
478
+ type: mteb/emotion
479
+ metrics:
480
+ - type: accuracy
481
+ value: 93.38499999999999
482
+ - type: accuracy_stderr
483
+ value: 0.13793114224133846
484
+ - type: f1
485
+ value: 90.12141028353496
486
+ - type: f1_stderr
487
+ value: 0.174640257706043
488
+ - type: main_score
489
+ value: 93.38499999999999
490
+ task:
491
+ type: Classification
492
+ - dataset:
493
+ config: default
494
+ name: MTEB FEVER
495
+ revision: bea83ef9e8fb933d90a2f1d5515737465d613e12
496
+ split: test
497
+ type: mteb/fever
498
+ metrics:
499
+ - type: map_at_1
500
+ value: 84.66900000000001
501
+ - type: map_at_10
502
+ value: 91.52799999999999
503
+ - type: map_at_100
504
+ value: 91.721
505
+ - type: map_at_1000
506
+ value: 91.73
507
+ - type: map_at_3
508
+ value: 90.752
509
+ - type: map_at_5
510
+ value: 91.262
511
+ - type: mrr_at_1
512
+ value: 0.0
513
+ - type: mrr_at_10
514
+ value: 0.0
515
+ - type: mrr_at_100
516
+ value: 0.0
517
+ - type: mrr_at_1000
518
+ value: 0.0
519
+ - type: mrr_at_3
520
+ value: 0.0
521
+ - type: mrr_at_5
522
+ value: 0.0
523
+ - type: ndcg_at_1
524
+ value: 91.20899999999999
525
+ - type: ndcg_at_10
526
+ value: 93.74900000000001
527
+ - type: ndcg_at_100
528
+ value: 94.279
529
+ - type: ndcg_at_1000
530
+ value: 94.408
531
+ - type: ndcg_at_3
532
+ value: 92.923
533
+ - type: ndcg_at_5
534
+ value: 93.376
535
+ - type: precision_at_1
536
+ value: 91.20899999999999
537
+ - type: precision_at_10
538
+ value: 11.059
539
+ - type: precision_at_100
540
+ value: 1.1560000000000001
541
+ - type: precision_at_1000
542
+ value: 0.11800000000000001
543
+ - type: precision_at_3
544
+ value: 35.129
545
+ - type: precision_at_5
546
+ value: 21.617
547
+ - type: recall_at_1
548
+ value: 84.66900000000001
549
+ - type: recall_at_10
550
+ value: 97.03399999999999
551
+ - type: recall_at_100
552
+ value: 98.931
553
+ - type: recall_at_1000
554
+ value: 99.65899999999999
555
+ - type: recall_at_3
556
+ value: 94.76299999999999
557
+ - type: recall_at_5
558
+ value: 95.968
559
+ - type: main_score
560
+ value: 93.74900000000001
561
+ task:
562
+ type: Retrieval
563
+ - dataset:
564
+ config: default
565
+ name: MTEB FiQA2018
566
+ revision: 27a168819829fe9bcd655c2df245fb19452e8e06
567
+ split: test
568
+ type: mteb/fiqa
569
+ metrics:
570
+ - type: map_at_1
571
+ value: 34.866
572
+ - type: map_at_10
573
+ value: 58.06099999999999
574
+ - type: map_at_100
575
+ value: 60.028999999999996
576
+ - type: map_at_1000
577
+ value: 60.119
578
+ - type: map_at_3
579
+ value: 51.304
580
+ - type: map_at_5
581
+ value: 55.054
582
+ - type: mrr_at_1
583
+ value: 0.0
584
+ - type: mrr_at_10
585
+ value: 0.0
586
+ - type: mrr_at_100
587
+ value: 0.0
588
+ - type: mrr_at_1000
589
+ value: 0.0
590
+ - type: mrr_at_3
591
+ value: 0.0
592
+ - type: mrr_at_5
593
+ value: 0.0
594
+ - type: ndcg_at_1
595
+ value: 64.815
596
+ - type: ndcg_at_10
597
+ value: 65.729
598
+ - type: ndcg_at_100
599
+ value: 71.14
600
+ - type: ndcg_at_1000
601
+ value: 72.336
602
+ - type: ndcg_at_3
603
+ value: 61.973
604
+ - type: ndcg_at_5
605
+ value: 62.858000000000004
606
+ - type: precision_at_1
607
+ value: 64.815
608
+ - type: precision_at_10
609
+ value: 17.87
610
+ - type: precision_at_100
611
+ value: 2.373
612
+ - type: precision_at_1000
613
+ value: 0.258
614
+ - type: precision_at_3
615
+ value: 41.152
616
+ - type: precision_at_5
617
+ value: 29.568
618
+ - type: recall_at_1
619
+ value: 34.866
620
+ - type: recall_at_10
621
+ value: 72.239
622
+ - type: recall_at_100
623
+ value: 91.19
624
+ - type: recall_at_1000
625
+ value: 98.154
626
+ - type: recall_at_3
627
+ value: 56.472
628
+ - type: recall_at_5
629
+ value: 63.157
630
+ - type: main_score
631
+ value: 65.729
632
+ task:
633
+ type: Retrieval
634
+ - dataset:
635
+ config: default
636
+ name: MTEB HotpotQA
637
+ revision: ab518f4d6fcca38d87c25209f94beba119d02014
638
+ split: test
639
+ type: mteb/hotpotqa
640
+ metrics:
641
+ - type: map_at_1
642
+ value: 44.651999999999994
643
+ - type: map_at_10
644
+ value: 79.95100000000001
645
+ - type: map_at_100
646
+ value: 80.51700000000001
647
+ - type: map_at_1000
648
+ value: 80.542
649
+ - type: map_at_3
650
+ value: 77.008
651
+ - type: map_at_5
652
+ value: 78.935
653
+ - type: mrr_at_1
654
+ value: 0.0
655
+ - type: mrr_at_10
656
+ value: 0.0
657
+ - type: mrr_at_100
658
+ value: 0.0
659
+ - type: mrr_at_1000
660
+ value: 0.0
661
+ - type: mrr_at_3
662
+ value: 0.0
663
+ - type: mrr_at_5
664
+ value: 0.0
665
+ - type: ndcg_at_1
666
+ value: 89.305
667
+ - type: ndcg_at_10
668
+ value: 85.479
669
+ - type: ndcg_at_100
670
+ value: 87.235
671
+ - type: ndcg_at_1000
672
+ value: 87.669
673
+ - type: ndcg_at_3
674
+ value: 81.648
675
+ - type: ndcg_at_5
676
+ value: 83.88600000000001
677
+ - type: precision_at_1
678
+ value: 89.305
679
+ - type: precision_at_10
680
+ value: 17.807000000000002
681
+ - type: precision_at_100
682
+ value: 1.9140000000000001
683
+ - type: precision_at_1000
684
+ value: 0.197
685
+ - type: precision_at_3
686
+ value: 53.756
687
+ - type: precision_at_5
688
+ value: 34.018
689
+ - type: recall_at_1
690
+ value: 44.651999999999994
691
+ - type: recall_at_10
692
+ value: 89.034
693
+ - type: recall_at_100
694
+ value: 95.719
695
+ - type: recall_at_1000
696
+ value: 98.535
697
+ - type: recall_at_3
698
+ value: 80.635
699
+ - type: recall_at_5
700
+ value: 85.044
701
+ - type: main_score
702
+ value: 85.479
703
+ task:
704
+ type: Retrieval
705
+ - dataset:
706
+ config: default
707
+ name: MTEB ImdbClassification
708
+ revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
709
+ split: test
710
+ type: mteb/imdb
711
+ metrics:
712
+ - type: accuracy
713
+ value: 97.1376
714
+ - type: accuracy_stderr
715
+ value: 0.04571914259913447
716
+ - type: ap
717
+ value: 95.92783808558808
718
+ - type: ap_stderr
719
+ value: 0.05063782483358255
720
+ - type: f1
721
+ value: 97.13755519177172
722
+ - type: f1_stderr
723
+ value: 0.04575943074086138
724
+ - type: main_score
725
+ value: 97.1376
726
+ task:
727
+ type: Classification
728
+ - dataset:
729
+ config: default
730
+ name: MTEB MSMARCO
731
+ revision: c5a29a104738b98a9e76336939199e264163d4a0
732
+ split: dev
733
+ type: mteb/msmarco
734
+ metrics:
735
+ - type: map_at_1
736
+ value: 0.0
737
+ - type: map_at_10
738
+ value: 38.342
739
+ - type: map_at_100
740
+ value: 0.0
741
+ - type: map_at_1000
742
+ value: 0.0
743
+ - type: map_at_3
744
+ value: 0.0
745
+ - type: map_at_5
746
+ value: 0.0
747
+ - type: mrr_at_1
748
+ value: 0.0
749
+ - type: mrr_at_10
750
+ value: 0.0
751
+ - type: mrr_at_100
752
+ value: 0.0
753
+ - type: mrr_at_1000
754
+ value: 0.0
755
+ - type: mrr_at_3
756
+ value: 0.0
757
+ - type: mrr_at_5
758
+ value: 0.0
759
+ - type: ndcg_at_1
760
+ value: 0.0
761
+ - type: ndcg_at_10
762
+ value: 45.629999999999995
763
+ - type: ndcg_at_100
764
+ value: 0.0
765
+ - type: ndcg_at_1000
766
+ value: 0.0
767
+ - type: ndcg_at_3
768
+ value: 0.0
769
+ - type: ndcg_at_5
770
+ value: 0.0
771
+ - type: precision_at_1
772
+ value: 0.0
773
+ - type: precision_at_10
774
+ value: 7.119000000000001
775
+ - type: precision_at_100
776
+ value: 0.0
777
+ - type: precision_at_1000
778
+ value: 0.0
779
+ - type: precision_at_3
780
+ value: 0.0
781
+ - type: precision_at_5
782
+ value: 0.0
783
+ - type: recall_at_1
784
+ value: 0.0
785
+ - type: recall_at_10
786
+ value: 67.972
787
+ - type: recall_at_100
788
+ value: 0.0
789
+ - type: recall_at_1000
790
+ value: 0.0
791
+ - type: recall_at_3
792
+ value: 0.0
793
+ - type: recall_at_5
794
+ value: 0.0
795
+ - type: main_score
796
+ value: 45.629999999999995
797
+ task:
798
+ type: Retrieval
799
+ - dataset:
800
+ config: en
801
+ name: MTEB MTOPDomainClassification (en)
802
+ revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
803
+ split: test
804
+ type: mteb/mtop_domain
805
+ metrics:
806
+ - type: accuracy
807
+ value: 99.24988600091199
808
+ - type: accuracy_stderr
809
+ value: 0.04496826931900734
810
+ - type: f1
811
+ value: 99.15933275095276
812
+ - type: f1_stderr
813
+ value: 0.05565039139747446
814
+ - type: main_score
815
+ value: 99.24988600091199
816
+ task:
817
+ type: Classification
818
+ - dataset:
819
+ config: en
820
+ name: MTEB MTOPIntentClassification (en)
821
+ revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
822
+ split: test
823
+ type: mteb/mtop_intent
824
+ metrics:
825
+ - type: accuracy
826
+ value: 94.3684450524396
827
+ - type: accuracy_stderr
828
+ value: 0.8436548701322188
829
+ - type: f1
830
+ value: 77.33022623133307
831
+ - type: f1_stderr
832
+ value: 0.9228425861187275
833
+ - type: main_score
834
+ value: 94.3684450524396
835
+ task:
836
+ type: Classification
837
+ - dataset:
838
+ config: en
839
+ name: MTEB MassiveIntentClassification (en)
840
+ revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
841
+ split: test
842
+ type: mteb/amazon_massive_intent
843
+ metrics:
844
+ - type: accuracy
845
+ value: 86.09616677874916
846
+ - type: accuracy_stderr
847
+ value: 0.9943208055590853
848
+ - type: f1
849
+ value: 83.4902056490062
850
+ - type: f1_stderr
851
+ value: 0.7626189310074184
852
+ - type: main_score
853
+ value: 86.09616677874916
854
+ task:
855
+ type: Classification
856
+ - dataset:
857
+ config: en
858
+ name: MTEB MassiveScenarioClassification (en)
859
+ revision: 7d571f92784cd94a019292a1f45445077d0ef634
860
+ split: test
861
+ type: mteb/amazon_massive_scenario
862
+ metrics:
863
+ - type: accuracy
864
+ value: 92.17215870880968
865
+ - type: accuracy_stderr
866
+ value: 0.25949941333658166
867
+ - type: f1
868
+ value: 91.36757392422702
869
+ - type: f1_stderr
870
+ value: 0.29139507298154815
871
+ - type: main_score
872
+ value: 92.17215870880968
873
+ task:
874
+ type: Classification
875
+ - dataset:
876
+ config: default
877
+ name: MTEB MedrxivClusteringP2P
878
+ revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
879
+ split: test
880
+ type: mteb/medrxiv-clustering-p2p
881
+ metrics:
882
+ - type: main_score
883
+ value: 46.09497344077905
884
+ - type: v_measure
885
+ value: 46.09497344077905
886
+ - type: v_measure_std
887
+ value: 1.44871520869784
888
+ task:
889
+ type: Clustering
890
+ - dataset:
891
+ config: default
892
+ name: MTEB MedrxivClusteringS2S
893
+ revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
894
+ split: test
895
+ type: mteb/medrxiv-clustering-s2s
896
+ metrics:
897
+ - type: main_score
898
+ value: 44.861049989560684
899
+ - type: v_measure
900
+ value: 44.861049989560684
901
+ - type: v_measure_std
902
+ value: 1.432199293162203
903
+ task:
904
+ type: Clustering
905
+ - dataset:
906
+ config: default
907
+ name: MTEB MindSmallReranking
908
+ revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
909
+ split: test
910
+ type: mteb/mind_small
911
+ metrics:
912
+ - type: map
913
+ value: 31.75936162919999
914
+ - type: mrr
915
+ value: 32.966812736541236
916
+ - type: main_score
917
+ value: 31.75936162919999
918
+ task:
919
+ type: Reranking
920
+ - dataset:
921
+ config: default
922
+ name: MTEB NFCorpus
923
+ revision: ec0fa4fe99da2ff19ca1214b7966684033a58814
924
+ split: test
925
+ type: mteb/nfcorpus
926
+ metrics:
927
+ - type: map_at_1
928
+ value: 7.893999999999999
929
+ - type: map_at_10
930
+ value: 17.95
931
+ - type: map_at_100
932
+ value: 23.474
933
+ - type: map_at_1000
934
+ value: 25.412000000000003
935
+ - type: map_at_3
936
+ value: 12.884
937
+ - type: map_at_5
938
+ value: 15.171000000000001
939
+ - type: mrr_at_1
940
+ value: 0.0
941
+ - type: mrr_at_10
942
+ value: 0.0
943
+ - type: mrr_at_100
944
+ value: 0.0
945
+ - type: mrr_at_1000
946
+ value: 0.0
947
+ - type: mrr_at_3
948
+ value: 0.0
949
+ - type: mrr_at_5
950
+ value: 0.0
951
+ - type: ndcg_at_1
952
+ value: 55.728
953
+ - type: ndcg_at_10
954
+ value: 45.174
955
+ - type: ndcg_at_100
956
+ value: 42.18
957
+ - type: ndcg_at_1000
958
+ value: 50.793
959
+ - type: ndcg_at_3
960
+ value: 50.322
961
+ - type: ndcg_at_5
962
+ value: 48.244
963
+ - type: precision_at_1
964
+ value: 57.276
965
+ - type: precision_at_10
966
+ value: 33.437
967
+ - type: precision_at_100
968
+ value: 10.671999999999999
969
+ - type: precision_at_1000
970
+ value: 2.407
971
+ - type: precision_at_3
972
+ value: 46.646
973
+ - type: precision_at_5
974
+ value: 41.672
975
+ - type: recall_at_1
976
+ value: 7.893999999999999
977
+ - type: recall_at_10
978
+ value: 22.831000000000003
979
+ - type: recall_at_100
980
+ value: 43.818
981
+ - type: recall_at_1000
982
+ value: 75.009
983
+ - type: recall_at_3
984
+ value: 14.371
985
+ - type: recall_at_5
986
+ value: 17.752000000000002
987
+ - type: main_score
988
+ value: 45.174
989
+ task:
990
+ type: Retrieval
991
+ - dataset:
992
+ config: default
993
+ name: MTEB NQ
994
+ revision: b774495ed302d8c44a3a7ea25c90dbce03968f31
995
+ split: test
996
+ type: mteb/nq
997
+ metrics:
998
+ - type: map_at_1
999
+ value: 49.351
1000
+ - type: map_at_10
1001
+ value: 66.682
1002
+ - type: map_at_100
1003
+ value: 67.179
1004
+ - type: map_at_1000
1005
+ value: 67.18499999999999
1006
+ - type: map_at_3
1007
+ value: 62.958999999999996
1008
+ - type: map_at_5
1009
+ value: 65.364
1010
+ - type: mrr_at_1
1011
+ value: 0.0
1012
+ - type: mrr_at_10
1013
+ value: 0.0
1014
+ - type: mrr_at_100
1015
+ value: 0.0
1016
+ - type: mrr_at_1000
1017
+ value: 0.0
1018
+ - type: mrr_at_3
1019
+ value: 0.0
1020
+ - type: mrr_at_5
1021
+ value: 0.0
1022
+ - type: ndcg_at_1
1023
+ value: 55.417
1024
+ - type: ndcg_at_10
1025
+ value: 73.568
1026
+ - type: ndcg_at_100
1027
+ value: 75.35
1028
+ - type: ndcg_at_1000
1029
+ value: 75.478
1030
+ - type: ndcg_at_3
1031
+ value: 67.201
1032
+ - type: ndcg_at_5
1033
+ value: 70.896
1034
+ - type: precision_at_1
1035
+ value: 55.417
1036
+ - type: precision_at_10
1037
+ value: 11.036999999999999
1038
+ - type: precision_at_100
1039
+ value: 1.204
1040
+ - type: precision_at_1000
1041
+ value: 0.121
1042
+ - type: precision_at_3
1043
+ value: 29.654000000000003
1044
+ - type: precision_at_5
1045
+ value: 20.006
1046
+ - type: recall_at_1
1047
+ value: 49.351
1048
+ - type: recall_at_10
1049
+ value: 91.667
1050
+ - type: recall_at_100
1051
+ value: 98.89
1052
+ - type: recall_at_1000
1053
+ value: 99.812
1054
+ - type: recall_at_3
1055
+ value: 75.715
1056
+ - type: recall_at_5
1057
+ value: 84.072
1058
+ - type: main_score
1059
+ value: 73.568
1060
+ task:
1061
+ type: Retrieval
1062
+ - dataset:
1063
+ config: default
1064
+ name: MTEB QuoraRetrieval
1065
+ revision: e4e08e0b7dbe3c8700f0daef558ff32256715259
1066
+ split: test
1067
+ type: mteb/quora
1068
+ metrics:
1069
+ - type: map_at_1
1070
+ value: 71.358
1071
+ - type: map_at_10
1072
+ value: 85.474
1073
+ - type: map_at_100
1074
+ value: 86.101
1075
+ - type: map_at_1000
1076
+ value: 86.114
1077
+ - type: map_at_3
1078
+ value: 82.562
1079
+ - type: map_at_5
1080
+ value: 84.396
1081
+ - type: mrr_at_1
1082
+ value: 0.0
1083
+ - type: mrr_at_10
1084
+ value: 0.0
1085
+ - type: mrr_at_100
1086
+ value: 0.0
1087
+ - type: mrr_at_1000
1088
+ value: 0.0
1089
+ - type: mrr_at_3
1090
+ value: 0.0
1091
+ - type: mrr_at_5
1092
+ value: 0.0
1093
+ - type: ndcg_at_1
1094
+ value: 82.12
1095
+ - type: ndcg_at_10
1096
+ value: 89.035
1097
+ - type: ndcg_at_100
1098
+ value: 90.17399999999999
1099
+ - type: ndcg_at_1000
1100
+ value: 90.243
1101
+ - type: ndcg_at_3
1102
+ value: 86.32300000000001
1103
+ - type: ndcg_at_5
1104
+ value: 87.85
1105
+ - type: precision_at_1
1106
+ value: 82.12
1107
+ - type: precision_at_10
1108
+ value: 13.55
1109
+ - type: precision_at_100
1110
+ value: 1.54
1111
+ - type: precision_at_1000
1112
+ value: 0.157
1113
+ - type: precision_at_3
1114
+ value: 37.89
1115
+ - type: precision_at_5
1116
+ value: 24.9
1117
+ - type: recall_at_1
1118
+ value: 71.358
1119
+ - type: recall_at_10
1120
+ value: 95.855
1121
+ - type: recall_at_100
1122
+ value: 99.711
1123
+ - type: recall_at_1000
1124
+ value: 99.994
1125
+ - type: recall_at_3
1126
+ value: 88.02
1127
+ - type: recall_at_5
1128
+ value: 92.378
1129
+ - type: main_score
1130
+ value: 89.035
1131
+ task:
1132
+ type: Retrieval
1133
+ - dataset:
1134
+ config: default
1135
+ name: MTEB RedditClustering
1136
+ revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
1137
+ split: test
1138
+ type: mteb/reddit-clustering
1139
+ metrics:
1140
+ - type: main_score
1141
+ value: 71.0984522742521
1142
+ - type: v_measure
1143
+ value: 71.0984522742521
1144
+ - type: v_measure_std
1145
+ value: 3.5668139917058044
1146
+ task:
1147
+ type: Clustering
1148
+ - dataset:
1149
+ config: default
1150
+ name: MTEB RedditClusteringP2P
1151
+ revision: 385e3cb46b4cfa89021f56c4380204149d0efe33
1152
+ split: test
1153
+ type: mteb/reddit-clustering-p2p
1154
+ metrics:
1155
+ - type: main_score
1156
+ value: 74.94499641904133
1157
+ - type: v_measure
1158
+ value: 74.94499641904133
1159
+ - type: v_measure_std
1160
+ value: 11.419672879389248
1161
+ task:
1162
+ type: Clustering
1163
+ - dataset:
1164
+ config: default
1165
+ name: MTEB SCIDOCS
1166
+ revision: f8c2fcf00f625baaa80f62ec5bd9e1fff3b8ae88
1167
+ split: test
1168
+ type: mteb/scidocs
1169
+ metrics:
1170
+ - type: map_at_1
1171
+ value: 5.343
1172
+ - type: map_at_10
1173
+ value: 13.044
1174
+ - type: map_at_100
1175
+ value: 15.290999999999999
1176
+ - type: map_at_1000
1177
+ value: 15.609
1178
+ - type: map_at_3
1179
+ value: 9.227
1180
+ - type: map_at_5
1181
+ value: 11.158
1182
+ - type: mrr_at_1
1183
+ value: 0.0
1184
+ - type: mrr_at_10
1185
+ value: 0.0
1186
+ - type: mrr_at_100
1187
+ value: 0.0
1188
+ - type: mrr_at_1000
1189
+ value: 0.0
1190
+ - type: mrr_at_3
1191
+ value: 0.0
1192
+ - type: mrr_at_5
1193
+ value: 0.0
1194
+ - type: ndcg_at_1
1195
+ value: 26.3
1196
+ - type: ndcg_at_10
1197
+ value: 21.901
1198
+ - type: ndcg_at_100
1199
+ value: 30.316
1200
+ - type: ndcg_at_1000
1201
+ value: 35.547000000000004
1202
+ - type: ndcg_at_3
1203
+ value: 20.560000000000002
1204
+ - type: ndcg_at_5
1205
+ value: 18.187
1206
+ - type: precision_at_1
1207
+ value: 26.3
1208
+ - type: precision_at_10
1209
+ value: 11.34
1210
+ - type: precision_at_100
1211
+ value: 2.344
1212
+ - type: precision_at_1000
1213
+ value: 0.359
1214
+ - type: precision_at_3
1215
+ value: 18.967
1216
+ - type: precision_at_5
1217
+ value: 15.920000000000002
1218
+ - type: recall_at_1
1219
+ value: 5.343
1220
+ - type: recall_at_10
1221
+ value: 22.997
1222
+ - type: recall_at_100
1223
+ value: 47.562
1224
+ - type: recall_at_1000
1225
+ value: 72.94500000000001
1226
+ - type: recall_at_3
1227
+ value: 11.533
1228
+ - type: recall_at_5
1229
+ value: 16.148
1230
+ - type: main_score
1231
+ value: 21.901
1232
+ task:
1233
+ type: Retrieval
1234
+ - dataset:
1235
+ config: default
1236
+ name: MTEB SICK-R
1237
+ revision: 20a6d6f312dd54037fe07a32d58e5e168867909d
1238
+ split: test
1239
+ type: mteb/sickr-sts
1240
+ metrics:
1241
+ - type: cosine_pearson
1242
+ value: 87.3054603493591
1243
+ - type: cosine_spearman
1244
+ value: 82.14763206055602
1245
+ - type: manhattan_pearson
1246
+ value: 84.78737790237557
1247
+ - type: manhattan_spearman
1248
+ value: 81.88455356002758
1249
+ - type: euclidean_pearson
1250
+ value: 85.00668629311117
1251
+ - type: euclidean_spearman
1252
+ value: 82.14763037860851
1253
+ - type: main_score
1254
+ value: 82.14763206055602
1255
+ task:
1256
+ type: STS
1257
+ - dataset:
1258
+ config: default
1259
+ name: MTEB STS12
1260
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1261
+ split: test
1262
+ type: mteb/sts12-sts
1263
+ metrics:
1264
+ - type: cosine_pearson
1265
+ value: 86.6911864687294
1266
+ - type: cosine_spearman
1267
+ value: 77.89286260403269
1268
+ - type: manhattan_pearson
1269
+ value: 82.87240347680857
1270
+ - type: manhattan_spearman
1271
+ value: 78.10055393740326
1272
+ - type: euclidean_pearson
1273
+ value: 82.72282535777123
1274
+ - type: euclidean_spearman
1275
+ value: 77.89256648406325
1276
+ - type: main_score
1277
+ value: 77.89286260403269
1278
+ task:
1279
+ type: STS
1280
+ - dataset:
1281
+ config: default
1282
+ name: MTEB STS13
1283
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1284
+ split: test
1285
+ type: mteb/sts13-sts
1286
+ metrics:
1287
+ - type: cosine_pearson
1288
+ value: 87.7220832598633
1289
+ - type: cosine_spearman
1290
+ value: 88.30238972017452
1291
+ - type: manhattan_pearson
1292
+ value: 87.88214789140248
1293
+ - type: manhattan_spearman
1294
+ value: 88.24770220032391
1295
+ - type: euclidean_pearson
1296
+ value: 87.98610386257103
1297
+ - type: euclidean_spearman
1298
+ value: 88.30238972017452
1299
+ - type: main_score
1300
+ value: 88.30238972017452
1301
+ task:
1302
+ type: STS
1303
+ - dataset:
1304
+ config: default
1305
+ name: MTEB STS14
1306
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
1307
+ split: test
1308
+ type: mteb/sts14-sts
1309
+ metrics:
1310
+ - type: cosine_pearson
1311
+ value: 85.70614623247714
1312
+ - type: cosine_spearman
1313
+ value: 84.29920990970672
1314
+ - type: manhattan_pearson
1315
+ value: 84.9836190531721
1316
+ - type: manhattan_spearman
1317
+ value: 84.40933470597638
1318
+ - type: euclidean_pearson
1319
+ value: 84.96652336693347
1320
+ - type: euclidean_spearman
1321
+ value: 84.29920989531965
1322
+ - type: main_score
1323
+ value: 84.29920990970672
1324
+ task:
1325
+ type: STS
1326
+ - dataset:
1327
+ config: default
1328
+ name: MTEB STS15
1329
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
1330
+ split: test
1331
+ type: mteb/sts15-sts
1332
+ metrics:
1333
+ - type: cosine_pearson
1334
+ value: 88.4169972425264
1335
+ - type: cosine_spearman
1336
+ value: 89.03555007807218
1337
+ - type: manhattan_pearson
1338
+ value: 88.83068699455478
1339
+ - type: manhattan_spearman
1340
+ value: 89.21877175674125
1341
+ - type: euclidean_pearson
1342
+ value: 88.7251052947544
1343
+ - type: euclidean_spearman
1344
+ value: 89.03557389893083
1345
+ - type: main_score
1346
+ value: 89.03555007807218
1347
+ task:
1348
+ type: STS
1349
+ - dataset:
1350
+ config: default
1351
+ name: MTEB STS16
1352
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
1353
+ split: test
1354
+ type: mteb/sts16-sts
1355
+ metrics:
1356
+ - type: cosine_pearson
1357
+ value: 85.63830579034632
1358
+ - type: cosine_spearman
1359
+ value: 86.77353371581373
1360
+ - type: manhattan_pearson
1361
+ value: 86.24830492396637
1362
+ - type: manhattan_spearman
1363
+ value: 86.96754348626189
1364
+ - type: euclidean_pearson
1365
+ value: 86.09837038778359
1366
+ - type: euclidean_spearman
1367
+ value: 86.77353371581373
1368
+ - type: main_score
1369
+ value: 86.77353371581373
1370
+ task:
1371
+ type: STS
1372
+ - dataset:
1373
+ config: en-en
1374
+ name: MTEB STS17 (en-en)
1375
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
1376
+ split: test
1377
+ type: mteb/sts17-crosslingual-sts
1378
+ metrics:
1379
+ - type: cosine_pearson
1380
+ value: 91.2204675588959
1381
+ - type: cosine_spearman
1382
+ value: 90.66976712249057
1383
+ - type: manhattan_pearson
1384
+ value: 91.11007808242346
1385
+ - type: manhattan_spearman
1386
+ value: 90.51739232964488
1387
+ - type: euclidean_pearson
1388
+ value: 91.19588941007903
1389
+ - type: euclidean_spearman
1390
+ value: 90.66976712249057
1391
+ - type: main_score
1392
+ value: 90.66976712249057
1393
+ task:
1394
+ type: STS
1395
+ - dataset:
1396
+ config: en
1397
+ name: MTEB STS22 (en)
1398
+ revision: eea2b4fe26a775864c896887d910b76a8098ad3f
1399
+ split: test
1400
+ type: mteb/sts22-crosslingual-sts
1401
+ metrics:
1402
+ - type: cosine_pearson
1403
+ value: 69.34416749707114
1404
+ - type: cosine_spearman
1405
+ value: 68.11632448161046
1406
+ - type: manhattan_pearson
1407
+ value: 68.99243488935281
1408
+ - type: manhattan_spearman
1409
+ value: 67.8398546438258
1410
+ - type: euclidean_pearson
1411
+ value: 69.06376010216088
1412
+ - type: euclidean_spearman
1413
+ value: 68.11632448161046
1414
+ - type: main_score
1415
+ value: 68.11632448161046
1416
+ task:
1417
+ type: STS
1418
+ - dataset:
1419
+ config: default
1420
+ name: MTEB STSBenchmark
1421
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
1422
+ split: test
1423
+ type: mteb/stsbenchmark-sts
1424
+ metrics:
1425
+ - type: cosine_pearson
1426
+ value: 88.10309739429758
1427
+ - type: cosine_spearman
1428
+ value: 88.40520383147418
1429
+ - type: manhattan_pearson
1430
+ value: 88.50753383813232
1431
+ - type: manhattan_spearman
1432
+ value: 88.66382629460927
1433
+ - type: euclidean_pearson
1434
+ value: 88.35050664609376
1435
+ - type: euclidean_spearman
1436
+ value: 88.40520383147418
1437
+ - type: main_score
1438
+ value: 88.40520383147418
1439
+ task:
1440
+ type: STS
1441
+ - dataset:
1442
+ config: default
1443
+ name: MTEB SciDocsRR
1444
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
1445
+ split: test
1446
+ type: mteb/scidocs-reranking
1447
+ metrics:
1448
+ - type: map
1449
+ value: 87.58627126942797
1450
+ - type: mrr
1451
+ value: 97.01098103058887
1452
+ - type: main_score
1453
+ value: 87.58627126942797
1454
+ task:
1455
+ type: Reranking
1456
+ - dataset:
1457
+ config: default
1458
+ name: MTEB SciFact
1459
+ revision: 0228b52cf27578f30900b9e5271d331663a030d7
1460
+ split: test
1461
+ type: mteb/scifact
1462
+ metrics:
1463
+ - type: map_at_1
1464
+ value: 62.883
1465
+ - type: map_at_10
1466
+ value: 75.371
1467
+ - type: map_at_100
1468
+ value: 75.66000000000001
1469
+ - type: map_at_1000
1470
+ value: 75.667
1471
+ - type: map_at_3
1472
+ value: 72.741
1473
+ - type: map_at_5
1474
+ value: 74.74
1475
+ - type: mrr_at_1
1476
+ value: 0.0
1477
+ - type: mrr_at_10
1478
+ value: 0.0
1479
+ - type: mrr_at_100
1480
+ value: 0.0
1481
+ - type: mrr_at_1000
1482
+ value: 0.0
1483
+ - type: mrr_at_3
1484
+ value: 0.0
1485
+ - type: mrr_at_5
1486
+ value: 0.0
1487
+ - type: ndcg_at_1
1488
+ value: 66.0
1489
+ - type: ndcg_at_10
1490
+ value: 80.12700000000001
1491
+ - type: ndcg_at_100
1492
+ value: 81.291
1493
+ - type: ndcg_at_1000
1494
+ value: 81.464
1495
+ - type: ndcg_at_3
1496
+ value: 76.19
1497
+ - type: ndcg_at_5
1498
+ value: 78.827
1499
+ - type: precision_at_1
1500
+ value: 66.0
1501
+ - type: precision_at_10
1502
+ value: 10.567
1503
+ - type: precision_at_100
1504
+ value: 1.117
1505
+ - type: precision_at_1000
1506
+ value: 0.11299999999999999
1507
+ - type: precision_at_3
1508
+ value: 30.333
1509
+ - type: precision_at_5
1510
+ value: 20.133000000000003
1511
+ - type: recall_at_1
1512
+ value: 62.883
1513
+ - type: recall_at_10
1514
+ value: 93.556
1515
+ - type: recall_at_100
1516
+ value: 98.667
1517
+ - type: recall_at_1000
1518
+ value: 100.0
1519
+ - type: recall_at_3
1520
+ value: 83.322
1521
+ - type: recall_at_5
1522
+ value: 89.756
1523
+ - type: main_score
1524
+ value: 80.12700000000001
1525
+ task:
1526
+ type: Retrieval
1527
+ - dataset:
1528
+ config: default
1529
+ name: MTEB SprintDuplicateQuestions
1530
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
1531
+ split: test
1532
+ type: mteb/sprintduplicatequestions-pairclassification
1533
+ metrics:
1534
+ - type: cos_sim_accuracy
1535
+ value: 99.87524752475248
1536
+ - type: cos_sim_accuracy_threshold
1537
+ value: 74.86587762832642
1538
+ - type: cos_sim_ap
1539
+ value: 97.02222446606328
1540
+ - type: cos_sim_f1
1541
+ value: 93.66197183098592
1542
+ - type: cos_sim_f1_threshold
1543
+ value: 74.74223375320435
1544
+ - type: cos_sim_precision
1545
+ value: 94.23076923076923
1546
+ - type: cos_sim_recall
1547
+ value: 93.10000000000001
1548
+ - type: dot_accuracy
1549
+ value: 99.87524752475248
1550
+ - type: dot_accuracy_threshold
1551
+ value: 74.86587762832642
1552
+ - type: dot_ap
1553
+ value: 97.02222688043362
1554
+ - type: dot_f1
1555
+ value: 93.66197183098592
1556
+ - type: dot_f1_threshold
1557
+ value: 74.74223375320435
1558
+ - type: dot_precision
1559
+ value: 94.23076923076923
1560
+ - type: dot_recall
1561
+ value: 93.10000000000001
1562
+ - type: euclidean_accuracy
1563
+ value: 99.87524752475248
1564
+ - type: euclidean_accuracy_threshold
1565
+ value: 70.9000825881958
1566
+ - type: euclidean_ap
1567
+ value: 97.02222446606329
1568
+ - type: euclidean_f1
1569
+ value: 93.66197183098592
1570
+ - type: euclidean_f1_threshold
1571
+ value: 71.07426524162292
1572
+ - type: euclidean_precision
1573
+ value: 94.23076923076923
1574
+ - type: euclidean_recall
1575
+ value: 93.10000000000001
1576
+ - type: manhattan_accuracy
1577
+ value: 99.87623762376238
1578
+ - type: manhattan_accuracy_threshold
1579
+ value: 3588.5040283203125
1580
+ - type: manhattan_ap
1581
+ value: 97.09194643777883
1582
+ - type: manhattan_f1
1583
+ value: 93.7375745526839
1584
+ - type: manhattan_f1_threshold
1585
+ value: 3664.3760681152344
1586
+ - type: manhattan_precision
1587
+ value: 93.18181818181817
1588
+ - type: manhattan_recall
1589
+ value: 94.3
1590
+ - type: max_accuracy
1591
+ value: 99.87623762376238
1592
+ - type: max_ap
1593
+ value: 97.09194643777883
1594
+ - type: max_f1
1595
+ value: 93.7375745526839
1596
+ task:
1597
+ type: PairClassification
1598
+ - dataset:
1599
+ config: default
1600
+ name: MTEB StackExchangeClustering
1601
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
1602
+ split: test
1603
+ type: mteb/stackexchange-clustering
1604
+ metrics:
1605
+ - type: main_score
1606
+ value: 82.10134099988541
1607
+ - type: v_measure
1608
+ value: 82.10134099988541
1609
+ - type: v_measure_std
1610
+ value: 2.7926349897769533
1611
+ task:
1612
+ type: Clustering
1613
+ - dataset:
1614
+ config: default
1615
+ name: MTEB StackExchangeClusteringP2P
1616
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
1617
+ split: test
1618
+ type: mteb/stackexchange-clustering-p2p
1619
+ metrics:
1620
+ - type: main_score
1621
+ value: 48.357450742397404
1622
+ - type: v_measure
1623
+ value: 48.357450742397404
1624
+ - type: v_measure_std
1625
+ value: 1.520118876440547
1626
+ task:
1627
+ type: Clustering
1628
+ - dataset:
1629
+ config: default
1630
+ name: MTEB StackOverflowDupQuestions
1631
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
1632
+ split: test
1633
+ type: mteb/stackoverflowdupquestions-reranking
1634
+ metrics:
1635
+ - type: map
1636
+ value: 55.79277200802986
1637
+ - type: mrr
1638
+ value: 56.742517082590616
1639
+ - type: main_score
1640
+ value: 55.79277200802986
1641
+ task:
1642
+ type: Reranking
1643
+ - dataset:
1644
+ config: default
1645
+ name: MTEB SummEval
1646
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
1647
+ split: test
1648
+ type: mteb/summeval
1649
+ metrics:
1650
+ - type: cosine_spearman
1651
+ value: 30.701215774712693
1652
+ - type: cosine_pearson
1653
+ value: 31.26740037278488
1654
+ - type: dot_spearman
1655
+ value: 30.701215774712693
1656
+ - type: dot_pearson
1657
+ value: 31.267404144879997
1658
+ - type: main_score
1659
+ value: 30.701215774712693
1660
+ task:
1661
+ type: Summarization
1662
+ - dataset:
1663
+ config: default
1664
+ name: MTEB TRECCOVID
1665
+ revision: bb9466bac8153a0349341eb1b22e06409e78ef4e
1666
+ split: test
1667
+ type: mteb/trec-covid
1668
+ metrics:
1669
+ - type: map_at_1
1670
+ value: 0.23800000000000002
1671
+ - type: map_at_10
1672
+ value: 2.31
1673
+ - type: map_at_100
1674
+ value: 15.495000000000001
1675
+ - type: map_at_1000
1676
+ value: 38.829
1677
+ - type: map_at_3
1678
+ value: 0.72
1679
+ - type: map_at_5
1680
+ value: 1.185
1681
+ - type: mrr_at_1
1682
+ value: 0.0
1683
+ - type: mrr_at_10
1684
+ value: 0.0
1685
+ - type: mrr_at_100
1686
+ value: 0.0
1687
+ - type: mrr_at_1000
1688
+ value: 0.0
1689
+ - type: mrr_at_3
1690
+ value: 0.0
1691
+ - type: mrr_at_5
1692
+ value: 0.0
1693
+ - type: ndcg_at_1
1694
+ value: 91.0
1695
+ - type: ndcg_at_10
1696
+ value: 88.442
1697
+ - type: ndcg_at_100
1698
+ value: 71.39
1699
+ - type: ndcg_at_1000
1700
+ value: 64.153
1701
+ - type: ndcg_at_3
1702
+ value: 89.877
1703
+ - type: ndcg_at_5
1704
+ value: 89.562
1705
+ - type: precision_at_1
1706
+ value: 92.0
1707
+ - type: precision_at_10
1708
+ value: 92.60000000000001
1709
+ - type: precision_at_100
1710
+ value: 73.74000000000001
1711
+ - type: precision_at_1000
1712
+ value: 28.222
1713
+ - type: precision_at_3
1714
+ value: 94.0
1715
+ - type: precision_at_5
1716
+ value: 93.60000000000001
1717
+ - type: recall_at_1
1718
+ value: 0.23800000000000002
1719
+ - type: recall_at_10
1720
+ value: 2.428
1721
+ - type: recall_at_100
1722
+ value: 18.099999999999998
1723
+ - type: recall_at_1000
1724
+ value: 60.79599999999999
1725
+ - type: recall_at_3
1726
+ value: 0.749
1727
+ - type: recall_at_5
1728
+ value: 1.238
1729
+ - type: main_score
1730
+ value: 88.442
1731
+ task:
1732
+ type: Retrieval
1733
+ - dataset:
1734
+ config: default
1735
+ name: MTEB Touche2020
1736
+ revision: a34f9a33db75fa0cbb21bb5cfc3dae8dc8bec93f
1737
+ split: test
1738
+ type: mteb/touche2020
1739
+ metrics:
1740
+ - type: map_at_1
1741
+ value: 3.4939999999999998
1742
+ - type: map_at_10
1743
+ value: 12.531999999999998
1744
+ - type: map_at_100
1745
+ value: 19.147
1746
+ - type: map_at_1000
1747
+ value: 20.861
1748
+ - type: map_at_3
1749
+ value: 7.558
1750
+ - type: map_at_5
1751
+ value: 9.49
1752
+ - type: mrr_at_1
1753
+ value: 0.0
1754
+ - type: mrr_at_10
1755
+ value: 0.0
1756
+ - type: mrr_at_100
1757
+ value: 0.0
1758
+ - type: mrr_at_1000
1759
+ value: 0.0
1760
+ - type: mrr_at_3
1761
+ value: 0.0
1762
+ - type: mrr_at_5
1763
+ value: 0.0
1764
+ - type: ndcg_at_1
1765
+ value: 47.959
1766
+ - type: ndcg_at_10
1767
+ value: 31.781
1768
+ - type: ndcg_at_100
1769
+ value: 42.131
1770
+ - type: ndcg_at_1000
1771
+ value: 53.493
1772
+ - type: ndcg_at_3
1773
+ value: 39.204
1774
+ - type: ndcg_at_5
1775
+ value: 34.635
1776
+ - type: precision_at_1
1777
+ value: 48.980000000000004
1778
+ - type: precision_at_10
1779
+ value: 27.143
1780
+ - type: precision_at_100
1781
+ value: 8.224
1782
+ - type: precision_at_1000
1783
+ value: 1.584
1784
+ - type: precision_at_3
1785
+ value: 38.775999999999996
1786
+ - type: precision_at_5
1787
+ value: 33.061
1788
+ - type: recall_at_1
1789
+ value: 3.4939999999999998
1790
+ - type: recall_at_10
1791
+ value: 18.895
1792
+ - type: recall_at_100
1793
+ value: 50.192
1794
+ - type: recall_at_1000
1795
+ value: 85.167
1796
+ - type: recall_at_3
1797
+ value: 8.703
1798
+ - type: recall_at_5
1799
+ value: 11.824
1800
+ - type: main_score
1801
+ value: 31.781
1802
+ task:
1803
+ type: Retrieval
1804
+ - dataset:
1805
+ config: default
1806
+ name: MTEB ToxicConversationsClassification
1807
+ revision: edfaf9da55d3dd50d43143d90c1ac476895ae6de
1808
+ split: test
1809
+ type: mteb/toxic_conversations_50k
1810
+ metrics:
1811
+ - type: accuracy
1812
+ value: 92.7402
1813
+ - type: accuracy_stderr
1814
+ value: 1.020764595781027
1815
+ - type: ap
1816
+ value: 44.38594756333084
1817
+ - type: ap_stderr
1818
+ value: 1.817150701258273
1819
+ - type: f1
1820
+ value: 79.95699280019547
1821
+ - type: f1_stderr
1822
+ value: 1.334582498702029
1823
+ - type: main_score
1824
+ value: 92.7402
1825
+ task:
1826
+ type: Classification
1827
+ - dataset:
1828
+ config: default
1829
+ name: MTEB TweetSentimentExtractionClassification
1830
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
1831
+ split: test
1832
+ type: mteb/tweet_sentiment_extraction
1833
+ metrics:
1834
+ - type: accuracy
1835
+ value: 80.86870401810978
1836
+ - type: accuracy_stderr
1837
+ value: 0.22688467782004712
1838
+ - type: f1
1839
+ value: 81.1829040745744
1840
+ - type: f1_stderr
1841
+ value: 0.19774920574849694
1842
+ - type: main_score
1843
+ value: 80.86870401810978
1844
+ task:
1845
+ type: Classification
1846
+ - dataset:
1847
+ config: default
1848
+ name: MTEB TwentyNewsgroupsClustering
1849
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
1850
+ split: test
1851
+ type: mteb/twentynewsgroups-clustering
1852
+ metrics:
1853
+ - type: main_score
1854
+ value: 64.82048869927482
1855
+ - type: v_measure
1856
+ value: 64.82048869927482
1857
+ - type: v_measure_std
1858
+ value: 0.9170394252450564
1859
+ task:
1860
+ type: Clustering
1861
+ - dataset:
1862
+ config: default
1863
+ name: MTEB TwitterSemEval2015
1864
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
1865
+ split: test
1866
+ type: mteb/twittersemeval2015-pairclassification
1867
+ metrics:
1868
+ - type: cos_sim_accuracy
1869
+ value: 88.44251057996067
1870
+ - type: cos_sim_accuracy_threshold
1871
+ value: 70.2150285243988
1872
+ - type: cos_sim_ap
1873
+ value: 81.11422351199913
1874
+ - type: cos_sim_f1
1875
+ value: 73.71062868615887
1876
+ - type: cos_sim_f1_threshold
1877
+ value: 66.507488489151
1878
+ - type: cos_sim_precision
1879
+ value: 70.2799712849964
1880
+ - type: cos_sim_recall
1881
+ value: 77.4934036939314
1882
+ - type: dot_accuracy
1883
+ value: 88.44251057996067
1884
+ - type: dot_accuracy_threshold
1885
+ value: 70.2150285243988
1886
+ - type: dot_ap
1887
+ value: 81.11420529068658
1888
+ - type: dot_f1
1889
+ value: 73.71062868615887
1890
+ - type: dot_f1_threshold
1891
+ value: 66.50749444961548
1892
+ - type: dot_precision
1893
+ value: 70.2799712849964
1894
+ - type: dot_recall
1895
+ value: 77.4934036939314
1896
+ - type: euclidean_accuracy
1897
+ value: 88.44251057996067
1898
+ - type: euclidean_accuracy_threshold
1899
+ value: 77.18156576156616
1900
+ - type: euclidean_ap
1901
+ value: 81.11422421732487
1902
+ - type: euclidean_f1
1903
+ value: 73.71062868615887
1904
+ - type: euclidean_f1_threshold
1905
+ value: 81.84436559677124
1906
+ - type: euclidean_precision
1907
+ value: 70.2799712849964
1908
+ - type: euclidean_recall
1909
+ value: 77.4934036939314
1910
+ - type: manhattan_accuracy
1911
+ value: 88.26369434344639
1912
+ - type: manhattan_accuracy_threshold
1913
+ value: 3837.067413330078
1914
+ - type: manhattan_ap
1915
+ value: 80.81442360477725
1916
+ - type: manhattan_f1
1917
+ value: 73.39883099117024
1918
+ - type: manhattan_f1_threshold
1919
+ value: 4098.833847045898
1920
+ - type: manhattan_precision
1921
+ value: 69.41896024464832
1922
+ - type: manhattan_recall
1923
+ value: 77.86279683377309
1924
+ - type: max_accuracy
1925
+ value: 88.44251057996067
1926
+ - type: max_ap
1927
+ value: 81.11422421732487
1928
+ - type: max_f1
1929
+ value: 73.71062868615887
1930
+ task:
1931
+ type: PairClassification
1932
+ - dataset:
1933
+ config: default
1934
+ name: MTEB TwitterURLCorpus
1935
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
1936
+ split: test
1937
+ type: mteb/twitterurlcorpus-pairclassification
1938
+ metrics:
1939
+ - type: cos_sim_accuracy
1940
+ value: 90.03182365040556
1941
+ - type: cos_sim_accuracy_threshold
1942
+ value: 64.46443796157837
1943
+ - type: cos_sim_ap
1944
+ value: 87.86649113691112
1945
+ - type: cos_sim_f1
1946
+ value: 80.45644844577821
1947
+ - type: cos_sim_f1_threshold
1948
+ value: 61.40774488449097
1949
+ - type: cos_sim_precision
1950
+ value: 77.54052702992216
1951
+ - type: cos_sim_recall
1952
+ value: 83.60024638127503
1953
+ - type: dot_accuracy
1954
+ value: 90.03182365040556
1955
+ - type: dot_accuracy_threshold
1956
+ value: 64.46444988250732
1957
+ - type: dot_ap
1958
+ value: 87.86649011954319
1959
+ - type: dot_f1
1960
+ value: 80.45644844577821
1961
+ - type: dot_f1_threshold
1962
+ value: 61.407750844955444
1963
+ - type: dot_precision
1964
+ value: 77.54052702992216
1965
+ - type: dot_recall
1966
+ value: 83.60024638127503
1967
+ - type: euclidean_accuracy
1968
+ value: 90.03182365040556
1969
+ - type: euclidean_accuracy_threshold
1970
+ value: 84.30368900299072
1971
+ - type: euclidean_ap
1972
+ value: 87.86649114275045
1973
+ - type: euclidean_f1
1974
+ value: 80.45644844577821
1975
+ - type: euclidean_f1_threshold
1976
+ value: 87.8547191619873
1977
+ - type: euclidean_precision
1978
+ value: 77.54052702992216
1979
+ - type: euclidean_recall
1980
+ value: 83.60024638127503
1981
+ - type: manhattan_accuracy
1982
+ value: 89.99883572010712
1983
+ - type: manhattan_accuracy_threshold
1984
+ value: 4206.838607788086
1985
+ - type: manhattan_ap
1986
+ value: 87.8600826607838
1987
+ - type: manhattan_f1
1988
+ value: 80.44054508120217
1989
+ - type: manhattan_f1_threshold
1990
+ value: 4372.755432128906
1991
+ - type: manhattan_precision
1992
+ value: 78.08219178082192
1993
+ - type: manhattan_recall
1994
+ value: 82.94579611949491
1995
+ - type: max_accuracy
1996
+ value: 90.03182365040556
1997
+ - type: max_ap
1998
+ value: 87.86649114275045
1999
+ - type: max_f1
2000
+ value: 80.45644844577821
2001
+ task:
2002
+ type: PairClassification
2003
+ language:
2004
+ - en
2005
+ license: cc-by-nc-4.0
2006
+ ---
2007
+ ## Introduction
2008
+ We present NV-Embed-v2, a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark ([MTEB benchmark](https://huggingface.co/spaces/mteb/leaderboard)) (as of Aug 30, 2024) with a score of 72.31 across 56 text embedding tasks. It also holds the No. 1 spot in the leaderboard's retrieval sub-category (a score of 62.65 across 15 tasks), a capability that is essential to the development of RAG technology.
2009
+
2010
+ NV-Embed-v2 presents several new designs, including having the LLM attend to latent vectors for better pooled embedding output, and demonstrating a two-stage instruction tuning method to enhance the accuracy of both retrieval and non-retrieval tasks. Additionally, NV-Embed-v2 incorporates novel hard-negative mining methods that take into account the positive relevance score for better false-negative removal.
2011
+
2012
+ For more technical details, refer to our paper: [NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models](https://arxiv.org/pdf/2405.17428).
2013
+
2014
+ ## Model Details
2015
+ - Base Decoder-only LLM: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
2016
+ - Pooling Type: Latent-Attention
2017
+ - Embedding Dimension: 4096
2018
+
2019
+ ## How to use
2020
+
2021
+ Here is an example of how to encode queries and passages using Hugging Face Transformers and Sentence-Transformers. Please find the required package versions [here](https://huggingface.co/nvidia/NV-Embed-v2#2-required-packages).
2022
+
2023
+ ### Usage (HuggingFace Transformers)
2024
+
2025
+ ```python
2026
+ import torch
2027
+ import torch.nn.functional as F
2028
+ from transformers import AutoTokenizer, AutoModel
2029
+
2030
+ # Each query needs to be accompanied by a corresponding instruction describing the task.
2031
+ task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
2032
+
2033
+ query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
2034
+ queries = [
2035
+ 'are judo throws allowed in wrestling?',
2036
+ 'how to become a radiology technician in michigan?'
2037
+ ]
2038
+
2039
+ # No instruction needed for retrieval passages
2040
+ passage_prefix = ""
2041
+ passages = [
2042
+ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
2043
+ "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
2044
+ ]
2045
+
2046
+ # load model with tokenizer
2047
+ model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True)
2048
+
2049
+ # get the embeddings
2050
+ max_length = 32768
2051
+ query_embeddings = model.encode(queries, instruction=query_prefix, max_length=max_length)
2052
+ passage_embeddings = model.encode(passages, instruction=passage_prefix, max_length=max_length)
2053
+
2054
+ # normalize embeddings
2055
+ query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
2056
+ passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)
2057
+
2058
+ # get the embeddings with DataLoader (splitting the datasets into multiple mini-batches)
2059
+ # batch_size=2
2060
+ # query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=query_prefix, max_length=max_length, num_workers=32, return_numpy=True)
2061
+ # passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction=passage_prefix, max_length=max_length, num_workers=32, return_numpy=True)
2062
+
2063
+ scores = (query_embeddings @ passage_embeddings.T) * 100
2064
+ print(scores.tolist())
2065
+ # [[87.42693328857422, 0.46283677220344543], [0.965264618396759, 86.03721618652344]]
2066
+ ```
2067
+
2068
+
2069
+ ### Usage (Sentence-Transformers)
2070
+
2071
+ ```python
2072
+ import torch
2073
+ from sentence_transformers import SentenceTransformer
2074
+
2075
+ # Each query needs to be accompanied by a corresponding instruction describing the task.
2076
+ task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
2077
+
2078
+ query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
2079
+ queries = [
2080
+ 'are judo throws allowed in wrestling?',
2081
+ 'how to become a radiology technician in michigan?'
2082
+ ]
2083
+
2084
+ # No instruction needed for retrieval passages
2085
+ passages = [
2086
+ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
2087
+ "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
2088
+ ]
2089
+
2090
+ # load model with tokenizer
2091
+ model = SentenceTransformer('nvidia/NV-Embed-v2', trust_remote_code=True)
2092
+ model.max_seq_length = 32768
2093
+ model.tokenizer.padding_side="right"
2094
+
2095
+ def add_eos(input_examples):
2096
+ input_examples = [input_example + model.tokenizer.eos_token for input_example in input_examples]
2097
+ return input_examples
2098
+
2099
+ # get the embeddings
2100
+ batch_size = 2
2101
+ query_embeddings = model.encode(add_eos(queries), batch_size=batch_size, prompt=query_prefix, normalize_embeddings=True)
2102
+ passage_embeddings = model.encode(add_eos(passages), batch_size=batch_size, normalize_embeddings=True)
2103
+
2104
+ scores = (query_embeddings @ passage_embeddings.T) * 100
2105
+ print(scores.tolist())
2106
+ ```
2107
+
2108
+ ## License
2109
+ This model should not be used for any commercial purpose. Refer to the [license](https://spdx.org/licenses/CC-BY-NC-4.0) for the detailed terms.
2110
+
2111
+ For commercial purposes, we recommend using the models of [NeMo Retriever Microservices (NIMs)](https://build.nvidia.com/explore/retrieval).
2112
+
2113
+
2114
+ ## Correspondence to
2115
+ Chankyu Lee ([email protected]), Wei Ping ([email protected])
2116
+
2117
+
2118
+ ## Citation
2119
+ If you find this code useful in your research, please consider citing:
2120
+
2121
+ ```bibtex
2122
+ @article{lee2024nv,
2123
+ title={NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models},
2124
+ author={Lee, Chankyu and Roy, Rajarshi and Xu, Mengyao and Raiman, Jonathan and Shoeybi, Mohammad and Catanzaro, Bryan and Ping, Wei},
2125
+ journal={arXiv preprint arXiv:2405.17428},
2126
+ year={2024}
2127
+ }
2128
+ ```
2129
+ ```bibtex
2130
+ @article{moreira2024nv,
2131
+ title={NV-Retriever: Improving text embedding models with effective hard-negative mining},
2132
+ author={Moreira, Gabriel de Souza P and Osmulski, Radek and Xu, Mengyao and Ak, Ronay and Schifferer, Benedikt and Oldridge, Even},
2133
+ journal={arXiv preprint arXiv:2407.15831},
2134
+ year={2024}
2135
+ }
2136
+ ```
2137
+
2138
+
2139
+ ## Troubleshooting
2140
+
2141
+ #### 1. Instruction template for MTEB benchmarks
2142
+
2143
+ For MTEB sub-tasks for retrieval, STS, and summarization, please use the instruction prefix template in [instructions.json](https://huggingface.co/nvidia/NV-Embed-v2/blob/main/instructions.json). For classification, clustering and reranking, please use the instructions provided in Table 7 of the [NV-Embed paper](https://arxiv.org/pdf/2405.17428).
2144
+
2145
+ #### 2. Required Packages
2146
+
2147
+ If you run into trouble, try installing the Python packages listed below:
2148
+ ```bash
2149
+ pip uninstall -y transformer-engine
2150
+ pip install torch==2.2.0
2151
+ pip install transformers==4.42.4
2152
+ pip install flash-attn==2.2.0
2153
+ pip install sentence-transformers==2.7.0
2154
+ ```
2155
+
2156
+ #### 3. How to enable Multi-GPU (Note, this is the case for HuggingFace Transformers)
2157
+ ```python
2158
+ from transformers import AutoModel
2159
+ from torch.nn import DataParallel
2160
+
2161
+ embedding_model = AutoModel.from_pretrained("nvidia/NV-Embed-v2")
2162
+ for module_key, module in embedding_model._modules.items():
2163
+ embedding_model._modules[module_key] = DataParallel(module)
2164
+ ```
2165
+
2166
+ #### 4. Fixing "nvidia/NV-Embed-v2 is not the path to a directory containing a file named config.json"
2167
+
2168
+ Switch to your local model path, open config.json, and replace the value of **"_name_or_path"** with your local model path.
2169
+
2170
+
2171
+ #### 5. Access to model nvidia/NV-Embed-v2 is restricted. You must be authenticated to access it
2172
+
2173
+ Use your huggingface access [token](https://huggingface.co/settings/tokens) to execute *"huggingface-cli login"*.
2174
+
2175
+ #### 6. How to resolve a slight mismatch in Sentence Transformers results
2176
+
2177
+ A slight mismatch in the Sentence Transformer implementation is caused by a discrepancy in the calculation of the instruction prefix length within the Sentence Transformer package.
2178
+
2179
+ To fix this issue, you need to build the Sentence Transformer package from source, making the necessary modification in this [line](https://github.com/UKPLab/sentence-transformers/blob/v2.7-release/sentence_transformers/SentenceTransformer.py#L353) as below.
2180
+ ```bash
2181
+ git clone https://github.com/UKPLab/sentence-transformers.git
2182
+ cd sentence-transformers
2183
+ git checkout v2.7-release
2184
+ # Modify L353 in SentenceTransformer.py to: extra_features["prompt_length"] = tokenized_prompt["input_ids"].shape[-1]
2185
+ pip install -e .
2186
+ ```
config.json ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "nvidia/NV-Embed-v2",
3
+ "add_eos": true,
4
+ "add_pad_token": true,
5
+ "architectures": [
6
+ "NVEmbedModel"
7
+ ],
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_nvembed.NVEmbedConfig",
10
+ "AutoModel": "modeling_nvembed.NVEmbedModel"
11
+ },
12
+ "hidden_size": 4096,
13
+ "is_mask_instruction": true,
14
+ "latent_attention_config": {
15
+ "model_type": "latent_attention"
16
+ },
17
+ "mask_type": "b",
18
+ "model_type": "nvembed",
19
+ "padding_side": "right",
20
+ "text_config": {
21
+ "_name_or_path": "nvidia/NV-Embed-v2",
22
+ "add_cross_attention": false,
23
+ "architectures": [
24
+ "MistralModel"
25
+ ],
26
+ "attention_dropout": 0.0,
27
+ "bad_words_ids": null,
28
+ "begin_suppress_tokens": null,
29
+ "bos_token_id": 1,
30
+ "chunk_size_feed_forward": 0,
31
+ "cross_attention_hidden_size": null,
32
+ "decoder_start_token_id": null,
33
+ "diversity_penalty": 0.0,
34
+ "do_sample": false,
35
+ "early_stopping": false,
36
+ "encoder_no_repeat_ngram_size": 0,
37
+ "eos_token_id": 2,
38
+ "exponential_decay_length_penalty": null,
39
+ "finetuning_task": null,
40
+ "forced_bos_token_id": null,
41
+ "forced_eos_token_id": null,
42
+ "hidden_act": "silu",
43
+ "hidden_size": 4096,
44
+ "id2label": {
45
+ "0": "LABEL_0",
46
+ "1": "LABEL_1"
47
+ },
48
+ "initializer_range": 0.02,
49
+ "intermediate_size": 14336,
50
+ "is_decoder": false,
51
+ "is_encoder_decoder": false,
52
+ "label2id": {
53
+ "LABEL_0": 0,
54
+ "LABEL_1": 1
55
+ },
56
+ "length_penalty": 1.0,
57
+ "max_length": 20,
58
+ "max_position_embeddings": 32768,
59
+ "min_length": 0,
60
+ "model_type": "bidir_mistral",
61
+ "no_repeat_ngram_size": 0,
62
+ "num_attention_heads": 32,
63
+ "num_beam_groups": 1,
64
+ "num_beams": 1,
65
+ "num_hidden_layers": 32,
66
+ "num_key_value_heads": 8,
67
+ "num_return_sequences": 1,
68
+ "output_attentions": false,
69
+ "output_hidden_states": false,
70
+ "output_scores": false,
71
+ "pad_token_id": null,
72
+ "prefix": null,
73
+ "problem_type": null,
74
+ "pruned_heads": {},
75
+ "remove_invalid_values": false,
76
+ "repetition_penalty": 1.0,
77
+ "return_dict": true,
78
+ "return_dict_in_generate": false,
79
+ "rms_norm_eps": 1e-05,
80
+ "rope_theta": 10000.0,
81
+ "sep_token_id": null,
82
+ "sliding_window": 4096,
83
+ "suppress_tokens": null,
84
+ "task_specific_params": null,
85
+ "temperature": 1.0,
86
+ "tf_legacy_loss": false,
87
+ "tie_encoder_decoder": false,
88
+ "tie_word_embeddings": false,
89
+ "tokenizer_class": null,
90
+ "top_k": 50,
91
+ "top_p": 1.0,
92
+ "torch_dtype": "float32",
93
+ "torchscript": false,
94
+ "typical_p": 1.0,
95
+ "use_bfloat16": false,
96
+ "use_cache": true,
97
+ "vocab_size": 32000
98
+ },
99
+ "torch_dtype": "float16",
100
+ "transformers_version": "4.42.4"
101
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.7.0",
4
+ "transformers": "4.37.2",
5
+ "pytorch": "2.2.0+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null
9
+ }
configuration_nvembed.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from typing import Literal
3
+ from transformers import AutoConfig
4
+ from transformers.configuration_utils import PretrainedConfig
5
+ from transformers.models.auto import CONFIG_MAPPING
6
+ from transformers.models.mistral import MistralConfig
7
+
8
+ NVEMBED_TYPE = "nvembed"
9
+ LATENT_ATTENTION_TYPE = "latent_attention"
10
+ BIDIR_MISTRAL_TYPE = "bidir_mistral"
11
+
12
+ class NVEmbedConfig(PretrainedConfig):
13
+ model_type = "nvembed"
14
+ is_composition = False
15
+
16
+ def __init__(
17
+ self,
18
+ latent_attention_config=None,
19
+ text_config=None,
20
+ padding_side: Literal["right", "left"]="right",
21
+ add_pad_token: bool=True,
22
+ is_mask_instruction: bool = True,
23
+ add_eos: bool=True,
24
+ mask_type: str="b",
25
+ **kwargs,
26
+ ):
27
+ if isinstance(latent_attention_config, dict):
28
+ latent_attention_config["model_type"] = (
29
+ latent_attention_config["model_type"] if "model_type" in latent_attention_config else LATENT_ATTENTION_TYPE
30
+ )
31
+ latent_attention_config = CONFIG_MAPPING[latent_attention_config["model_type"]](**latent_attention_config)
32
+ elif latent_attention_config is None:
33
+ latent_attention_config = CONFIG_MAPPING[LATENT_ATTENTION_TYPE]()
34
+
35
+ self.latent_attention_config = latent_attention_config
36
+
37
+ if isinstance(text_config, dict):
38
+ text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama"
39
+ text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
40
+ elif text_config is None:
41
+ text_config = None
42
+
43
+ self.text_config = text_config
44
+ self.padding_side = padding_side
45
+ self.is_mask_instruction = is_mask_instruction
46
+ self.add_pad_token = add_pad_token
47
+ self.add_eos = add_eos
48
+ self.mask_type = mask_type
49
+ if "hidden_size" in kwargs:
50
+ self.hidden_size = kwargs["hidden_size"]
51
+ else:
52
+ self.hidden_size = 4096
53
+
54
+ super().__init__(**kwargs)
55
+
56
+
57
+ class LatentAttentionConfig(PretrainedConfig):
58
+ model_type = LATENT_ATTENTION_TYPE
59
+ is_composition = False
60
+ _name_or_path = "latent_attention"
61
+
62
+ def __init__(
63
+ self,
64
+ num_latents_value: int=512,
65
+ num_cross_heads: int=8,
66
+ output_normalize: bool=True,
67
+ hidden_dim: int=4096,
68
+ latent_dim: int=4096,
69
+ cross_dim_head: int=4096,
70
+ **kwargs,
71
+ ):
72
+ self.num_latents_value = num_latents_value
73
+ self.num_cross_heads = num_cross_heads
74
+ self.output_normalize = output_normalize
75
+ self.hidden_dim = hidden_dim
76
+ self.latent_dim = latent_dim
77
+ self.cross_dim_head = cross_dim_head
78
+
79
+
80
+ class BidirectionalMistralConfig(MistralConfig):
81
+ model_type = BIDIR_MISTRAL_TYPE
82
+ keys_to_ignore_at_inference = ["past_key_values"]
83
+
84
+ AutoConfig.register(NVEMBED_TYPE, NVEmbedConfig)
85
+ AutoConfig.register(LATENT_ATTENTION_TYPE, LatentAttentionConfig)
86
+ AutoConfig.register(BIDIR_MISTRAL_TYPE, BidirectionalMistralConfig)
87
+
88
+ NVEmbedConfig.register_for_auto_class()
89
+ LatentAttentionConfig.register_for_auto_class()
90
+ BidirectionalMistralConfig.register_for_auto_class()
instructions.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ClimateFEVER":
3
+ {
4
+ "query": "Given a claim about climate change, retrieve documents that support or refute the claim",
5
+ "corpus": ""
6
+ },
7
+ "HotpotQA":
8
+ {
9
+ "query": "Given a multi-hop question, retrieve documents that can help answer the question",
10
+ "corpus": ""
11
+ },
12
+ "FEVER":
13
+ {
14
+ "query": "Given a claim, retrieve documents that support or refute the claim",
15
+ "corpus": ""
16
+ },
17
+ "MSMARCO":
18
+ {
19
+ "query": "Given a web search query, retrieve relevant passages that answer the query",
20
+ "corpus": ""
21
+ },
22
+ "DBPedia":
23
+ {
24
+ "query": "Given a query, retrieve relevant entity descriptions from DBPedia",
25
+ "corpus": ""
26
+ },
27
+ "NQ":
28
+ {
29
+ "query": "Given a question, retrieve passages that answer the question",
30
+ "corpus": ""
31
+ },
32
+ "QuoraRetrieval":
33
+ {
34
+ "query": "Given a question, retrieve questions that are semantically equivalent to the given question",
35
+ "corpus": "Given a question, retrieve questions that are semantically equivalent to the given question"
36
+ },
37
+ "SCIDOCS":
38
+ {
39
+ "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
40
+ "corpus": ""
41
+ },
42
+ "TRECCOVID":
43
+ {
44
+ "query": "Given a query on COVID-19, retrieve documents that answer the query",
45
+ "corpus": ""
46
+ },
47
+ "Touche2020":
48
+ {
49
+ "query": "Given a question, retrieve passages that answer the question",
50
+ "corpus": ""
51
+ },
52
+ "SciFact":
53
+ {
54
+ "query": "Given a scientific claim, retrieve documents that support or refute the claim",
55
+ "corpus": ""
56
+ },
57
+ "NFCorpus":
58
+ {
59
+ "query": "Given a question, retrieve relevant documents that answer the question",
60
+ "corpus": ""
61
+ },
62
+ "ArguAna":
63
+ {
64
+ "query": "Given a claim, retrieve documents that support or refute the claim",
65
+ "corpus": ""
66
+ },
67
+ "FiQA2018":
68
+ {
69
+ "query": "Given a financial question, retrieve relevant passages that answer the query",
70
+ "corpus": ""
71
+ },
72
+ "STS":
73
+ {
74
+ "text": "Retrieve semantically similar text"
75
+ },
76
+ "SUMM":
77
+ {
78
+ "text": "Given a news summary, retrieve other semantically similar summaries"
79
+ }
80
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce5651268058d961eaeabd4f65a5cb5d003ac7e0e34b7095658b5d5a4802f6a
3
+ size 4997761248
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd7e85b57afbc74fab67e50a572590ce57dde8b5fa76fe7527c42189074d57d
3
+ size 4915917048
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c90f033107075c9531ed8163d4b087ce77e63596c8510821da15a4d892a85c
3
+ size 4999820296
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ff251c6b33ed89101915eb82a92575fd7d7daf9db953205f3bb4b982c4c3f5
3
+ size 788571960
model.safetensors.index.json ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15702032384
4
+ },
5
+ "weight_map": {
6
+ "embedding_model.embed_tokens.weight": "model-00001-of-00004.safetensors",
7
+ "embedding_model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
8
+ "embedding_model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
9
+ "embedding_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
10
+ "embedding_model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
11
+ "embedding_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
12
+ "embedding_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
13
+ "embedding_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
14
+ "embedding_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
15
+ "embedding_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
16
+ "embedding_model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
17
+ "embedding_model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
18
+ "embedding_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
19
+ "embedding_model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
20
+ "embedding_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "embedding_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
22
+ "embedding_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
23
+ "embedding_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
24
+ "embedding_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
25
+ "embedding_model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
26
+ "embedding_model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
27
+ "embedding_model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
28
+ "embedding_model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
29
+ "embedding_model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
30
+ "embedding_model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
31
+ "embedding_model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
32
+ "embedding_model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
33
+ "embedding_model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
34
+ "embedding_model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
35
+ "embedding_model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
36
+ "embedding_model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
37
+ "embedding_model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
38
+ "embedding_model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
39
+ "embedding_model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
40
+ "embedding_model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
41
+ "embedding_model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "embedding_model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
43
+ "embedding_model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
44
+ "embedding_model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
45
+ "embedding_model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
46
+ "embedding_model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
47
+ "embedding_model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
48
+ "embedding_model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
49
+ "embedding_model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
50
+ "embedding_model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
51
+ "embedding_model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
52
+ "embedding_model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
53
+ "embedding_model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
54
+ "embedding_model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
55
+ "embedding_model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
56
+ "embedding_model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "embedding_model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
58
+ "embedding_model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
59
+ "embedding_model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
60
+ "embedding_model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
61
+ "embedding_model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
62
+ "embedding_model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
63
+ "embedding_model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
64
+ "embedding_model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
65
+ "embedding_model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
66
+ "embedding_model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
67
+ "embedding_model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
68
+ "embedding_model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
69
+ "embedding_model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
70
+ "embedding_model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
71
+ "embedding_model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
72
+ "embedding_model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
73
+ "embedding_model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
74
+ "embedding_model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
75
+ "embedding_model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
76
+ "embedding_model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
77
+ "embedding_model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "embedding_model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
79
+ "embedding_model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
80
+ "embedding_model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
81
+ "embedding_model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
82
+ "embedding_model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
83
+ "embedding_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
84
+ "embedding_model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
85
+ "embedding_model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
86
+ "embedding_model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
87
+ "embedding_model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
88
+ "embedding_model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
89
+ "embedding_model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
90
+ "embedding_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
91
+ "embedding_model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
92
+ "embedding_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "embedding_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
94
+ "embedding_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
95
+ "embedding_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
96
+ "embedding_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
97
+ "embedding_model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
98
+ "embedding_model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
99
+ "embedding_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
100
+ "embedding_model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
101
+ "embedding_model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
102
+ "embedding_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
103
+ "embedding_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
104
+ "embedding_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
105
+ "embedding_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
106
+ "embedding_model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
107
+ "embedding_model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
108
+ "embedding_model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
109
+ "embedding_model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
110
+ "embedding_model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
111
+ "embedding_model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
112
+ "embedding_model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
113
+ "embedding_model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
114
+ "embedding_model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
115
+ "embedding_model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
116
+ "embedding_model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
117
+ "embedding_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
118
+ "embedding_model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
119
+ "embedding_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
120
+ "embedding_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
121
+ "embedding_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
122
+ "embedding_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
123
+ "embedding_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
124
+ "embedding_model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
125
+ "embedding_model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
126
+ "embedding_model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
127
+ "embedding_model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
128
+ "embedding_model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "embedding_model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
130
+ "embedding_model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
131
+ "embedding_model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
132
+ "embedding_model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
133
+ "embedding_model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
134
+ "embedding_model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
135
+ "embedding_model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
136
+ "embedding_model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
137
+ "embedding_model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
138
+ "embedding_model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
139
+ "embedding_model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
140
+ "embedding_model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
141
+ "embedding_model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
142
+ "embedding_model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
143
+ "embedding_model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
144
+ "embedding_model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
145
+ "embedding_model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
146
+ "embedding_model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
147
+ "embedding_model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
148
+ "embedding_model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
149
+ "embedding_model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "embedding_model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
151
+ "embedding_model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
152
+ "embedding_model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
153
+ "embedding_model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
+ "embedding_model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
+ "embedding_model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
+ "embedding_model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
157
+ "embedding_model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
158
+ "embedding_model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
159
+ "embedding_model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
160
+ "embedding_model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
161
+ "embedding_model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
162
+ "embedding_model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
163
+ "embedding_model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
164
+ "embedding_model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "embedding_model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
166
+ "embedding_model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
167
+ "embedding_model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
168
+ "embedding_model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
169
+ "embedding_model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
170
+ "embedding_model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
171
+ "embedding_model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
172
+ "embedding_model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
173
+ "embedding_model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
174
+ "embedding_model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
175
+ "embedding_model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
176
+ "embedding_model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
177
+ "embedding_model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
178
+ "embedding_model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
179
+ "embedding_model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
180
+ "embedding_model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
181
+ "embedding_model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
182
+ "embedding_model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
183
+ "embedding_model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
184
+ "embedding_model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
185
+ "embedding_model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "embedding_model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
187
+ "embedding_model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
188
+ "embedding_model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
189
+ "embedding_model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
190
+ "embedding_model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
191
+ "embedding_model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
192
+ "embedding_model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
193
+ "embedding_model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
194
+ "embedding_model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
195
+ "embedding_model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
196
+ "embedding_model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
197
+ "embedding_model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
198
+ "embedding_model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
199
+ "embedding_model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
200
+ "embedding_model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "embedding_model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
202
+ "embedding_model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
203
+ "embedding_model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
204
+ "embedding_model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
205
+ "embedding_model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
206
+ "embedding_model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
207
+ "embedding_model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
208
+ "embedding_model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
209
+ "embedding_model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
210
+ "embedding_model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
211
+ "embedding_model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
212
+ "embedding_model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
213
+ "embedding_model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
214
+ "embedding_model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
215
+ "embedding_model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
216
+ "embedding_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
217
+ "embedding_model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
218
+ "embedding_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
219
+ "embedding_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
220
+ "embedding_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
221
+ "embedding_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
222
+ "embedding_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
223
+ "embedding_model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
224
+ "embedding_model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
225
+ "embedding_model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
226
+ "embedding_model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
227
+ "embedding_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
228
+ "embedding_model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
229
+ "embedding_model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
230
+ "embedding_model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
231
+ "embedding_model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
232
+ "embedding_model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
233
+ "embedding_model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
234
+ "embedding_model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
235
+ "embedding_model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
236
+ "embedding_model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
237
+ "embedding_model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
238
+ "embedding_model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
239
+ "embedding_model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
240
+ "embedding_model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
241
+ "embedding_model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
242
+ "embedding_model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
243
+ "embedding_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
244
+ "embedding_model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
245
+ "embedding_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
246
+ "embedding_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
247
+ "embedding_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
248
+ "embedding_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
249
+ "embedding_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
250
+ "embedding_model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
251
+ "embedding_model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
252
+ "embedding_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
253
+ "embedding_model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
254
+ "embedding_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
255
+ "embedding_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
256
+ "embedding_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
257
+ "embedding_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
258
+ "embedding_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
259
+ "embedding_model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
260
+ "embedding_model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
261
+ "embedding_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
262
+ "embedding_model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
263
+ "embedding_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
264
+ "embedding_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
265
+ "embedding_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
266
+ "embedding_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
267
+ "embedding_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
268
+ "embedding_model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
269
+ "embedding_model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
270
+ "embedding_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
271
+ "embedding_model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
272
+ "embedding_model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
273
+ "embedding_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
274
+ "embedding_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
275
+ "embedding_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
276
+ "embedding_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
277
+ "embedding_model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
278
+ "embedding_model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
279
+ "embedding_model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
280
+ "embedding_model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
281
+ "embedding_model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
282
+ "embedding_model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
283
+ "embedding_model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
284
+ "embedding_model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
285
+ "embedding_model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
286
+ "embedding_model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
287
+ "embedding_model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
288
+ "embedding_model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
289
+ "embedding_model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
290
+ "embedding_model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
291
+ "embedding_model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
292
+ "embedding_model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
293
+ "embedding_model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
294
+ "embedding_model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
295
+ "embedding_model.norm.weight": "model-00004-of-00004.safetensors",
296
+ "latent_attention_model.cross_attend_blocks.0.fn.to_kv.weight": "model-00001-of-00004.safetensors",
297
+ "latent_attention_model.cross_attend_blocks.0.fn.to_out.weight": "model-00001-of-00004.safetensors",
298
+ "latent_attention_model.cross_attend_blocks.0.fn.to_q.weight": "model-00001-of-00004.safetensors",
299
+ "latent_attention_model.cross_attend_blocks.0.norm.bias": "model-00001-of-00004.safetensors",
300
+ "latent_attention_model.cross_attend_blocks.0.norm.weight": "model-00001-of-00004.safetensors",
301
+ "latent_attention_model.cross_attend_blocks.0.norm_context.bias": "model-00001-of-00004.safetensors",
302
+ "latent_attention_model.cross_attend_blocks.0.norm_context.weight": "model-00001-of-00004.safetensors",
303
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.bias": "model-00001-of-00004.safetensors",
304
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.weight": "model-00001-of-00004.safetensors",
305
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.bias": "model-00001-of-00004.safetensors",
306
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.weight": "model-00001-of-00004.safetensors",
307
+ "latent_attention_model.cross_attend_blocks.1.norm.bias": "model-00001-of-00004.safetensors",
308
+ "latent_attention_model.cross_attend_blocks.1.norm.weight": "model-00001-of-00004.safetensors",
309
+ "latent_attention_model.latents": "model-00001-of-00004.safetensors"
310
+ }
311
+ }
modeling_nvembed.py ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union, Dict, Mapping, Optional, Tuple, TypedDict
2
+ import torch
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ from functools import partial
7
+ from contextlib import nullcontext
8
+ from transformers import AutoModel, PreTrainedTokenizerFast, BatchEncoding, DataCollatorWithPadding
9
+ from transformers.modeling_utils import PreTrainedModel
10
+ from transformers.models.auto import AutoTokenizer
11
+ from transformers.models.mistral.modeling_mistral import MISTRAL_INPUTS_DOCSTRING
12
+ from transformers.modeling_outputs import BaseModelOutputWithPast
13
+ from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
14
+ from transformers import MistralModel, MistralConfig
15
+ from transformers.cache_utils import Cache, DynamicCache
16
+ from transformers.utils import (
17
+ add_start_docstrings_to_model_forward,
18
+ logging,
19
+ )
20
+ from einops import rearrange, repeat
21
+ from tqdm.auto import tqdm
22
+ from datasets import Dataset
23
+ from torch.utils.data import DataLoader
24
+ from .configuration_nvembed import NVEmbedConfig, LatentAttentionConfig, BidirectionalMistralConfig
25
+
26
# Module-level logger created through the `logging` helper imported from `transformers.utils`.
logger = logging.get_logger(__name__)
27
+
28
class NVEmbedFeatures(TypedDict):
    """Keyword arguments consumed by ``NVEmbedModel.forward``.

    The keys mirror the dictionary built by
    ``NVEmbedModel.prepare_kwargs_from_batch``.
    """
    # Token ids of the batch (the key was previously misspelled ``input_dict``).
    input_ids: torch.Tensor
    # Attention mask handed to the embedding model.
    attention_mask: torch.Tensor
    # Mask used for pooling; may have leading instruction positions zeroed.
    pool_mask: torch.Tensor
32
+
33
class BidirectionalMistralModel(MistralModel):
    """``MistralModel`` subclass whose attention layers are flagged as non-causal.

    ``__init__`` sets ``is_causal = False`` on every layer's ``self_attn`` and
    forces ``_attn_implementation`` to ``"eager"``. ``forward`` builds the
    attention mask through ``_prepare_4d_attention_mask`` /
    ``_prepare_4d_attention_mask_for_sdpa`` and otherwise runs the decoder
    layers followed by the final norm.
    """
    config_class = BidirectionalMistralConfig

    def __init__(self, config: MistralConfig):
        super().__init__(config)
        # Flag every self-attention module as non-causal.
        for layer in self.layers:
            layer.self_attn.is_causal = False
        # Force the eager attention path. As a consequence the
        # "flash_attention_2" and "sdpa" branches in `forward` are only taken
        # if this attribute is changed after construction.
        self._attn_implementation = "eager"

    @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        # Resolve every optional flag against the model config defaults.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        # Exactly one of the two must be given; its shape provides batch_size and seq_length.
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
        elif input_ids is not None:
            batch_size, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            batch_size, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

        # Caching is switched off while training with gradient checkpointing.
        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        past_key_values_length = 0

        if use_cache:
            # Anything that is not a `Cache` instance is converted to a DynamicCache;
            # `use_legacy_cache` remembers that so the result can be converted back below.
            use_legacy_cache = not isinstance(past_key_values, Cache)
            if use_legacy_cache:
                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            past_key_values_length = past_key_values.get_usable_length(seq_length)

        if position_ids is None:
            # Default positions continue after whatever is already cached.
            device = input_ids.device if input_ids is not None else inputs_embeds.device
            position_ids = torch.arange(
                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
            )
            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
        else:
            position_ids = position_ids.view(-1, seq_length).long()

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        # With flash attention and caching, reject batches whose last column contains padding.
        if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
            is_padding_right = attention_mask[:, -1].sum().item() != batch_size
            if is_padding_right:
                raise ValueError(
                    "You are attempting to perform batched generation with padding_side='right'"
                    " this may lead to unexpected behaviour for Flash Attention version of Mistral. Make sure to "
                    " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
                )

        if self._attn_implementation == "flash_attention_2":
            # 2d mask is passed through the layers
            # The mask is dropped entirely (None) when it contains no zero entry.
            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
        elif self._attn_implementation == "sdpa" and not output_attentions:
            # output_attentions=True can not be supported when using SDPA, and we fall back on
            # the manual implementation that requires a 4D causal mask in all cases.
            # NOTE(review): the helper called here is `_prepare_4d_attention_mask_for_sdpa`,
            # not a causal-mask helper; the wording above looks inherited — confirm.
            attention_mask = _prepare_4d_attention_mask_for_sdpa(
                attention_mask, inputs_embeds.dtype
            )
        else:
            # 4d mask is passed through the layers
            attention_mask = _prepare_4d_attention_mask(
                attention_mask, inputs_embeds.dtype,
            )

        hidden_states = inputs_embeds

        # decoder layers
        # The tuples below are only accumulated when the corresponding output was requested.
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = None

        for decoder_layer in self.layers:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                # Positional arguments only: the checkpointing function receives the
                # layer's __call__ followed by its inputs.
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    attention_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                )

            hidden_states = layer_outputs[0]

            if use_cache:
                # The cache sits after the attention weights when those are returned.
                next_decoder_cache = layer_outputs[2 if output_attentions else 1]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = None
        if use_cache:
            # Hand the cache back in the same format the caller supplied.
            next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache

        if not return_dict:
            # Tuple output omits every entry that is None.
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
181
+
182
+ def _move_to_device(maybe_tensor, device: torch.device):
183
+ if torch.is_tensor(maybe_tensor):
184
+ return maybe_tensor.to(device, non_blocking=device.type == "cuda")
185
+ elif isinstance(maybe_tensor, dict):
186
+ return {key: _move_to_device(value, device) for key, value in maybe_tensor.items()}
187
+ elif isinstance(maybe_tensor, list):
188
+ return [_move_to_device(x, device) for x in maybe_tensor]
189
+ elif isinstance(maybe_tensor, tuple):
190
+ return tuple([_move_to_device(x, device) for x in maybe_tensor])
191
+ elif isinstance(maybe_tensor, Mapping):
192
+ return type(maybe_tensor)({k: _move_to_device(v, device) for k, v in maybe_tensor.items()})
193
+ else:
194
+ return maybe_tensor
195
+
196
def move_to_device(sample, device: torch.device):
    """Move ``sample`` to ``device`` unless the target is the CPU.

    For a CPU target the sample is returned as-is. For any other target an
    empty sample yields ``{}``; otherwise the work is delegated to
    ``_move_to_device``.
    """
    if device.type != "cpu":
        if len(sample) == 0:
            return {}
        return _move_to_device(sample, device)
    return sample
203
+
204
+
205
def input_transform_func(
    tokenizer: PreTrainedTokenizerFast,
    examples: Dict[str, List],
    always_add_eos: bool,
    max_length: int,
    instruction: str,
) -> BatchEncoding:
    """Tokenize ``examples['input_texts']`` into a batch of PyTorch tensors.

    Args:
        tokenizer: Tokenizer used to encode the texts; its ``eos_token`` is
            appended when ``always_add_eos`` is set.
        examples: Batch dictionary holding the raw strings under
            ``'input_texts'``. It is no longer modified in place.
        always_add_eos: When true, each text becomes
            ``instruction + text + tokenizer.eos_token``. When false the texts
            are tokenized unchanged, so ``instruction`` is not applied.
        max_length: Maximum sequence length passed to the tokenizer
            (``truncation=True``).
        instruction: Prefix prepended to every text (see ``always_add_eos``).

    Returns:
        The tokenizer output, requested with ``padding=True``,
        ``return_token_type_ids=False`` and ``return_tensors="pt"``.
    """
    texts = examples['input_texts']
    if always_add_eos:
        # Build a new list instead of overwriting the caller's dictionary entry.
        texts = [instruction + text + tokenizer.eos_token for text in texts]
    return tokenizer(
        texts,
        max_length=max_length,
        padding=True,
        return_token_type_ids=False,
        return_tensors="pt",
        truncation=True,
    )
222
+
223
+
224
class PreNorm(torch.nn.Module):
    """Apply LayerNorm to the input (and optionally to a context) before ``fn``.

    ``norm`` normalizes ``x``. When ``context_dim`` is given, ``norm_context``
    normalizes the ``context`` keyword argument before everything is forwarded
    to the wrapped module ``fn``.
    """

    def __init__(self, dim, fn, context_dim = None):
        super().__init__()
        self.fn = fn
        self.norm = torch.nn.LayerNorm(dim)
        if exists(context_dim):
            self.norm_context = torch.nn.LayerNorm(context_dim)
        else:
            self.norm_context = None

    def forward(self, x, **kwargs):
        normed_input = self.norm(x)
        if exists(self.norm_context):
            # Replace the raw context with its normalized version.
            kwargs['context'] = self.norm_context(kwargs['context'])
        return self.fn(normed_input, **kwargs)
238
+
239
class GEGLU(torch.nn.Module):
    """Gated GELU: split the last dimension in two and gate one half with the other."""

    def forward(self, x):
        value_half, gate_half = torch.chunk(x, 2, dim=-1)
        activated_gate = torch.nn.functional.gelu(gate_half)
        return value_half * activated_gate
243
+
244
class FeedForward(torch.nn.Module):
    """Two-layer feed-forward block with a GEGLU activation in between.

    The first linear layer projects ``dim`` to ``dim * mult * 2`` features,
    GEGLU reduces that to ``dim * mult``, and the last linear layer projects
    back to ``dim``.
    """

    def __init__(self, dim, mult = 4):
        super().__init__()
        hidden_dim = dim * mult
        stages = [
            torch.nn.Linear(dim, hidden_dim * 2),
            GEGLU(),
            torch.nn.Linear(hidden_dim, dim),
        ]
        self.net = torch.nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
253
+
254
def exists(val):
    """Return ``True`` when ``val`` is anything other than ``None``."""
    return not (val is None)
256
+
257
def default(val, d):
    """Return ``val`` unless it is ``None``, in which case return the fallback ``d``."""
    if exists(val):
        return val
    return d
259
+
260
+
261
class Attention(torch.nn.Module):
    """Multi-head attention of ``x`` over ``context`` (or over ``x`` itself).

    Queries come from ``x``; keys and values come from ``context``, falling
    back to ``x`` when no context is given. The ``mask`` argument is accepted
    but not used by ``forward``, and ``self.scale`` is stored but not read
    inside this class.
    """

    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
        super().__init__()
        context_dim = default(context_dim, query_dim)
        inner_dim = dim_head * heads
        self.scale = dim_head ** -0.5
        self.heads = heads

        self.to_q = torch.nn.Linear(query_dim, inner_dim, bias = False)
        self.to_kv = torch.nn.Linear(context_dim, inner_dim * 2, bias = False)
        self.to_out = torch.nn.Linear(inner_dim, query_dim, bias = False)

    def forward(self, x, context = None, mask = None):
        num_heads = self.heads
        queries = self.to_q(x)
        source = default(context, x)
        keys, values = self.to_kv(source).chunk(2, dim = -1)

        def split_heads(t):
            # Fold the head axis into the batch axis.
            return rearrange(t, 'b n (h d) -> (b h) n d', h = num_heads)

        queries = split_heads(queries)
        keys = split_heads(keys)
        values = split_heads(values)
        with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
            attended = torch.nn.functional.scaled_dot_product_attention(queries, keys, values)
        # Undo the head folding before the output projection.
        merged = rearrange(attended, '(b h) n d -> b n (h d)', h = num_heads)
        return self.to_out(merged)
283
+
284
+
285
class LatentAttentionModel(PreTrainedModel):
    """Cross-attention over a learned latent array followed by optional mean pooling.

    The module holds a learned ``latents`` parameter of shape
    ``(num_latents, latent_dim)`` and two pre-norm blocks: a cross-attention
    block in which the incoming hidden states are the queries and the latents
    are the context, and a feed-forward block. Both are applied residually.
    """
    config_class = LatentAttentionConfig

    def __init__(self, config: LatentAttentionConfig):
        super().__init__(config)
        ## cross-attention block
        num_latents = config.num_latents_value
        latent_dim = config.latent_dim
        cross_heads = config.num_cross_heads
        cross_dim_head = config.cross_dim_head
        dim = config.hidden_dim
        # init latent_attention and latents
        self.cross_attend_blocks = torch.nn.ModuleList([
            PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head),
                    context_dim = dim),
            PreNorm(latent_dim, FeedForward(latent_dim)),
        ])
        self.output_normalize = config.output_normalize
        self.register_parameter("latents", torch.nn.Parameter(torch.randn(num_latents, latent_dim)))

    def forward(self, hiddens, attention_mask: torch.Tensor=None):
        """Run the latent attention block and optionally pool over dimension 1.

        Args:
            hiddens: Hidden states; the first dimension is used as the batch
                size when the latents are repeated.
            attention_mask: Optional pooling mask. When given, the output is
                the mask-weighted mean over dimension 1. A row whose mask sums
                to zero is divided by zero (no guard is applied).

        Returns:
            The processed hidden states, mean-pooled when a mask was supplied,
            and L2-normalized along the last dimension when
            ``output_normalize`` is set.
        """
        ## cross-attention block
        cross_attn, cross_ff = self.cross_attend_blocks
        batch = hiddens.shape[0]
        # One copy of the latent array per batch element.
        x = repeat(self.latents, 'n d -> b n d', b = batch)
        hiddens = cross_attn(hiddens, context = x, mask = None) + hiddens
        hiddens = cross_ff(hiddens) + hiddens
        # Identity check: `!= None` on a tensor relies on comparison fallbacks.
        if attention_mask is not None:
            s = torch.sum(hiddens * attention_mask.unsqueeze(-1).float(), dim=1)
            d = attention_mask.sum(dim=1, keepdim=True).float()
            hiddens = s / d
            if self.output_normalize:
                hiddens = torch.nn.functional.normalize(hiddens, p=2, dim=-1)
        return hiddens
316
+
317
class NVEmbedModel(PreTrainedModel):
    """Embedding model combining a text encoder with a latent attention pooler.

    ``forward`` runs ``embedding_model`` on the token ids and feeds its
    ``last_hidden_state`` together with a pooling mask to
    ``latent_attention_model``. ``encode`` and ``_do_encode`` are convenience
    wrappers that tokenize raw strings first.
    """
    config_class = NVEmbedConfig
    _no_split_modules = ["MistralDecoderLayer", "LatentAttentionModel"]

    def __init__(self, config: NVEmbedConfig):
        super().__init__(config)
        has_text_config = config.text_config is not None
        self.latent_attention_model = AutoModel.from_config(config.latent_attention_config)
        self.embedding_model = AutoModel.from_config(
            config.text_config,
        ) if has_text_config else None
        self.tokenizer = AutoTokenizer.from_pretrained(config.text_config._name_or_path) if has_text_config else None
        self.padding_side = config.padding_side
        self.is_mask_instruction = config.is_mask_instruction
        self.add_eos = config.add_eos
        self.mask_type = config.mask_type
        if config.add_pad_token and self.tokenizer is not None:
            self.add_pad_token()

    def add_pad_token(self):
        """Reuse the EOS token as padding token and apply the configured padding side."""
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.padding_side = self.padding_side

    def _instruction_token_count(self, instruction: str) -> int:
        """Return how many leading positions should be excluded from pooling.

        The count is ``len(tokenizer.tokenize(instruction))`` when padding is
        on the right, instruction masking is enabled and the instruction is
        non-empty; otherwise ``0``.
        """
        if self.padding_side == "right" and self.is_mask_instruction and len(instruction) > 0:
            return len(self.tokenizer.tokenize(instruction))
        return 0

    def prepare_kwargs_from_batch(self, batch_dict: dict, instruction_lens: int, device: torch.device):
        """Move a tokenized batch to ``device`` and build the ``forward`` kwargs.

        Args:
            batch_dict: Tokenizer output containing ``'input_ids'`` and
                ``'attention_mask'``.
            instruction_lens: Number of leading positions to zero in the
                pooling mask (only applied for right padding with instruction
                masking enabled).
            device: Target device for the tensors.

        Returns:
            A dict with ``input_ids`` (cast to ``long``), the unmodified
            ``attention_mask`` and the ``pool_mask`` used for pooling.
        """
        batch_dict = move_to_device(batch_dict, device)
        # Work on a copy so the mask given to the encoder stays untouched.
        attention_mask = batch_dict['attention_mask'].clone() if 'attention_mask' in batch_dict else None
        if (attention_mask is not None
                and self.padding_side == "right"
                and self.is_mask_instruction
                and instruction_lens > 0):
            # Mask out the instruction tokens for mean-pooling
            # NOTE(review): instruction_lens comes from tokenizer.tokenize(), which
            # presumably does not count special tokens such as BOS — confirm the
            # slice lines up with the tokenized batch.
            attention_mask[:, :instruction_lens] = 0
        features: NVEmbedFeatures = {
            # `.long()` replaces the former torch.tensor(tensor) copy-construction,
            # which PyTorch warns about, and the no-op `.to(same_tensor)` call.
            'input_ids': batch_dict['input_ids'].long(),
            'attention_mask': batch_dict['attention_mask'],
            'pool_mask': attention_mask,
        }
        return features

    @torch.no_grad()
    def _do_encode(self,
        prompts: List[str],
        batch_size: int=1,
        instruction: str="",
        max_length: int=4096,
        num_workers: int=32,
        **kwargs
    ) -> Union[np.ndarray, torch.FloatTensor]:
        """Encode ``prompts`` batch by batch through a ``DataLoader``.

        Args:
            prompts: Texts to embed.
            batch_size: Number of prompts per batch.
            instruction: Prefix prepended to every prompt.
            max_length: Maximum token length passed to the tokenizer.
            num_workers: Worker processes used by the ``DataLoader``.
            **kwargs: ``return_numpy=True`` converts the result to a NumPy array.

        Returns:
            The concatenated sentence embeddings, as a tensor or NumPy array.
        """
        dataset: Dataset = Dataset.from_dict({'input_texts': prompts})
        # Tokenization happens lazily when the DataLoader fetches items.
        # NOTE(review): `always_add_eos` is hard-coded to True here; `self.add_eos`
        # is stored in __init__ but not consulted — confirm that is intended.
        dataset.set_transform(partial(input_transform_func,
                                      self.tokenizer,
                                      always_add_eos=True,
                                      max_length=max_length,
                                      instruction=instruction))

        data_collator = DataCollatorWithPadding(self.tokenizer)
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=num_workers,
            collate_fn=data_collator,
            pin_memory=True)

        instruction_lens = self._instruction_token_count(instruction)

        encoded_embeds = []
        device = next(self.embedding_model.parameters()).device
        for batch_dict in tqdm(data_loader, desc='encoding', mininterval=10):
            features = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
            embeds = self(**features)["sentence_embeddings"].squeeze(1)
            encoded_embeds.append(embeds)
        encoded_embeds = torch.cat(encoded_embeds, dim=0)
        if kwargs.get("return_numpy"):
            encoded_embeds = encoded_embeds.cpu().detach().numpy()
        return encoded_embeds

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, pool_mask: Optional[torch.Tensor]=None, return_dict: bool=True):
        """Compute sentence embeddings for a tokenized batch.

        Args:
            input_ids: Token ids passed to ``embedding_model``.
            attention_mask: Attention mask passed to ``embedding_model``.
            pool_mask: Mask passed to ``latent_attention_model`` for pooling.
            return_dict: When false, return a one-element tuple instead of a dict.

        Returns:
            ``{"sentence_embeddings": embeds}`` or ``(embeds,)``.
        """
        # CUDA autocast is enabled whenever CUDA is available; otherwise a no-op context is used.
        autocast_ctx = torch.autocast("cuda") if torch.cuda.is_available() else nullcontext()
        with autocast_ctx:
            ## decoder only layer
            outputs = self.embedding_model(
                input_ids=input_ids,
                attention_mask=attention_mask,
            )
            ## latent attention layer
            embeds = self.latent_attention_model(
                outputs.last_hidden_state,
                pool_mask,
            )
        if not return_dict:
            return (embeds,)
        return {"sentence_embeddings": embeds}


    @torch.no_grad()
    def encode(self, prompts: List[str], instruction: str="", max_length: int=4096, **kwargs):
        """Embed all ``prompts`` in a single batch.

        Args:
            prompts: Texts to embed.
            instruction: Prefix prepended to every prompt.
            max_length: Maximum token length passed to the tokenizer.
            **kwargs: Accepted for call compatibility; not used here.

        Returns:
            The ``sentence_embeddings`` tensor produced by ``forward``.
        """
        instruction_lens = self._instruction_token_count(instruction)

        device = next(self.embedding_model.parameters()).device
        batch_dict = input_transform_func(self.tokenizer,
                                          {"input_texts": list(prompts)},
                                          always_add_eos=True,
                                          max_length=max_length,
                                          instruction=instruction)

        features: NVEmbedFeatures = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
        return self(**features)["sentence_embeddings"].squeeze(1)
431
+
432
+
433
## AutoModel Register
# Associate each custom config class with its model class on AutoModel.
# NVEmbedModel.__init__ builds its sub-models through AutoModel.from_config,
# which presumably relies on these registrations — confirm against transformers.
AutoModel.register(NVEmbedConfig, NVEmbedModel)
AutoModel.register(LatentAttentionConfig, LatentAttentionModel)
AutoModel.register(BidirectionalMistralConfig, BidirectionalMistralModel)

## Register for auto class
# Mark the three model classes for the "AutoModel" auto class via
# `register_for_auto_class` (see the transformers custom-model documentation).
NVEmbedModel.register_for_auto_class("AutoModel")
LatentAttentionModel.register_for_auto_class("AutoModel")
BidirectionalMistralModel.register_for_auto_class("AutoModel")
modules.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ },
14
+ {
15
+ "idx": 2,
16
+ "name": "2",
17
+ "path": "2_Normalize",
18
+ "type": "sentence_transformers.models.Normalize"
19
+ }
20
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 4096,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "additional_special_tokens": [],
32
+ "bos_token": "<s>",
33
+ "clean_up_tokenization_spaces": false,
34
+ "eos_token": "</s>",
35
+ "legacy": true,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }