Files changed (6) hide show
  1. .gitignore +2 -1
  2. EXTERNAL_MODEL_RESULTS.json +0 -0
  3. app.py +0 -0
  4. config.yaml +389 -0
  5. envs.py +48 -0
  6. model_meta.yaml +1312 -0
.gitignore CHANGED
@@ -1 +1,2 @@
1
- *.pyc
 
 
1
+ *.pyc
2
+ model_infos.json
EXTERNAL_MODEL_RESULTS.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
The diff for this file is too large to render. See raw diff
 
config.yaml ADDED
@@ -0,0 +1,389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config:
2
+ REPO_ID: "mteb/leaderboard"
3
+ RESULTS_REPO: mteb/results
4
+ LEADERBOARD_NAME: "MTEB Leaderboard"
5
+ tasks:
6
+ BitextMining:
7
+ icon: "🎌"
8
+ metric: f1
9
+ metric_description: "[F1](https://huggingface.co/spaces/evaluate-metric/f1)"
10
+ task_description: "Bitext mining is the task of finding parallel sentences in two languages."
11
+ Classification:
12
+ icon: "❤️"
13
+ metric: accuracy
14
+ metric_description: "[Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)"
15
+ task_description: "Classification is the task of assigning a label to a text."
16
+ Clustering:
17
+ icon: "✨"
18
+ metric: v_measure
19
+ metric_description: "Validity Measure (v_measure)"
20
+ task_description: "Clustering is the task of grouping similar documents together."
21
+ PairClassification:
22
+ icon: "🎭"
23
+ metric: cos_sim_ap
24
+ metric_description: "Average Precision based on Cosine Similarities (cos_sim_ap)"
25
+ task_description: "Pair classification is the task of determining whether two texts are similar."
26
+ Reranking:
27
+ icon: "🥈"
28
+ metric: map
29
+ metric_description: "Mean Average Precision (MAP)"
30
+ task_description: "Reranking is the task of reordering a list of documents to improve relevance."
31
+ Retrieval:
32
+ icon: "🔎"
33
+ metric: ndcg_at_10
34
+ metric_description: "Normalized Discounted Cumulative Gain @ k (ndcg_at_10)"
35
+ task_description: "Retrieval is the task of finding relevant documents for a query."
36
+ STS:
37
+ icon: "🤖"
38
+ metric: cos_sim_spearman
39
+ metric_description: "Spearman correlation based on cosine similarity"
40
+ task_description: "Semantic Textual Similarity is the task of determining how similar two texts are."
41
+ Summarization:
42
+ icon: "📜"
43
+ metric: cos_sim_spearman
44
+ metric_description: "Spearman correlation based on cosine similarity"
45
+ task_description: "Summarization is the task of generating a summary of a text."
46
+ InstructionRetrieval:
47
+ icon: "🔎📋"
48
+ metric: "p-MRR"
49
+ metric_description: "paired mean reciprocal rank"
50
+ task_description: "Retrieval w/Instructions is the task of finding relevant documents for a query that has detailed instructions."
51
+ boards:
52
+ en:
53
+ title: English
54
+ language_long: "English"
55
+ has_overall: true
56
+ acronym: null
57
+ icon: null
58
+ special_icons: null
59
+ credits: null
60
+ tasks:
61
+ Classification:
62
+ - AmazonCounterfactualClassification (en)
63
+ - AmazonPolarityClassification
64
+ - AmazonReviewsClassification (en)
65
+ - Banking77Classification
66
+ - EmotionClassification
67
+ - ImdbClassification
68
+ - MassiveIntentClassification (en)
69
+ - MassiveScenarioClassification (en)
70
+ - MTOPDomainClassification (en)
71
+ - MTOPIntentClassification (en)
72
+ - ToxicConversationsClassification
73
+ - TweetSentimentExtractionClassification
74
+ Clustering:
75
+ - ArxivClusteringP2P
76
+ - ArxivClusteringS2S
77
+ - BiorxivClusteringP2P
78
+ - BiorxivClusteringS2S
79
+ - MedrxivClusteringP2P
80
+ - MedrxivClusteringS2S
81
+ - RedditClustering
82
+ - RedditClusteringP2P
83
+ - StackExchangeClustering
84
+ - StackExchangeClusteringP2P
85
+ - TwentyNewsgroupsClustering
86
+ PairClassification:
87
+ - SprintDuplicateQuestions
88
+ - TwitterSemEval2015
89
+ - TwitterURLCorpus
90
+ Reranking:
91
+ - AskUbuntuDupQuestions
92
+ - MindSmallReranking
93
+ - SciDocsRR
94
+ - StackOverflowDupQuestions
95
+ Retrieval:
96
+ - ArguAna
97
+ - ClimateFEVER
98
+ - CQADupstackRetrieval
99
+ - DBPedia
100
+ - FEVER
101
+ - FiQA2018
102
+ - HotpotQA
103
+ - MSMARCO
104
+ - NFCorpus
105
+ - NQ
106
+ - QuoraRetrieval
107
+ - SCIDOCS
108
+ - SciFact
109
+ - Touche2020
110
+ - TRECCOVID
111
+ STS:
112
+ - BIOSSES
113
+ - SICK-R
114
+ - STS12
115
+ - STS13
116
+ - STS14
117
+ - STS15
118
+ - STS16
119
+ - STS17 (en-en)
120
+ - STS22 (en)
121
+ - STSBenchmark
122
+ Summarization:
123
+ - SummEval
124
+ en-x:
125
+ title: "English-X"
126
+ language_long: "117 (Pairs of: English & other language)"
127
+ has_overall: false
128
+ acronym: null
129
+ icon: null
130
+ special_icons: null
131
+ credits: null
132
+ tasks:
133
+ BitextMining: ['BUCC (de-en)', 'BUCC (fr-en)', 'BUCC (ru-en)', 'BUCC (zh-en)', 'Tatoeba (afr-eng)', 'Tatoeba (amh-eng)', 'Tatoeba (ang-eng)', 'Tatoeba (ara-eng)', 'Tatoeba (arq-eng)', 'Tatoeba (arz-eng)', 'Tatoeba (ast-eng)', 'Tatoeba (awa-eng)', 'Tatoeba (aze-eng)', 'Tatoeba (bel-eng)', 'Tatoeba (ben-eng)', 'Tatoeba (ber-eng)', 'Tatoeba (bos-eng)', 'Tatoeba (bre-eng)', 'Tatoeba (bul-eng)', 'Tatoeba (cat-eng)', 'Tatoeba (cbk-eng)', 'Tatoeba (ceb-eng)', 'Tatoeba (ces-eng)', 'Tatoeba (cha-eng)', 'Tatoeba (cmn-eng)', 'Tatoeba (cor-eng)', 'Tatoeba (csb-eng)', 'Tatoeba (cym-eng)', 'Tatoeba (dan-eng)', 'Tatoeba (deu-eng)', 'Tatoeba (dsb-eng)', 'Tatoeba (dtp-eng)', 'Tatoeba (ell-eng)', 'Tatoeba (epo-eng)', 'Tatoeba (est-eng)', 'Tatoeba (eus-eng)', 'Tatoeba (fao-eng)', 'Tatoeba (fin-eng)', 'Tatoeba (fra-eng)', 'Tatoeba (fry-eng)', 'Tatoeba (gla-eng)', 'Tatoeba (gle-eng)', 'Tatoeba (glg-eng)', 'Tatoeba (gsw-eng)', 'Tatoeba (heb-eng)', 'Tatoeba (hin-eng)', 'Tatoeba (hrv-eng)', 'Tatoeba (hsb-eng)', 'Tatoeba (hun-eng)', 'Tatoeba (hye-eng)', 'Tatoeba (ido-eng)', 'Tatoeba (ile-eng)', 'Tatoeba (ina-eng)', 'Tatoeba (ind-eng)', 'Tatoeba (isl-eng)', 'Tatoeba (ita-eng)', 'Tatoeba (jav-eng)', 'Tatoeba (jpn-eng)', 'Tatoeba (kab-eng)', 'Tatoeba (kat-eng)', 'Tatoeba (kaz-eng)', 'Tatoeba (khm-eng)', 'Tatoeba (kor-eng)', 'Tatoeba (kur-eng)', 'Tatoeba (kzj-eng)', 'Tatoeba (lat-eng)', 'Tatoeba (lfn-eng)', 'Tatoeba (lit-eng)', 'Tatoeba (lvs-eng)', 'Tatoeba (mal-eng)', 'Tatoeba (mar-eng)', 'Tatoeba (max-eng)', 'Tatoeba (mhr-eng)', 'Tatoeba (mkd-eng)', 'Tatoeba (mon-eng)', 'Tatoeba (nds-eng)', 'Tatoeba (nld-eng)', 'Tatoeba (nno-eng)', 'Tatoeba (nob-eng)', 'Tatoeba (nov-eng)', 'Tatoeba (oci-eng)', 'Tatoeba (orv-eng)', 'Tatoeba (pam-eng)', 'Tatoeba (pes-eng)', 'Tatoeba (pms-eng)', 'Tatoeba (pol-eng)', 'Tatoeba (por-eng)', 'Tatoeba (ron-eng)', 'Tatoeba (rus-eng)', 'Tatoeba (slk-eng)', 'Tatoeba (slv-eng)', 'Tatoeba (spa-eng)', 'Tatoeba (sqi-eng)', 'Tatoeba (srp-eng)', 'Tatoeba (swe-eng)', 
'Tatoeba (swg-eng)', 'Tatoeba (swh-eng)', 'Tatoeba (tam-eng)', 'Tatoeba (tat-eng)', 'Tatoeba (tel-eng)', 'Tatoeba (tgl-eng)', 'Tatoeba (tha-eng)', 'Tatoeba (tuk-eng)', 'Tatoeba (tur-eng)', 'Tatoeba (tzl-eng)', 'Tatoeba (uig-eng)', 'Tatoeba (ukr-eng)', 'Tatoeba (urd-eng)', 'Tatoeba (uzb-eng)', 'Tatoeba (vie-eng)', 'Tatoeba (war-eng)', 'Tatoeba (wuu-eng)', 'Tatoeba (xho-eng)', 'Tatoeba (yid-eng)', 'Tatoeba (yue-eng)', 'Tatoeba (zsm-eng)']
134
+ zh:
135
+ title: Chinese
136
+ language_long: Chinese
137
+ has_overall: true
138
+ acronym: C-MTEB
139
+ icon: "🇨🇳"
140
+ special_icons:
141
+ Classification: "🧡"
142
+ credits: "[FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)"
143
+ tasks:
144
+ Classification:
145
+ - AmazonReviewsClassification (zh)
146
+ - IFlyTek
147
+ - JDReview
148
+ - MassiveIntentClassification (zh-CN)
149
+ - MassiveScenarioClassification (zh-CN)
150
+ - MultilingualSentiment
151
+ - OnlineShopping
152
+ - TNews
153
+ - Waimai
154
+ Clustering:
155
+ - CLSClusteringP2P
156
+ - CLSClusteringS2S
157
+ - ThuNewsClusteringP2P
158
+ - ThuNewsClusteringS2S
159
+ PairClassification:
160
+ - Cmnli
161
+ - Ocnli
162
+ Reranking:
163
+ - CMedQAv1
164
+ - CMedQAv2
165
+ - MMarcoReranking
166
+ - T2Reranking
167
+ Retrieval:
168
+ - CmedqaRetrieval
169
+ - CovidRetrieval
170
+ - DuRetrieval
171
+ - EcomRetrieval
172
+ - MedicalRetrieval
173
+ - MMarcoRetrieval
174
+ - T2Retrieval
175
+ - VideoRetrieval
176
+ STS:
177
+ - AFQMC
178
+ - ATEC
179
+ - BQ
180
+ - LCQMC
181
+ - PAWSX
182
+ - QBQTC
183
+ - STS22 (zh)
184
+ - STSB
185
+ da:
186
+ title: Danish
187
+ language_long: Danish
188
+ has_overall: false
189
+ acronym: null
190
+ icon: "🇩🇰"
191
+ special_icons:
192
+ Classification: "🤍"
193
+ credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
194
+ tasks:
195
+ BitextMining:
196
+ - BornholmBitextMining
197
+ Classification:
198
+ - AngryTweetsClassification
199
+ - DanishPoliticalCommentsClassification
200
+ - DKHateClassification
201
+ - LccSentimentClassification
202
+ - MassiveIntentClassification (da)
203
+ - MassiveScenarioClassification (da)
204
+ - NordicLangClassification
205
+ - ScalaDaClassification
206
+ fr:
207
+ title: French
208
+ language_long: "French"
209
+ has_overall: true
210
+ acronym: "F-MTEB"
211
+ icon: "🇫🇷"
212
+ special_icons:
213
+ Classification: "💙"
214
+ credits: "[Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [Wissam Siblini](https://github.com/wissam-sib), [Mathieu Ciancone](https://github.com/MathieuCiancone), [Marion Schaeffer](https://github.com/schmarion)"
215
+ tasks:
216
+ Classification:
217
+ - AmazonReviewsClassification (fr)
218
+ - MasakhaNEWSClassification (fra)
219
+ - MassiveIntentClassification (fr)
220
+ - MassiveScenarioClassification (fr)
221
+ - MTOPDomainClassification (fr)
222
+ - MTOPIntentClassification (fr)
223
+ Clustering:
224
+ - AlloProfClusteringP2P
225
+ - AlloProfClusteringS2S
226
+ - HALClusteringS2S
227
+ - MLSUMClusteringP2P
228
+ - MLSUMClusteringS2S
229
+ - MasakhaNEWSClusteringP2P (fra)
230
+ - MasakhaNEWSClusteringS2S (fra)
231
+ PairClassification:
232
+ - OpusparcusPC (fr)
233
+ - PawsX (fr)
234
+ Reranking:
235
+ - AlloprofReranking
236
+ - SyntecReranking
237
+ Retrieval:
238
+ - AlloprofRetrieval
239
+ - BSARDRetrieval
240
+ - MintakaRetrieval (fr)
241
+ - SyntecRetrieval
242
+ - XPQARetrieval (fr)
243
+ STS:
244
+ - STS22 (fr)
245
+ - STSBenchmarkMultilingualSTS (fr)
246
+ - SICKFr
247
+ Summarization:
248
+ - SummEvalFr
249
+ 'no':
250
+ title: Norwegian
251
+ language_long: "Norwegian Bokmål"
252
+ has_overall: false
253
+ acronym: null
254
+ icon: "🇳🇴"
255
+ special_icons:
256
+ Classification: "💙"
257
+ credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
258
+ tasks:
259
+ Classification: &id001
260
+ - NoRecClassification
261
+ - NordicLangClassification
262
+ - NorwegianParliament
263
+ - MassiveIntentClassification (nb)
264
+ - MassiveScenarioClassification (nb)
265
+ - ScalaNbClassification
266
+ instructions:
267
+ title: English
268
+ language_long: "English"
269
+ has_overall: false
270
+ acronym: null
271
+ icon: null
272
+ credits: "[Orion Weller, FollowIR](https://arxiv.org/abs/2403.15246)"
273
+ tasks:
274
+ InstructionRetrieval:
275
+ - Robust04InstructionRetrieval
276
+ - News21InstructionRetrieval
277
+ - Core17InstructionRetrieval
278
+ law:
279
+ title: Law
280
+ language_long: "English, German, Chinese"
281
+ has_overall: false
282
+ acronym: null
283
+ icon: "⚖️"
284
+ special_icons: null
285
+ credits: "[Voyage AI](https://www.voyageai.com/)"
286
+ tasks:
287
+ Retrieval:
288
+ - AILACasedocs
289
+ - AILAStatutes
290
+ - GerDaLIRSmall
291
+ - LeCaRDv2
292
+ - LegalBenchConsumerContractsQA
293
+ - LegalBenchCorporateLobbying
294
+ - LegalQuAD
295
+ - LegalSummarization
296
+ de:
297
+ title: German
298
+ language_long: "German"
299
+ has_overall: false
300
+ acronym: null
301
+ icon: "🇩🇪"
302
+ special_icons: null
303
+ credits: "[Silvan](https://github.com/slvnwhrl)"
304
+ tasks:
305
+ Clustering:
306
+ - BlurbsClusteringP2P
307
+ - BlurbsClusteringS2S
308
+ - TenKGnadClusteringP2P
309
+ - TenKGnadClusteringS2S
310
+ pl:
311
+ title: Polish
312
+ language_long: Polish
313
+ has_overall: true
314
+ acronym: null
315
+ icon: "🇵🇱"
316
+ special_icons:
317
+ Classification: "🤍"
318
+ credits: "[Rafał Poświata](https://github.com/rafalposwiata)"
319
+ tasks:
320
+ Classification:
321
+ - AllegroReviews
322
+ - CBD
323
+ - MassiveIntentClassification (pl)
324
+ - MassiveScenarioClassification (pl)
325
+ - PAC
326
+ - PolEmo2.0-IN
327
+ - PolEmo2.0-OUT
328
+ Clustering:
329
+ - 8TagsClustering
330
+ PairClassification:
331
+ - CDSC-E
332
+ - PPC
333
+ - PSC
334
+ - SICK-E-PL
335
+ Retrieval:
336
+ - ArguAna-PL
337
+ - DBPedia-PL
338
+ - FiQA-PL
339
+ - HotpotQA-PL
340
+ - MSMARCO-PL
341
+ - NFCorpus-PL
342
+ - NQ-PL
343
+ - Quora-PL
344
+ - SCIDOCS-PL
345
+ - SciFact-PL
346
+ - TRECCOVID-PL
347
+ STS:
348
+ - CDSC-R
349
+ - SICK-R-PL
350
+ - STS22 (pl)
351
+ se:
352
+ title: Swedish
353
+ language_long: Swedish
354
+ has_overall: false
355
+ acronym: null
356
+ icon: "🇸🇪"
357
+ special_icons:
358
+ Classification: "💛"
359
+ credits: "[Kenneth Enevoldsen](https://github.com/KennethEnevoldsen), [scandinavian-embedding-benchmark](https://kennethenevoldsen.github.io/scandinavian-embedding-benchmark/)"
360
+ tasks:
361
+ Classification:
362
+ - NoRecClassification
363
+ - NordicLangClassification
364
+ - NorwegianParliament
365
+ - MassiveIntentClassification (nb)
366
+ - MassiveScenarioClassification (nb)
367
+ - ScalaNbClassification
368
+ other-cls:
369
+ title: "Other Languages"
370
+ language_long: "47 (Only languages not included in the other tabs)"
371
+ has_overall: false
372
+ acronym: null
373
+ icon: null
374
+ special_icons:
375
+ Classification: "💜💚💙"
376
+ credits: null
377
+ tasks:
378
+ Classification: ['AmazonCounterfactualClassification (de)', 'AmazonCounterfactualClassification (ja)', 'AmazonReviewsClassification (de)', 'AmazonReviewsClassification (es)', 'AmazonReviewsClassification (fr)', 'AmazonReviewsClassification (ja)', 'AmazonReviewsClassification (zh)', 'MTOPDomainClassification (de)', 'MTOPDomainClassification (es)', 'MTOPDomainClassification (fr)', 'MTOPDomainClassification (hi)', 'MTOPDomainClassification (th)', 'MTOPIntentClassification (de)', 'MTOPIntentClassification (es)', 'MTOPIntentClassification (fr)', 'MTOPIntentClassification (hi)', 'MTOPIntentClassification (th)', 'MassiveIntentClassification (af)', 'MassiveIntentClassification (am)', 'MassiveIntentClassification (ar)', 'MassiveIntentClassification (az)', 'MassiveIntentClassification (bn)', 'MassiveIntentClassification (cy)', 'MassiveIntentClassification (de)', 'MassiveIntentClassification (el)', 'MassiveIntentClassification (es)', 'MassiveIntentClassification (fa)', 'MassiveIntentClassification (fi)', 'MassiveIntentClassification (fr)', 'MassiveIntentClassification (he)', 'MassiveIntentClassification (hi)', 'MassiveIntentClassification (hu)', 'MassiveIntentClassification (hy)', 'MassiveIntentClassification (id)', 'MassiveIntentClassification (is)', 'MassiveIntentClassification (it)', 'MassiveIntentClassification (ja)', 'MassiveIntentClassification (jv)', 'MassiveIntentClassification (ka)', 'MassiveIntentClassification (km)', 'MassiveIntentClassification (kn)', 'MassiveIntentClassification (ko)', 'MassiveIntentClassification (lv)', 'MassiveIntentClassification (ml)', 'MassiveIntentClassification (mn)', 'MassiveIntentClassification (ms)', 'MassiveIntentClassification (my)', 'MassiveIntentClassification (nl)', 'MassiveIntentClassification (pt)', 'MassiveIntentClassification (ro)', 'MassiveIntentClassification (ru)', 'MassiveIntentClassification (sl)', 'MassiveIntentClassification (sq)', 'MassiveIntentClassification (sw)', 'MassiveIntentClassification (ta)', 
'MassiveIntentClassification (te)', 'MassiveIntentClassification (th)', 'MassiveIntentClassification (tl)', 'MassiveIntentClassification (tr)', 'MassiveIntentClassification (ur)', 'MassiveIntentClassification (vi)', 'MassiveIntentClassification (zh-TW)', 'MassiveScenarioClassification (af)', 'MassiveScenarioClassification (am)', 'MassiveScenarioClassification (ar)', 'MassiveScenarioClassification (az)', 'MassiveScenarioClassification (bn)', 'MassiveScenarioClassification (cy)', 'MassiveScenarioClassification (de)', 'MassiveScenarioClassification (el)', 'MassiveScenarioClassification (es)', 'MassiveScenarioClassification (fa)', 'MassiveScenarioClassification (fi)', 'MassiveScenarioClassification (fr)', 'MassiveScenarioClassification (he)', 'MassiveScenarioClassification (hi)', 'MassiveScenarioClassification (hu)', 'MassiveScenarioClassification (hy)', 'MassiveScenarioClassification (id)', 'MassiveScenarioClassification (is)', 'MassiveScenarioClassification (it)', 'MassiveScenarioClassification (ja)', 'MassiveScenarioClassification (jv)', 'MassiveScenarioClassification (ka)', 'MassiveScenarioClassification (km)', 'MassiveScenarioClassification (kn)', 'MassiveScenarioClassification (ko)', 'MassiveScenarioClassification (lv)', 'MassiveScenarioClassification (ml)', 'MassiveScenarioClassification (mn)', 'MassiveScenarioClassification (ms)', 'MassiveScenarioClassification (my)', 'MassiveScenarioClassification (nl)', 'MassiveScenarioClassification (pt)', 'MassiveScenarioClassification (ro)', 'MassiveScenarioClassification (ru)', 'MassiveScenarioClassification (sl)', 'MassiveScenarioClassification (sq)', 'MassiveScenarioClassification (sw)', 'MassiveScenarioClassification (ta)', 'MassiveScenarioClassification (te)', 'MassiveScenarioClassification (th)', 'MassiveScenarioClassification (tl)', 'MassiveScenarioClassification (tr)', 'MassiveScenarioClassification (ur)', 'MassiveScenarioClassification (vi)', 'MassiveScenarioClassification (zh-TW)']
379
+ other-sts:
380
+ title: Other
381
+ language_long: "Arabic, Chinese, Dutch, English, French, German, Italian, Korean, Polish, Russian, Spanish (Only language combos not included in the other tabs)"
382
+ has_overall: false
383
+ acronym: null
384
+ icon: null
385
+ special_icons:
386
+ STS: "👽"
387
+ credits: null
388
+ tasks:
389
+ STS: ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark"]
envs.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

from yaml import safe_load

from huggingface_hub import HfApi

# Both YAML configs are loaded once, eagerly, at import time; every module
# importing this one shares the same parsed dictionaries.
LEADERBOARD_CONFIG_PATH = "config.yaml"
with open(LEADERBOARD_CONFIG_PATH, "r", encoding="utf-8") as f:
    LEADERBOARD_CONFIG = safe_load(f)

MODEL_META_PATH = "model_meta.yaml"
with open(MODEL_META_PATH, "r", encoding="utf-8") as f:
    MODEL_META = safe_load(f)


def get_config(name, default):
    """Resolve a configuration value by name.

    Lookup order: the process environment first, then the ``config``
    section of ``config.yaml``; if neither yields a value (or the value
    found is ``None``), return ``default``.
    """
    res = None

    if name in os.environ:
        res = os.environ[name]
    elif "config" in LEADERBOARD_CONFIG:
        res = LEADERBOARD_CONFIG["config"].get(name, None)

    if res is None:
        return default
    return res


def str2bool(v):
    """Interpret common truthy strings ("yes", "true", "t", "1") as True."""
    return str(v).lower() in ("yes", "true", "t", "1")


HF_TOKEN = get_config("HF_TOKEN", None)

LEADERBOARD_NAME = get_config("LEADERBOARD_NAME", "MTEB Leaderboard")

REPO_ID = get_config("REPO_ID", "mteb/leaderboard")
RESULTS_REPO = get_config("RESULTS_REPO", "mteb/results")

CACHE_PATH = get_config("HF_HOME", ".")
os.environ["HF_HOME"] = CACHE_PATH

# Check whether the configured HF_HOME (persistent storage on Spaces) is
# writable; fall back to the current directory if it is not.
if not os.access(CACHE_PATH, os.W_OK):
    print(f"No write access to HF_HOME: {CACHE_PATH}. Resetting to current directory.")
    CACHE_PATH = "."
    os.environ["HF_HOME"] = CACHE_PATH
else:
    # Fix: the original message was an f-string with no placeholder and never
    # said WHICH path was confirmed; include it.
    print(f"Write access confirmed for HF_HOME: {CACHE_PATH}")

API = HfApi(token=HF_TOKEN)
model_meta.yaml ADDED
@@ -0,0 +1,1312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_meta:
2
+ Baichuan-text-embedding:
3
+ link: https://platform.baichuan-ai.com/docs/text-Embedding
4
+ seq_len: 512
5
+ size: null
6
+ dim: 1024
7
+ is_external: true
8
+ is_proprietary: true
9
+ is_sentence_transformers_compatible: false
10
+ Cohere-embed-english-v3.0:
11
+ link: https://huggingface.co/Cohere/Cohere-embed-english-v3.0
12
+ seq_len: 512
13
+ size: null
14
+ dim: 1024
15
+ is_external: true
16
+ is_proprietary: true
17
+ is_sentence_transformers_compatible: false
18
+ Cohere-embed-multilingual-light-v3.0:
19
+ link: https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0
20
+ seq_len: 512
21
+ size: null
22
+ dim: 384
23
+ is_external: true
24
+ is_proprietary: true
25
+ is_sentence_transformers_compatible: false
26
+ Cohere-embed-multilingual-v3.0:
27
+ link: https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0
28
+ seq_len: 512
29
+ size: null
30
+ dim: 1024
31
+ is_external: true
32
+ is_proprietary: true
33
+ is_sentence_transformers_compatible: false
34
+ DanskBERT:
35
+ link: https://huggingface.co/vesteinn/DanskBERT
36
+ seq_len: 514
37
+ size: 125
38
+ dim: 768
39
+ is_external: true
40
+ is_proprietary: false
41
+ is_sentence_transformers_compatible: true
42
+ FollowIR-7B:
43
+ link: https://huggingface.co/jhu-clsp/FollowIR-7B
44
+ seq_len: 4096
45
+ size: 7240
46
+ is_external: true
47
+ is_propietary: false
48
+ is_sentence_transformer_compatible: false
49
+ GritLM-7B:
50
+ link: https://huggingface.co/GritLM/GritLM-7B
51
+ seq_len: 4096
52
+ size: 7240
53
+ is_external: true
54
+ is_propietary: false
55
+ is_sentence_transformer_compatible: false
56
+ LASER2:
57
+ link: https://github.com/facebookresearch/LASER
58
+ seq_len: N/A
59
+ size: 43
60
+ dim: 1024
61
+ is_external: true
62
+ is_proprietary: false
63
+ is_sentence_transformers_compatible: false
64
+ LLM2Vec-Llama-2-supervised:
65
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised
66
+ seq_len: 4096
67
+ size: 6607
68
+ dim: 4096
69
+ is_external: true
70
+ is_proprietary: false
71
+ is_sentence_transformers_compatible: false
72
+ LLM2Vec-Llama-2-unsupervised:
73
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
74
+ seq_len: 4096
75
+ size: 6607
76
+ dim: 4096
77
+ is_external: true
78
+ is_proprietary: false
79
+ is_sentence_transformers_compatible: false
80
+ LLM2Vec-Meta-Llama-3-supervised:
81
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised
82
+ seq_len: 8192
83
+ size: 7505
84
+ dim: 4096
85
+ is_external: true
86
+ is_proprietary: false
87
+ is_sentence_transformers_compatible: false
88
+ LLM2Vec-Meta-Llama-3-unsupervised:
89
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse
90
+ seq_len: 8192
91
+ size: 7505
92
+ dim: 4096
93
+ is_external: true
94
+ is_proprietary: false
95
+ is_sentence_transformers_compatible: false
96
+ LLM2Vec-Mistral-supervised:
97
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised
98
+ seq_len: 32768
99
+ size: 7111
100
+ dim: 4096
101
+ is_external: true
102
+ is_proprietary: false
103
+ is_sentence_transformers_compatible: false
104
+ LLM2Vec-Mistral-unsupervised:
105
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse
106
+ seq_len: 32768
107
+ size: 7111
108
+ dim: 4096
109
+ is_external: true
110
+ is_proprietary: false
111
+ is_sentence_transformers_compatible: false
112
+ LLM2Vec-Sheared-Llama-supervised:
113
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised
114
+ seq_len: 4096
115
+ size: 1280
116
+ dim: 2048
117
+ is_external: true
118
+ is_proprietary: false
119
+ is_sentence_transformers_compatible: false
120
+ LLM2Vec-Sheared-Llama-unsupervised:
121
+ link: https://huggingface.co/McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
122
+ seq_len: 4096
123
+ size: 1280
124
+ dim: 2048
125
+ is_external: true
126
+ is_proprietary: false
127
+ is_sentence_transformers_compatible: false
128
+ LaBSE:
129
+ link: https://huggingface.co/sentence-transformers/LaBSE
130
+ seq_len: 512
131
+ size: 471
132
+ dim: 768
133
+ is_external: true
134
+ is_proprietary: false
135
+ is_sentence_transformers_compatible: true
136
+ OpenSearch-text-hybrid:
137
+ link: https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval
138
+ seq_len: 512
139
+ size: null
140
+ dim: 1792
141
+ is_external: true
142
+ is_proprietary: true
143
+ is_sentence_transformers_compatible: false
144
+ all-MiniLM-L12-v2:
145
+ link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
146
+ seq_len: 512
147
+ size: 33
148
+ dim: 384
149
+ is_external: true
150
+ is_proprietary: false
151
+ is_sentence_transformers_compatible: true
152
+ all-MiniLM-L6-v2:
153
+ link: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
154
+ seq_len: 512
155
+ size: 23
156
+ dim: 384
157
+ is_external: true
158
+ is_proprietary: false
159
+ is_sentence_transformers_compatible: true
160
+ all-mpnet-base-v2:
161
+ link: https://huggingface.co/sentence-transformers/all-mpnet-base-v2
162
+ seq_len: 514
163
+ size: 110
164
+ dim: 768
165
+ is_external: true
166
+ is_proprietary: false
167
+ is_sentence_transformers_compatible: true
168
+ allenai-specter:
169
+ link: https://huggingface.co/sentence-transformers/allenai-specter
170
+ seq_len: 512
171
+ size: 110
172
+ dim: 768
173
+ is_external: true
174
+ is_proprietary: false
175
+ is_sentence_transformers_compatible: true
176
+ bert-base-10lang-cased:
177
+ link: https://huggingface.co/Geotrend/bert-base-10lang-cased
178
+ seq_len: 512
179
+ size: 138
180
+ dim: 768
181
+ is_external: true
182
+ is_proprietary: false
183
+ is_sentence_transformers_compatible: true
184
+ bert-base-15lang-cased:
185
+ link: https://huggingface.co/Geotrend/bert-base-15lang-cased
186
+ seq_len: 512
187
+ size: 138
188
+ dim: 768
189
+ is_external: true
190
+ is_proprietary: false
191
+ is_sentence_transformers_compatible: true
192
+ bert-base-25lang-cased:
193
+ link: https://huggingface.co/Geotrend/bert-base-25lang-cased
194
+ seq_len: 512
195
+ size: 138
196
+ dim: 768
197
+ is_external: true
198
+ is_proprietary: false
199
+ is_sentence_transformers_compatible: true
200
+ bert-base-multilingual-cased:
201
+ link: https://huggingface.co/google-bert/bert-base-multilingual-cased
202
+ seq_len: 512
203
+ size: 179
204
+ dim: 768
205
+ is_external: true
206
+ is_proprietary: false
207
+ is_sentence_transformers_compatible: true
208
+ bert-base-multilingual-uncased:
209
+ link: https://huggingface.co/google-bert/bert-base-multilingual-uncased
210
+ seq_len: 512
211
+ size: 168
212
+ dim: 768
213
+ is_external: true
214
+ is_proprietary: false
215
+ is_sentence_transformers_compatible: true
216
+ bert-base-swedish-cased:
217
+ link: https://huggingface.co/KB/bert-base-swedish-cased
218
+ seq_len: 512
219
+ size: 125
220
+ dim: 768
221
+ is_external: true
222
+ is_proprietary: false
223
+ is_sentence_transformers_compatible: true
224
+ bert-base-uncased:
225
+ link: https://huggingface.co/bert-base-uncased
226
+ seq_len: 512
227
+ size: 110
228
+ dim: 768
229
+ is_external: true
230
+ is_proprietary: false
231
+ is_sentence_transformers_compatible: true
232
+ bge-base-zh-v1.5:
233
+ link: https://huggingface.co/BAAI/bge-base-zh-v1.5
234
+ seq_len: 512
235
+ size: 102
236
+ dim: 768
237
+ is_external: true
238
+ is_proprietary: false
239
+ is_sentence_transformers_compatible: true
240
+ bge-large-en-v1.5:
241
+ link: https://huggingface.co/BAAI/bge-large-en-v1.5
242
+ seq_len: 512
243
+ size: null
244
+ dim: 1024
245
+ is_external: true
246
+ is_proprietary: false
247
+ is_sentence_transformers_compatible: false
248
+ bge-large-zh-noinstruct:
249
+ link: https://huggingface.co/BAAI/bge-large-zh-noinstruct
250
+ seq_len: 512
251
+ size: 326
252
+ dim: 1024
253
+ is_external: true
254
+ is_proprietary: false
255
+ is_sentence_transformers_compatible: true
256
+ bge-large-zh-v1.5:
257
+ link: https://huggingface.co/BAAI/bge-large-zh-v1.5
258
+ seq_len: 512
259
+ size: 326
260
+ dim: 1024
261
+ is_external: true
262
+ is_proprietary: false
263
+ is_sentence_transformers_compatible: true
264
+ bge-small-zh-v1.5:
265
+ link: https://huggingface.co/BAAI/bge-small-zh-v1.5
266
+ seq_len: 512
267
+ size: 24
268
+ dim: 512
269
+ is_external: true
270
+ is_proprietary: false
271
+ is_sentence_transformers_compatible: true
272
+ bm25:
273
+ link: https://en.wikipedia.org/wiki/Okapi_BM25
274
+ size: 0
275
+ is_external: true
276
+ is_proprietary: false
277
+ is_sentence_transformers_compatible: false
278
+ camembert-base:
279
+ link: https://huggingface.co/almanach/camembert-base
280
+ seq_len: 512
281
+ size: 111
282
+ dim: 512
283
+ is_external: false
284
+ is_proprietary: false
285
+ is_sentence_transformers_compatible: true
286
+ camembert-large:
287
+ link: https://huggingface.co/almanach/camembert-large
288
+ seq_len: 512
289
+ size: 338
290
+ dim: 768
291
+ is_external: false
292
+ is_proprietary: false
293
+ is_sentence_transformers_compatible: true
294
+ contriever-base-msmarco:
295
+ link: https://huggingface.co/nthakur/contriever-base-msmarco
296
+ seq_len: 512
297
+ size: 110
298
+ dim: 768
299
+ is_external: true
300
+ is_proprietary: false
301
+ is_sentence_transformers_compatible: true
302
+ cross-en-de-roberta-sentence-transformer:
303
+ link: https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer
304
+ seq_len: 514
305
+ size: 278
306
+ dim: 768
307
+ is_external: true
308
+ is_proprietary: false
309
+ is_sentence_transformers_compatible: true
310
+ dfm-encoder-large-v1:
311
+ link: https://huggingface.co/chcaa/dfm-encoder-large-v1
312
+ seq_len: 512
313
+ size: 355
314
+ dim: 1024
315
+ is_external: true
316
+ is_proprietary: false
317
+ is_sentence_transformers_compatible: true
318
+ dfm-sentence-encoder-large-1:
319
+ link: https://huggingface.co/chcaa/dfm-encoder-large-v1
320
+ seq_len: 512
321
+ size: 355
322
+ dim: 1024
323
+ is_external: true
324
+ is_proprietary: false
325
+ is_sentence_transformers_compatible: true
326
+ distilbert-base-25lang-cased:
327
+ link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
328
+ seq_len: 512
329
+ size: 110
330
+ dim: 768
331
+ is_external: false
332
+ is_proprietary: false
333
+ is_sentence_transformers_compatible: true
334
+ distilbert-base-en-fr-cased:
335
+ link: https://huggingface.co/Geotrend/distilbert-base-en-fr-cased
336
+ seq_len: 512
337
+ size: 110
338
+ dim: 768
339
+ is_external: false
340
+ is_proprietary: false
341
+ is_sentence_transformers_compatible: true
342
+ distilbert-base-en-fr-es-pt-it-cased:
343
+ link: https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased
344
+ seq_len: 512
345
+ size: 110
346
+ dim: 768
347
+ is_external: false
348
+ is_proprietary: false
349
+ is_sentence_transformers_compatible: true
350
+ distilbert-base-fr-cased:
351
+ link: https://huggingface.co/Geotrend/distilbert-base-fr-cased
352
+ seq_len: 512
353
+ size: 110
354
+ dim: 768
355
+ is_external: false
356
+ is_proprietary: false
357
+ is_sentence_transformers_compatible: true
358
+ distilbert-base-uncased:
359
+ link: https://huggingface.co/distilbert-base-uncased
360
+ seq_len: 512
361
+ size: 110
362
+ dim: 768
363
+ is_external: false
364
+ is_proprietary: false
365
+ is_sentence_transformers_compatible: true
366
+ distiluse-base-multilingual-cased-v2:
367
+ link: https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2
368
+ seq_len: 512
369
+ size: 135
370
+ dim: 512
371
+ is_external: true
372
+ is_proprietary: false
373
+ is_sentence_transformers_compatible: true
374
+ e5-base-v2:
375
+ link: https://huggingface.co/intfloat/e5-base-v2
376
+ seq_len: 512
377
+ size: 110
378
+ dim: 768
379
+ is_external: true
380
+ is_proprietary: false
381
+ is_sentence_transformers_compatible: true
382
+ e5-base:
383
+ link: https://huggingface.co/intfloat/e5-base
384
+ seq_len: 512
385
+ size: 110
386
+ dim: 768
387
+ is_external: true
388
+ is_proprietary: false
389
+ is_sentence_transformers_compatible: true
390
+ e5-large-v2:
391
+ link: https://huggingface.co/intfloat/e5-large-v2
392
+ seq_len: 512
393
+ size: 335
394
+ dim: 1024
395
+ is_external: true
396
+ is_proprietary: false
397
+ is_sentence_transformers_compatible: true
398
+ e5-large:
399
+ link: https://huggingface.co/intfloat/e5-large
400
+ seq_len: 512
401
+ size: 335
402
+ dim: 1024
403
+ is_external: true
404
+ is_proprietary: false
405
+ is_sentence_transformers_compatible: true
406
+ e5-mistral-7b-instruct:
407
+ link: https://huggingface.co/intfloat/e5-mistral-7b-instruct
408
+ seq_len: 32768
409
+ size: 7111
410
+ dim: 4096
411
+ is_external: true
412
+ is_proprietary: false
413
+ is_sentence_transformers_compatible: true
414
+ e5-small:
415
+ link: https://huggingface.co/intfloat/e5-small
416
+ seq_len: 512
417
+ size: 33
418
+ dim: 384
419
+ is_external: true
420
+ is_proprietary: false
421
+ is_sentence_transformers_compatible: true
422
+ electra-small-nordic:
423
+ link: https://huggingface.co/jonfd/electra-small-nordic
424
+ seq_len: 512
425
+ size: 23
426
+ dim: 256
427
+ is_external: true
428
+ is_proprietary: false
429
+ is_sentence_transformers_compatible: true
430
+ electra-small-swedish-cased-discriminator:
431
+ link: https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator
432
+ seq_len: 512
433
+ size: 16
434
+ dim: 256
435
+ is_external: true
436
+ is_proprietary: false
437
+ is_sentence_transformers_compatible: true
438
+ flan-t5-base:
439
+ link: https://huggingface.co/google/flan-t5-base
440
+ seq_len: 512
441
+ size: 220
442
+ dim: -1
443
+ is_external: true
444
+ is_proprietary: false
445
+ is_sentence_transformers_compatible: true
446
+ flan-t5-large:
447
+ link: https://huggingface.co/google/flan-t5-large
448
+ seq_len: 512
449
+ size: 770
450
+ dim: -1
451
+ is_external: true
452
+ is_proprietary: false
453
+ is_sentence_transformers_compatible: true
454
+ flaubert_base_cased:
455
+ link: https://huggingface.co/flaubert/flaubert_base_cased
456
+ seq_len: 512
457
+ size: 138
458
+ dim: 768
459
+ is_external: true
460
+ is_proprietary: false
461
+ is_sentence_transformers_compatible: true
462
+ flaubert_base_uncased:
463
+ link: https://huggingface.co/flaubert/flaubert_base_uncased
464
+ seq_len: 512
465
+ size: 138
466
+ dim: 768
467
+ is_external: true
468
+ is_proprietary: false
469
+ is_sentence_transformers_compatible: true
470
+ flaubert_large_cased:
471
+ link: https://huggingface.co/flaubert/flaubert_large_cased
472
+ seq_len: 512
473
+ size: 372
474
+ dim: 1024
475
+ is_external: true
476
+ is_proprietary: false
477
+ is_sentence_transformers_compatible: true
478
+ gbert-base:
479
+ link: https://huggingface.co/deepset/gbert-base
480
+ seq_len: 512
481
+ size: 110
482
+ dim: 768
483
+ is_external: true
484
+ is_proprietary: false
485
+ is_sentence_transformers_compatible: true
486
+ gbert-large:
487
+ link: https://huggingface.co/deepset/gbert-large
488
+ seq_len: 512
489
+ size: 337
490
+ dim: 1024
491
+ is_external: true
492
+ is_proprietary: false
493
+ is_sentence_transformers_compatible: true
494
+ gelectra-base:
495
+ link: https://huggingface.co/deepset/gelectra-base
496
+ seq_len: 512
497
+ size: 110
498
+ dim: 768
499
+ is_external: true
500
+ is_proprietary: false
501
+ is_sentence_transformers_compatible: true
502
+ gelectra-large:
503
+ link: https://huggingface.co/deepset/gelectra-large
504
+ seq_len: 512
505
+ size: 335
506
+ dim: 1024
507
+ is_external: true
508
+ is_proprietary: false
509
+ is_sentence_transformers_compatible: true
510
+ glove.6B.300d:
511
+ link: https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d
512
+ seq_len: N/A
513
+ size: 120
514
+ dim: 300
515
+ is_external: true
516
+ is_proprietary: false
517
+ is_sentence_transformers_compatible: true
518
+ google-gecko-256.text-embedding-preview-0409:
519
+ link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
520
+ seq_len: 2048
521
+ size: 1200
522
+ dim: 256
523
+ is_external: true
524
+ is_proprietary: true
525
+ is_sentence_transformers_compatible: false
526
+ google-gecko.text-embedding-preview-0409:
527
+ link: https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models
528
+ seq_len: 2048
529
+ size: 1200
530
+ dim: 768
531
+ is_external: true
532
+ is_proprietary: true
533
+ is_sentence_transformers_compatible: false
534
+ gottbert-base:
535
+ link: https://huggingface.co/uklfr/gottbert-base
536
+ seq_len: 512
537
+ size: 127
538
+ dim: 768
539
+ is_external: true
540
+ is_proprietary: false
541
+ is_sentence_transformers_compatible: true
542
+ gtr-t5-base:
543
+ link: https://huggingface.co/sentence-transformers/gtr-t5-base
544
+ seq_len: 512
545
+ size: 110
546
+ dim: 768
547
+ is_external: true
548
+ is_proprietary: false
549
+ is_sentence_transformers_compatible: true
550
+ gtr-t5-large:
551
+ link: https://huggingface.co/sentence-transformers/gtr-t5-large
552
+ seq_len: 512
553
+ size: 168
554
+ dim: 768
555
+ is_external: true
556
+ is_proprietary: false
557
+ is_sentence_transformers_compatible: true
558
+ gtr-t5-xl:
559
+ link: https://huggingface.co/sentence-transformers/gtr-t5-xl
560
+ seq_len: 512
561
+ size: 1240
562
+ dim: 768
563
+ is_external: true
564
+ is_proprietary: false
565
+ is_sentence_transformers_compatible: true
566
+ gtr-t5-xxl:
567
+ link: https://huggingface.co/sentence-transformers/gtr-t5-xxl
568
+ seq_len: 512
569
+ size: 4865
570
+ dim: 768
571
+ is_external: true
572
+ is_proprietary: false
573
+ is_sentence_transformers_compatible: true
574
+ herbert-base-retrieval-v2:
575
+ link: https://huggingface.co/ipipan/herbert-base-retrieval-v2
576
+ seq_len: 514
577
+ size: 125
578
+ dim: 768
579
+ is_external: true
580
+ is_proprietary: false
581
+ is_sentence_transformers_compatible: true
582
+ instructor-base:
583
+ link: https://huggingface.co/hkunlp/instructor-base
584
+ seq_len: N/A
585
+ size: 110
586
+ dim: 768
587
+ is_external: true
588
+ is_proprietary: false
589
+ is_sentence_transformers_compatible: true
590
+ instructor-xl:
591
+ link: https://huggingface.co/hkunlp/instructor-xl
592
+ seq_len: N/A
593
+ size: 1241
594
+ dim: 768
595
+ is_external: true
596
+ is_proprietary: false
597
+ is_sentence_transformers_compatible: true
598
+ komninos:
599
+ link: https://huggingface.co/sentence-transformers/average_word_embeddings_komninos
600
+ seq_len: N/A
601
+ size: 134
602
+ dim: 300
603
+ is_external: true
604
+ is_proprietary: false
605
+ is_sentence_transformers_compatible: true
606
+ llama-2-7b-chat:
607
+ link: https://huggingface.co/meta-llama/Llama-2-7b-chat-hf
608
+ seq_len: 4096
609
+ size: 7000
610
+ dim: -1
611
+ is_external: true
612
+ is_proprietary: false
613
+ is_sentence_transformers_compatible: false
614
+ luotuo-bert-medium:
615
+ link: https://huggingface.co/silk-road/luotuo-bert-medium
616
+ seq_len: 512
617
+ size: 328
618
+ dim: 768
619
+ is_external: true
620
+ is_proprietary: false
621
+ is_sentence_transformers_compatible: true
622
+ m3e-base:
623
+ link: https://huggingface.co/moka-ai/m3e-base
624
+ seq_len: 512
625
+ size: 102
626
+ dim: 768
627
+ is_external: true
628
+ is_proprietary: false
629
+ is_sentence_transformers_compatible: true
630
+ m3e-large:
631
+ link: https://huggingface.co/moka-ai/m3e-large
632
+ seq_len: 512
633
+ size: 102
634
+ dim: 768
635
+ is_external: true
636
+ is_proprietary: false
637
+ is_sentence_transformers_compatible: true
638
+ mistral-7b-instruct-v0.2:
639
+ link: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2
640
+ seq_len: 4096
641
+ size: 7240
642
+ dim: -1
643
+ is_external: true
644
+ is_proprietary: false
645
+ is_sentence_transformers_compatible: false
646
+ mistral-embed:
647
+ link: https://docs.mistral.ai/guides/embeddings
648
+ seq_len: null
649
+ size: null
650
+ dim: 1024
651
+ is_external: true
652
+ is_proprietary: true
653
+ is_sentence_transformers_compatible: false
654
+ monobert-large-msmarco:
655
+ link: https://huggingface.co/castorini/monobert-large-msmarco
656
+ seq_len: 512
657
+ size: 770
658
+ dim: -1
659
+ is_external: true
660
+ is_proprietary: false
661
+ is_sentence_transformers_compatible: false
662
+ monot5-3b-msmarco-10k:
663
+ link: https://huggingface.co/castorini/monot5-3b-msmarco-10k
664
+ seq_len: 512
665
+ size: 2480
666
+ dim: -1
667
+ is_external: true
668
+ is_proprietary: false
669
+ is_sentence_transformers_compatible: false
670
+ monot5-base-msmarco-10k:
671
+ link: https://huggingface.co/castorini/monot5-base-msmarco-10k
672
+ seq_len: 512
673
+ size: 220
674
+ dim: -1
675
+ is_external: true
676
+ is_proprietary: false
677
+ is_sentence_transformers_compatible: false
678
+ msmarco-bert-co-condensor:
679
+ link: https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor
680
+ seq_len: 512
681
+ size: 110
682
+ dim: 768
683
+ is_external: true
684
+ is_proprietary: false
685
+ is_sentence_transformers_compatible: true
686
+ multi-qa-MiniLM-L6-cos-v1:
687
+ link: https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1
688
+ seq_len: 512
689
+ size: 23
690
+ dim: 384
691
+ is_external: true
692
+ is_proprietary: false
693
+ is_sentence_transformers_compatible: true
694
+ multilingual-e5-base:
695
+ link: https://huggingface.co/intfloat/multilingual-e5-base
696
+ seq_len: 514
697
+ size: 278
698
+ dim: 768
699
+ is_external: true
700
+ is_proprietary: false
701
+ is_sentence_transformers_compatible: true
702
+ multilingual-e5-large:
703
+ link: https://huggingface.co/intfloat/multilingual-e5-large
704
+ seq_len: 514
705
+ size: 560
706
+ dim: 1024
707
+ is_external: true
708
+ is_proprietary: false
709
+ is_sentence_transformers_compatible: true
710
+ multilingual-e5-small:
711
+ link: https://huggingface.co/intfloat/multilingual-e5-small
712
+ seq_len: 512
713
+ size: 118
714
+ dim: 384
715
+ is_external: true
716
+ is_proprietary: false
717
+ is_sentence_transformers_compatible: true
718
+ nb-bert-base:
719
+ link: https://huggingface.co/NbAiLab/nb-bert-base
720
+ seq_len: 512
721
+ size: 179
722
+ dim: 768
723
+ is_external: true
724
+ is_proprietary: false
725
+ is_sentence_transformers_compatible: true
726
+ nb-bert-large:
727
+ link: https://huggingface.co/NbAiLab/nb-bert-large
728
+ seq_len: 512
729
+ size: 355
730
+ dim: 1024
731
+ is_external: true
732
+ is_proprietary: false
733
+ is_sentence_transformers_compatible: true
734
+ nomic-embed-text-v1.5-128:
735
+ link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
736
+ seq_len: 8192
737
+ size: 138
738
+ dim: 128
739
+ is_external: true
740
+ is_proprietary: false
741
+ is_sentence_transformers_compatible: true
742
+ nomic-embed-text-v1.5-256:
743
+ link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
744
+ seq_len: 8192
745
+ size: 138
746
+ dim: 256
747
+ is_external: true
748
+ is_proprietary: false
749
+ is_sentence_transformers_compatible: true
750
+ nomic-embed-text-v1.5-512:
751
+ link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
752
+ seq_len: 8192
753
+ size: 138
754
+ dim: 512
755
+ is_external: true
756
+ is_proprietary: false
757
+ is_sentence_transformers_compatible: true
758
+ nomic-embed-text-v1.5-64:
759
+ link: https://huggingface.co/nomic-ai/nomic-embed-text-v1.5
760
+ seq_len: 8192
761
+ size: 138
762
+ dim: 64
763
+ is_external: true
764
+ is_proprietary: false
765
+ is_sentence_transformers_compatible: true
766
+ norbert3-base:
767
+ link: https://huggingface.co/ltg/norbert3-base
768
+ seq_len: 512
769
+ size: 131
770
+ dim: 768
771
+ is_external: true
772
+ is_proprietary: false
773
+ is_sentence_transformers_compatible: true
774
+ norbert3-large:
775
+ link: https://huggingface.co/ltg/norbert3-large
776
+ seq_len: 512
777
+ size: 368
778
+ dim: 1024
779
+ is_external: true
780
+ is_proprietary: false
781
+ is_sentence_transformers_compatible: true
782
+ paraphrase-multilingual-MiniLM-L12-v2:
783
+ link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
784
+ seq_len: 512
785
+ size: 118
786
+ dim: 384
787
+ is_external: true
788
+ is_proprietary: false
789
+ is_sentence_transformers_compatible: true
790
+ paraphrase-multilingual-mpnet-base-v2:
791
+ link: https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2
792
+ seq_len: 514
793
+ size: 278
794
+ dim: 768
795
+ is_external: true
796
+ is_proprietary: false
797
+ is_sentence_transformers_compatible: true
798
+ sentence-bert-swedish-cased:
799
+ link: https://huggingface.co/KBLab/sentence-bert-swedish-cased
800
+ seq_len: 512
801
+ size: 125
802
+ dim: 768
803
+ is_external: true
804
+ is_proprietary: false
805
+ is_sentence_transformers_compatible: true
806
+ sentence-camembert-base:
807
+ link: https://huggingface.co/dangvantuan/sentence-camembert-base
808
+ seq_len: 512
809
+ size: 110
810
+ dim: 768
811
+ is_external: true
812
+ is_proprietary: false
813
+ is_sentence_transformers_compatible: true
814
+ sentence-camembert-large:
815
+ link: https://huggingface.co/dangvantuan/sentence-camembert-large
816
+ seq_len: 512
817
+ size: 337
818
+ dim: 1024
819
+ is_external: true
820
+ is_proprietary: false
821
+ is_sentence_transformers_compatible: true
822
+ sentence-croissant-llm-base:
823
+ link: https://huggingface.co/Wissam42/sentence-croissant-llm-base
824
+ seq_len: 2048
825
+ size: 1280
826
+ dim: 2048
827
+ is_external: true
828
+ is_proprietary: false
829
+ is_sentence_transformers_compatible: true
830
+ sentence-t5-base:
831
+ link: https://huggingface.co/sentence-transformers/sentence-t5-base
832
+ seq_len: 512
833
+ size: 110
834
+ dim: 768
835
+ is_external: true
836
+ is_proprietary: false
837
+ is_sentence_transformers_compatible: true
838
+ sentence-t5-large:
839
+ link: https://huggingface.co/sentence-transformers/sentence-t5-large
840
+ seq_len: 512
841
+ size: 168
842
+ dim: 768
843
+ is_external: true
844
+ is_proprietary: false
845
+ is_sentence_transformers_compatible: true
846
+ sentence-t5-xl:
847
+ link: https://huggingface.co/sentence-transformers/sentence-t5-xl
848
+ seq_len: 512
849
+ size: 1240
850
+ dim: 768
851
+ is_external: true
852
+ is_proprietary: false
853
+ is_sentence_transformers_compatible: true
854
+ sentence-t5-xxl:
855
+ link: https://huggingface.co/sentence-transformers/sentence-t5-xxl
856
+ seq_len: 512
857
+ size: 4865
858
+ dim: 768
859
+ is_external: true
860
+ is_proprietary: false
861
+ is_sentence_transformers_compatible: true
862
+ silver-retriever-base-v1:
863
+ link: https://huggingface.co/ipipan/silver-retriever-base-v1
864
+ seq_len: 514
865
+ size: 125
866
+ dim: 768
867
+ is_external: true
868
+ is_proprietary: false
869
+ is_sentence_transformers_compatible: true
870
+ st-polish-paraphrase-from-distilroberta:
871
+ link: https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta
872
+ seq_len: 514
873
+ size: 125
874
+ dim: 768
875
+ is_external: true
876
+ is_proprietary: false
877
+ is_sentence_transformers_compatible: true
878
+ st-polish-paraphrase-from-mpnet:
879
+ link: https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet
880
+ seq_len: 514
881
+ size: 125
882
+ dim: 768
883
+ is_external: true
884
+ is_proprietary: false
885
+ is_sentence_transformers_compatible: true
886
+ sup-simcse-bert-base-uncased:
887
+ link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
888
+ seq_len: 512
889
+ size: 110
890
+ dim: 768
891
+ is_external: true
892
+ is_proprietary: false
893
+ is_sentence_transformers_compatible: true
894
+ text-embedding-3-large:
895
+ link: https://openai.com/blog/new-embedding-models-and-api-updates
896
+ seq_len: 8191
897
+ size: null
898
+ dim: 3072
899
+ is_external: true
900
+ is_proprietary: true
901
+ is_sentence_transformers_compatible: false
902
+ text-embedding-3-large-256:
903
+ link: https://openai.com/blog/new-embedding-models-and-api-updates
904
+ seq_len: 8191
905
+ size: null
906
+ dim: 256
907
+ is_external: true
908
+ is_proprietary: true
909
+ is_sentence_transformers_compatible: false
910
+ text-embedding-3-small:
911
+ link: https://openai.com/blog/new-embedding-models-and-api-updates
912
+ seq_len: 8191
913
+ size: null
914
+ dim: 1536
915
+ is_external: true
916
+ is_proprietary: true
917
+ is_sentence_transformers_compatible: false
918
+ text-embedding-ada-002:
919
+ link: https://openai.com/blog/new-and-improved-embedding-model
920
+ seq_len: 8191
921
+ size: null
922
+ dim: 1536
923
+ is_external: true
924
+ is_proprietary: true
925
+ is_sentence_transformers_compatible: false
926
+ text-search-ada-001:
927
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
928
+ seq_len: 2046
929
+ size: null
930
+ dim: 1024
931
+ is_external: true
932
+ is_proprietary: true
933
+ is_sentence_transformers_compatible: false
934
+ text-search-ada-doc-001:
935
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
936
+ seq_len: 2046
937
+ size: null
938
+ dim: 1024
939
+ is_external: true
940
+ is_proprietary: true
941
+ is_sentence_transformers_compatible: false
942
+ text-search-ada-query-001:
943
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
944
+ seq_len: 2046
945
+ size: null
946
+ dim: 1024
947
+ is_external: true
948
+ is_proprietary: true
949
+ is_sentence_transformers_compatible: false
950
+ text-search-babbage-001:
951
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
952
+ seq_len: 2046
953
+ size: null
954
+ dim: 2048
955
+ is_external: true
956
+ is_proprietary: true
957
+ is_sentence_transformers_compatible: false
958
+ text-search-curie-001:
959
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
960
+ seq_len: 2046
961
+ size: null
962
+ dim: 4096
963
+ is_external: true
964
+ is_proprietary: true
965
+ is_sentence_transformers_compatible: false
966
+ text-search-davinci-001:
967
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
968
+ seq_len: 2046
969
+ size: null
970
+ dim: 12288
971
+ is_external: true
972
+ is_proprietary: true
973
+ is_sentence_transformers_compatible: false
974
+ text-similarity-ada-001:
975
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
976
+ seq_len: 2046
977
+ size: null
978
+ dim: 1024
979
+ is_external: true
980
+ is_proprietary: true
981
+ is_sentence_transformers_compatible: false
982
+ text-similarity-babbage-001:
983
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
984
+ seq_len: 2046
985
+ size: null
986
+ dim: 2048
987
+ is_external: true
988
+ is_proprietary: true
989
+ is_sentence_transformers_compatible: false
990
+ text-similarity-curie-001:
991
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
992
+ seq_len: 2046
993
+ size: null
994
+ dim: 4096
995
+ is_external: true
996
+ is_proprietary: true
997
+ is_sentence_transformers_compatible: false
998
+ text-similarity-davinci-001:
999
+ link: https://openai.com/blog/introducing-text-and-code-embeddings
1000
+ seq_len: 2046
1001
+ size: null
1002
+ dim: 12288
1003
+ is_external: true
1004
+ is_proprietary: true
1005
+ is_sentence_transformers_compatible: false
1006
+ tart-dual-contriever-msmarco:
1007
+ link: https://huggingface.co/orionweller/tart-dual-contriever-msmarco
1008
+ seq_len: 512
1009
+ size: 110
1010
+ dim: 768
1011
+ is_external: true
1012
+ is_proprietary: false
1013
+ is_sentence_transformers_compatible: false
1014
+ tart-full-flan-t5-xl:
1015
+ link: https://huggingface.co/facebook/tart-full-flan-t5-xl
1016
+ seq_len: 512
1017
+ size: 2480
1018
+ dim: -1
1019
+ is_external: true
1020
+ is_proprietary: false
1021
+ is_sentence_transformers_compatible: false
1022
+ text2vec-base-chinese:
1023
+ link: https://huggingface.co/shibing624/text2vec-base-chinese
1024
+ seq_len: 512
1025
+ size: 102
1026
+ dim: 768
1027
+ is_external: true
1028
+ is_proprietary: false
1029
+ is_sentence_transformers_compatible: true
1030
+ text2vec-base-multilingual:
1031
+ link: https://huggingface.co/shibing624/text2vec-base-multilingual
1032
+ seq_len: null
1033
+ size: null
1034
+ dim: null
1035
+ is_external: true
1036
+ is_proprietary: false
1037
+ is_sentence_transformers_compatible: false
1038
+ text2vec-large-chinese:
1039
+ link: https://huggingface.co/GanymedeNil/text2vec-large-chinese
1040
+ seq_len: 512
1041
+ size: 326
1042
+ dim: 1024
1043
+ is_external: true
1044
+ is_proprietary: false
1045
+ is_sentence_transformers_compatible: true
1046
+ titan-embed-text-v1:
1047
+ link: https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html
1048
+ seq_len: 8000
1049
+ size: null
1050
+ dim: 1536
1051
+ is_external: true
1052
+ is_proprietary: true
1053
+ is_sentence_transformers_compatible: false
1054
+ udever-bloom-1b1:
1055
+ link: https://huggingface.co/izhx/udever-bloom-1b1
1056
+ seq_len: 2048
1057
+ size: null
1058
+ dim: 1536
1059
+ is_external: true
1060
+ is_proprietary: false
1061
+ is_sentence_transformers_compatible: true
1062
+ udever-bloom-560m:
1063
+ link: https://huggingface.co/izhx/udever-bloom-560m
1064
+ seq_len: 2048
1065
+ size: null
1066
+ dim: 1024
1067
+ is_external: true
1068
+ is_proprietary: false
1069
+ is_sentence_transformers_compatible: true
1070
+ universal-sentence-encoder-multilingual-3:
1071
+ link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3
1072
+ seq_len: 512
1073
+ size: null
1074
+ dim: 512
1075
+ is_external: true
1076
+ is_proprietary: false
1077
+ is_sentence_transformers_compatible: true
1078
+ universal-sentence-encoder-multilingual-large-3:
1079
+ link: https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3
1080
+ seq_len: 512
1081
+ size: null
1082
+ dim: 512
1083
+ is_external: true
1084
+ is_proprietary: false
1085
+ is_sentence_transformers_compatible: true
1086
+ unsup-simcse-bert-base-uncased:
1087
+ link: https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased
1088
+ seq_len: 512
1089
+ size: 110
1090
+ dim: 768
1091
+ is_external: true
1092
+ is_proprietary: false
1093
+ is_sentence_transformers_compatible: true
1094
+ use-cmlm-multilingual:
1095
+ link: https://huggingface.co/sentence-transformers/use-cmlm-multilingual
1096
+ seq_len: 512
1097
+ size: 472
1098
+ dim: 768
1099
+ is_external: true
1100
+ is_proprietary: false
1101
+ is_sentence_transformers_compatible: true
1102
+ voyage-2:
1103
+ link: https://docs.voyageai.com/embeddings/
1104
+ seq_len: 1024
1105
+ size: null
1106
+ dim: 1024
1107
+ is_external: true
1108
+ is_proprietary: true
1109
+ is_sentence_transformers_compatible: false
1110
+ voyage-code-2:
1111
+ link: https://docs.voyageai.com/embeddings/
1112
+ seq_len: 16000
1113
+ size: null
1114
+ dim: 1536
1115
+ is_external: true
1116
+ is_proprietary: true
1117
+ is_sentence_transformers_compatible: false
1118
+ voyage-large-2-instruct:
1119
+ link: https://docs.voyageai.com/embeddings/
1120
+ seq_len: 16000
1121
+ size: null
1122
+ dim: 1024
1123
+ is_external: true
1124
+ is_proprietary: true
1125
+ is_sentence_transformers_compatible: false
1126
+ voyage-law-2:
1127
+ link: https://docs.voyageai.com/embeddings/
1128
+ seq_len: 4000
1129
+ size: null
1130
+ dim: 1024
1131
+ is_external: true
1132
+ is_proprietary: true
1133
+ is_sentence_transformers_compatible: false
1134
+ voyage-lite-01-instruct:
1135
+ link: https://docs.voyageai.com/embeddings/
1136
+ seq_len: 4000
1137
+ size: null
1138
+ dim: 1024
1139
+ is_external: true
1140
+ is_proprietary: true
1141
+ is_sentence_transformers_compatible: false
1142
+ voyage-lite-02-instruct:
1143
+ link: https://docs.voyageai.com/embeddings/
1144
+ seq_len: 4000
1145
+ size: 1220
1146
+ dim: 1024
1147
+ is_external: true
1148
+ is_proprietary: true
1149
+ is_sentence_transformers_compatible: false
1150
+ xlm-roberta-base:
1151
+ link: https://huggingface.co/xlm-roberta-base
1152
+ seq_len: 514
1153
+ size: 279
1154
+ dim: 768
1155
+ is_external: true
1156
+ is_proprietary: false
1157
+ is_sentence_transformers_compatible: true
1158
+ xlm-roberta-large:
1159
+ link: https://huggingface.co/xlm-roberta-large
1160
+ seq_len: 514
1161
+ size: 560
1162
+ dim: 1024
1163
+ is_external: true
1164
+ is_proprietary: false
1165
+ is_sentence_transformers_compatible: true
1166
+ models_to_skip:
1167
+ - michaelfeil/ct2fast-e5-large-v2
1168
+ - McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-unsup-simcse
1169
+ - newsrx/instructor-xl
1170
+ - sionic-ai/sionic-ai-v1
1171
+ - lsf1000/bge-evaluation
1172
+ - Intel/bge-small-en-v1.5-sst2
1173
+ - newsrx/instructor-xl-newsrx
1174
+ - McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-unsup-simcse
1175
+ - McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-unsup-simcse
1176
+ - davidpeer/gte-small
1177
+ - goldenrooster/multilingual-e5-large
1178
+ - kozistr/fused-large-en
1179
+ - mixamrepijey/instructor-small
1180
+ - McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-supervised
1181
+ - DecisionOptimizationSystem/DeepFeatEmbeddingLargeContext
1182
+ - Intel/bge-base-en-v1.5-sst2-int8-dynamic
1183
+ - morgendigital/multilingual-e5-large-quantized
1184
+ - BAAI/bge-small-en
1185
+ - ggrn/e5-small-v2
1186
+ - vectoriseai/gte-small
1187
+ - giulio98/placeholder
1188
+ - odunola/UAE-Large-VI
1189
+ - vectoriseai/e5-large-v2
1190
+ - gruber/e5-small-v2-ggml
1191
+ - Severian/nomic
1192
+ - arcdev/e5-mistral-7b-instruct
1193
+ - mlx-community/multilingual-e5-base-mlx
1194
+ - michaelfeil/ct2fast-bge-base-en-v1.5
1195
+ - Intel/bge-small-en-v1.5-sst2-int8-static
1196
+ - jncraton/stella-base-en-v2-ct2-int8
1197
+ - vectoriseai/multilingual-e5-large
1198
+ - rlsChapters/Chapters-SFR-Embedding-Mistral
1199
+ - arcdev/SFR-Embedding-Mistral
1200
+ - McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp-supervised
1201
+ - McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised
1202
+ - vectoriseai/gte-base
1203
+ - mixamrepijey/instructor-models
1204
+ - GovCompete/e5-large-v2
1205
+ - ef-zulla/e5-multi-sml-torch
1206
+ - khoa-klaytn/bge-small-en-v1.5-angle
1207
+ - krilecy/e5-mistral-7b-instruct
1208
+ - vectoriseai/bge-base-en-v1.5
1209
+ - vectoriseai/instructor-base
1210
+ - jingyeom/korean_embedding_model
1211
+ - rizki/bgr-tf
1212
+ - barisaydin/bge-base-en
1213
+ - jamesgpt1/zzz
1214
+ - Malmuk1/e5-large-v2_Sharded
1215
+ - vectoriseai/ember-v1
1216
+ - Consensus/instructor-base
1217
+ - barisaydin/bge-small-en
1218
+ - barisaydin/gte-base
1219
+ - woody72/multilingual-e5-base
1220
+ - Einas/einas_ashkar
1221
+ - michaelfeil/ct2fast-bge-large-en-v1.5
1222
+ - vectoriseai/bge-small-en-v1.5
1223
+ - iampanda/Test
1224
+ - cherubhao/yogamodel
1225
+ - ieasybooks/multilingual-e5-large-onnx
1226
+ - jncraton/e5-small-v2-ct2-int8
1227
+ - radames/e5-large
1228
+ - khoa-klaytn/bge-base-en-v1.5-angle
1229
+ - Intel/bge-base-en-v1.5-sst2-int8-static
1230
+ - vectoriseai/e5-large
1231
+ - TitanML/jina-v2-base-en-embed
1232
+ - Koat/gte-tiny
1233
+ - binqiangliu/EmbeddingModlebgelargeENv1.5
1234
+ - beademiguelperez/sentence-transformers-multilingual-e5-small
1235
+ - sionic-ai/sionic-ai-v2
1236
+ - jamesdborin/jina-v2-base-en-embed
1237
+ - maiyad/multilingual-e5-small
1238
+ - dmlls/all-mpnet-base-v2
1239
+ - odunola/e5-base-v2
1240
+ - vectoriseai/bge-large-en-v1.5
1241
+ - vectoriseai/bge-small-en
1242
+ - karrar-alwaili/UAE-Large-V1
1243
+ - t12e/instructor-base
1244
+ - Frazic/udever-bloom-3b-sentence
1245
+ - Geolumina/instructor-xl
1246
+ - hsikchi/dump
1247
+ - recipe/embeddings
1248
+ - michaelfeil/ct2fast-bge-small-en-v1.5
1249
+ - ildodeltaRule/multilingual-e5-large
1250
+ - shubham-bgi/UAE-Large
1251
+ - BAAI/bge-large-en
1252
+ - michaelfeil/ct2fast-e5-small-v2
1253
+ - cgldo/semanticClone
1254
+ - barisaydin/gte-small
1255
+ - aident-ai/bge-base-en-onnx
1256
+ - jamesgpt1/english-large-v1
1257
+ - michaelfeil/ct2fast-e5-small
1258
+ - baseplate/instructor-large-1
1259
+ - newsrx/instructor-large
1260
+ - Narsil/bge-base-en
1261
+ - michaelfeil/ct2fast-e5-large
1262
+ - mlx-community/multilingual-e5-small-mlx
1263
+ - lightbird-ai/nomic
1264
+ - MaziyarPanahi/GritLM-8x7B-GGUF
1265
+ - newsrx/instructor-large-newsrx
1266
+ - dhairya0907/thenlper-get-large
1267
+ - barisaydin/bge-large-en
1268
+ - jncraton/bge-small-en-ct2-int8
1269
+ - retrainai/instructor-xl
1270
+ - BAAI/bge-base-en
1271
+ - gentlebowl/instructor-large-safetensors
1272
+ - d0rj/e5-large-en-ru
1273
+ - atian-chapters/Chapters-SFR-Embedding-Mistral
1274
+ - Intel/bge-base-en-v1.5-sts-int8-static
1275
+ - Intel/bge-base-en-v1.5-sts-int8-dynamic
1276
+ - jncraton/GIST-small-Embedding-v0-ct2-int8
1277
+ - jncraton/gte-tiny-ct2-int8
1278
+ - d0rj/e5-small-en-ru
1279
+ - vectoriseai/e5-small-v2
1280
+ - SmartComponents/bge-micro-v2
1281
+ - michaelfeil/ct2fast-gte-base
1282
+ - vectoriseai/e5-base-v2
1283
+ - Intel/bge-base-en-v1.5-sst2
1284
+ - McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised
1285
+ - Research2NLP/electrical_stella
1286
+ - weakit-v/bge-base-en-v1.5-onnx
1287
+ - GovCompete/instructor-xl
1288
+ - barisaydin/text2vec-base-multilingual
1289
+ - Intel/bge-small-en-v1.5-sst2-int8-dynamic
1290
+ - jncraton/gte-small-ct2-int8
1291
+ - d0rj/e5-base-en-ru
1292
+ - barisaydin/gte-large
1293
+ - fresha/e5-large-v2-endpoint
1294
+ - vectoriseai/instructor-large
1295
+ - Severian/embed
1296
+ - vectoriseai/e5-base
1297
+ - mlx-community/multilingual-e5-large-mlx
1298
+ - vectoriseai/gte-large
1299
+ - anttip/ct2fast-e5-small-v2-hfie
1300
+ - michaelfeil/ct2fast-gte-large
1301
+ - gizmo-ai/Cohere-embed-multilingual-v3.0
1302
+ - McGill-NLP/LLM2Vec-Llama-2-7b-chat-hf-mntp-unsup-simcse
1303
+ cross_encoders:
1304
+ - FollowIR-7B
1305
+ - flan-t5-base
1306
+ - flan-t5-large
1307
+ - monobert-large-msmarco
1308
+ - monot5-3b-msmarco-10k
1309
+ - monot5-base-msmarco-10k
1310
+ - llama-2-7b-chat
1311
+ - mistral-7b-instruct-v0.2
1312
+ - tart-full-flan-t5-xl