Muennighoff committed on
Commit
735975d
1 Parent(s): 27a30e1

Update conf

Browse files
Files changed (2) hide show
  1. config.yaml +31 -31
  2. model_meta.yaml +62 -9
config.yaml CHANGED
@@ -365,6 +365,37 @@ boards:
365
  - CDSC-R
366
  - SICK-R-PL
367
  - STS22 (pl)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
368
  se:
369
  title: Swedish
370
  language_long: Swedish
@@ -452,34 +483,3 @@ boards:
452
  - BrightRetrieval (aops)
453
  - BrightRetrieval (theoremqa_theorems)
454
  - BrightRetrieval (theoremqa_questions)
455
- ru:
456
- title: Russian
457
- language_long: "Russian"
458
- has_overall: true
459
- acronym: null
460
- icon: "🇷🇺"
461
- special_icons: null
462
- credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
463
- tasks:
464
- Classification:
465
- - GeoreviewClassification
466
- - HeadlineClassification
467
- - InappropriatenessClassification
468
- - KinopoiskClassification
469
- - RuReviewsClassification
470
- - RuSciBenchGRNTIClassification
471
- - RuSciBenchOECDClassification
472
- Clustering:
473
- - GeoreviewClusteringP2P
474
- - RuSciBenchGRNTIClusteringP2P
475
- - RuSciBenchOECDClusteringP2P
476
- PairClassification:
477
- - TERRa
478
- Reranking:
479
- - RuBQReranking
480
- Retrieval:
481
- - RiaNewsRetrieval
482
- - RuBQRetrieval
483
- STS:
484
- - RUParaPhraserSTS
485
- - RuSTSBenchmarkSTS
 
365
  - CDSC-R
366
  - SICK-R-PL
367
  - STS22 (pl)
368
+ ru:
369
+ title: Russian
370
+ language_long: "Russian"
371
+ has_overall: true
372
+ acronym: null
373
+ icon: "🇷🇺"
374
+ special_icons: null
375
+ credits: "[Roman Solomatin](https://github.com/Samoed) and SaluteDevices: [Alena Fenogenova](https://github.com/Alenush), [Aleksandr Abramov](https://github.com/Ab1992ao), [Artem Snegirev](https://github.com/artemsnegirev), [Anna Maksimova](https://github.com/anpalmak2003), [Maria Tikhonova](https://github.com/MariyaTikhonova)"
376
+ tasks:
377
+ Classification:
378
+ - GeoreviewClassification
379
+ - HeadlineClassification
380
+ - InappropriatenessClassification
381
+ - KinopoiskClassification
382
+ - RuReviewsClassification
383
+ - RuSciBenchGRNTIClassification
384
+ - RuSciBenchOECDClassification
385
+ Clustering:
386
+ - GeoreviewClusteringP2P
387
+ - RuSciBenchGRNTIClusteringP2P
388
+ - RuSciBenchOECDClusteringP2P
389
+ PairClassification:
390
+ - TERRa
391
+ Reranking:
392
+ - RuBQReranking
393
+ Retrieval:
394
+ - RiaNewsRetrieval
395
+ - RuBQRetrieval
396
+ STS:
397
+ - RUParaPhraserSTS
398
+ - RuSTSBenchmarkSTS
399
  se:
400
  title: Swedish
401
  language_long: Swedish
 
483
  - BrightRetrieval (aops)
484
  - BrightRetrieval (theoremqa_theorems)
485
  - BrightRetrieval (theoremqa_questions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model_meta.yaml CHANGED
@@ -212,6 +212,15 @@ model_meta:
212
  is_proprietary: false
213
  is_sentence_transformers_compatible: true
214
  uses_instruct: true
 
 
 
 
 
 
 
 
 
215
  all-MiniLM-L12-v2:
216
  link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
217
  seq_len: 512
@@ -356,6 +365,24 @@ model_meta:
356
  is_proprietary: false
357
  is_sentence_transformers_compatible: true
358
  uses_instruct: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  bge-large-en-v1.5:
360
  link: https://huggingface.co/BAAI/bge-large-en-v1.5
361
  seq_len: 512
@@ -514,15 +541,6 @@ model_meta:
514
  is_proprietary: false
515
  is_sentence_transformers_compatible: true
516
  uses_instruct: false
517
- dfm-sentence-encoder-large-1:
518
- link: https://huggingface.co/chcaa/dfm-encoder-large-v1
519
- seq_len: 512
520
- size: 355
521
- dim: 1024
522
- is_external: false # no result in results repo
523
- is_proprietary: false
524
- is_sentence_transformers_compatible: true
525
- uses_instruct: false
526
  distilbert-base-25lang-cased:
527
  link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
528
  seq_len: 512
@@ -820,6 +838,15 @@ model_meta:
820
  is_proprietary: false
821
  is_sentence_transformers_compatible: true
822
  uses_instruct: true
 
 
 
 
 
 
 
 
 
823
  gte-Qwen2-7B-instruct:
824
  link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
825
  seq_len: 32768
@@ -1243,6 +1270,24 @@ model_meta:
1243
  is_proprietary: false
1244
  is_sentence_transformers_compatible: true
1245
  uses_instruct: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1246
  sup-simcse-bert-base-uncased:
1247
  link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
1248
  seq_len: 512
@@ -1874,6 +1919,14 @@ models_to_skip:
1874
  - liddlefish/privacy_embedding_bge_small_synthetic
1875
  - mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF
1876
  - leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF
 
 
 
 
 
 
 
 
1877
  cross_encoders:
1878
  - FollowIR-7B
1879
  - flan-t5-base
 
212
  is_proprietary: false
213
  is_sentence_transformers_compatible: true
214
  uses_instruct: true
215
+ SFR-Embedding-2_R:
216
+ link: https://huggingface.co/Salesforce/SFR-Embedding-2_R
217
+ seq_len: 32768
218
+ size: 7111
219
+ dim: 4096
220
+ is_external: false
221
+ is_proprietary: false
222
+ is_sentence_transformers_compatible: true
223
+ uses_instruct: true
224
  all-MiniLM-L12-v2:
225
  link: https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
226
  seq_len: 512
 
365
  is_proprietary: false
366
  is_sentence_transformers_compatible: true
367
  uses_instruct: false
368
+ bge-en-icl:
369
+ link: https://huggingface.co/BAAI/bge-en-icl
370
+ seq_len: 32768
371
+ size: 7110
372
+ dim: 4096
373
+ is_external: false
374
+ is_proprietary: false
375
+ is_sentence_transformers_compatible: true
376
+ uses_instruct: true
377
+ bge-multilingual-gemma2:
378
+ link: https://huggingface.co/BAAI/bge-multilingual-gemma2
379
+ seq_len: 8192
380
+ size: 9240
381
+ dim: 3584
382
+ is_external: false
383
+ is_proprietary: false
384
+ is_sentence_transformers_compatible: false
385
+ uses_instruct: true
386
  bge-large-en-v1.5:
387
  link: https://huggingface.co/BAAI/bge-large-en-v1.5
388
  seq_len: 512
 
541
  is_proprietary: false
542
  is_sentence_transformers_compatible: true
543
  uses_instruct: false
 
 
 
 
 
 
 
 
 
544
  distilbert-base-25lang-cased:
545
  link: https://huggingface.co/Geotrend/distilbert-base-25lang-cased
546
  seq_len: 512
 
838
  is_proprietary: false
839
  is_sentence_transformers_compatible: true
840
  uses_instruct: true
841
+ gte-Qwen2-1.5B-instruct:
842
+ link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct
843
+ seq_len: 32768
844
+ size: 1780
845
+ dim: 1536
846
+ is_external: false
847
+ is_proprietary: false
848
+ is_sentence_transformers_compatible: true
849
+ uses_instruct: true
850
  gte-Qwen2-7B-instruct:
851
  link: https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct
852
  seq_len: 32768
 
1270
  is_proprietary: false
1271
  is_sentence_transformers_compatible: true
1272
  uses_instruct: false
1273
+ stella_en_1.5B_v5:
1274
+ link: https://huggingface.co/dunzhang/stella_en_1.5B_v5
1275
+ seq_len: 512
1276
+ size: 1500
1277
+ dim: 8192
1278
+ is_external: false
1279
+ is_proprietary: false
1280
+ is_sentence_transformers_compatible: true
1281
+ uses_instruct: true
1282
+ stella_en_400M_v5:
1283
+ link: https://huggingface.co/dunzhang/stella_en_400M_v5
1284
+ seq_len: 512
1285
+ size: 400
1286
+ dim: 8192
1287
+ is_external: false
1288
+ is_proprietary: false
1289
+ is_sentence_transformers_compatible: true
1290
+ uses_instruct: true
1291
  sup-simcse-bert-base-uncased:
1292
  link: https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased
1293
  seq_len: 512
 
1919
  - liddlefish/privacy_embedding_bge_small_synthetic
1920
  - mxs980/gte-Qwen2-1.5B-instruct-Q8_0-GGUF
1921
  - leonn71/gte-Qwen2-1.5B-instruct-Q6_K-GGUF
1922
+ - niancheng/gte-Qwen2-7B-instruct-Q4_K_M-GGUF
1923
+ - cleatherbury/gte-Qwen2-7B-instruct-Q5_K_M-GGUF
1924
+ - niancheng/gte-Qwen2-1.5B-instruct-Q4_K_M-GGUF
1925
+ - mxs980/b1ade-embed-Q8_0-GGUF
1926
+ - chihlunLee/NoInstruct-small-Embedding-v0-Q4_0-GGUF
1927
+ - corto-ai/nomic-embed-text-v1
1928
+ - bcastle/snowflake-arctic-embed-l-Q8_0-GGUF
1929
+ - Intel/neural-embedding-v1
1930
  cross_encoders:
1931
  - FollowIR-7B
1932
  - flan-t5-base