library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
- sentence-embedding
- mteb
model-index:
- name: bilingual-document-embedding
results:
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringP2P
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 59.15114729440782
- type: v_measures
value:
- 0.6088344883526443
- 0.5973965747446686
- 0.5447756077407359
- 0.5758855880313155
- 0.5557001687682872
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringS2S
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 41.577823072264664
- type: v_measures
value:
- 0.38879769314999035
- 0.4006045672100778
- 0.3958716052074101
- 0.4405039656967712
- 0.47468521453925405
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p
name: MTEB AlloprofReranking
config: default
split: test
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd
metrics:
- type: map
value: 73.87182436982245
- type: mrr
value: 75.16911341393207
- type: nAUC_map_diff1
value: 57.405401360219784
- type: nAUC_map_max
value: 23.453753045677463
- type: nAUC_mrr_diff1
value: 56.36974368790562
- type: nAUC_mrr_max
value: 24.630226324027316
- task:
type: Retrieval
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloprofRetrieval
config: default
split: test
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd
metrics:
- type: map_at_1
value: 32.513
- type: map_at_10
value: 43.175999999999995
- type: map_at_100
value: 44.062
- type: map_at_1000
value: 44.115
- type: map_at_20
value: 43.702999999999996
- type: map_at_3
value: 40.205999999999996
- type: map_at_5
value: 41.978
- type: mrr_at_1
value: 32.512953367875646
- type: mrr_at_10
value: 43.175871096855616
- type: mrr_at_100
value: 44.06232913339137
- type: mrr_at_1000
value: 44.11462846644048
- type: mrr_at_20
value: 43.70270697751798
- type: mrr_at_3
value: 40.20581462291314
- type: mrr_at_5
value: 41.978267127230986
- type: nauc_map_at_1000_diff1
value: 41.780712093626434
- type: nauc_map_at_1000_max
value: 37.56496014685421
- type: nauc_map_at_100_diff1
value: 41.783476025622775
- type: nauc_map_at_100_max
value: 37.60097300537378
- type: nauc_map_at_10_diff1
value: 41.61653902439649
- type: nauc_map_at_10_max
value: 37.33970786907192
- type: nauc_map_at_1_diff1
value: 44.90184716266261
- type: nauc_map_at_1_max
value: 34.452525158255284
- type: nauc_map_at_20_diff1
value: 41.6589893917753
- type: nauc_map_at_20_max
value: 37.58641485307153
- type: nauc_map_at_3_diff1
value: 42.104788108051075
- type: nauc_map_at_3_max
value: 36.5928644326236
- type: nauc_map_at_5_diff1
value: 41.607739702876565
- type: nauc_map_at_5_max
value: 36.907229583593825
- type: nauc_mrr_at_1000_diff1
value: 41.780712093626434
- type: nauc_mrr_at_1000_max
value: 37.56496014685421
- type: nauc_mrr_at_100_diff1
value: 41.783476025622775
- type: nauc_mrr_at_100_max
value: 37.60097300537378
- type: nauc_mrr_at_10_diff1
value: 41.61653902439649
- type: nauc_mrr_at_10_max
value: 37.33970786907192
- type: nauc_mrr_at_1_diff1
value: 44.90184716266261
- type: nauc_mrr_at_1_max
value: 34.452525158255284
- type: nauc_mrr_at_20_diff1
value: 41.6589893917753
- type: nauc_mrr_at_20_max
value: 37.58641485307153
- type: nauc_mrr_at_3_diff1
value: 42.104788108051075
- type: nauc_mrr_at_3_max
value: 36.5928644326236
- type: nauc_mrr_at_5_diff1
value: 41.607739702876565
- type: nauc_mrr_at_5_max
value: 36.907229583593825
- type: nauc_ndcg_at_1000_diff1
value: 41.28546857310532
- type: nauc_ndcg_at_1000_max
value: 39.086823074137
- type: nauc_ndcg_at_100_diff1
value: 41.25161168648205
- type: nauc_ndcg_at_100_max
value: 40.22844726831379
- type: nauc_ndcg_at_10_diff1
value: 40.33705639032033
- type: nauc_ndcg_at_10_max
value: 39.1320635099517
- type: nauc_ndcg_at_1_diff1
value: 44.90184716266261
- type: nauc_ndcg_at_1_max
value: 34.452525158255284
- type: nauc_ndcg_at_20_diff1
value: 40.40784583920326
- type: nauc_ndcg_at_20_max
value: 40.069552678695416
- type: nauc_ndcg_at_3_diff1
value: 41.30895891523514
- type: nauc_ndcg_at_3_max
value: 37.414699073823584
- type: nauc_ndcg_at_5_diff1
value: 40.36028401033484
- type: nauc_ndcg_at_5_max
value: 37.97523651073113
- type: nauc_precision_at_1000_diff1
value: 46.82456733521383
- type: nauc_precision_at_1000_max
value: 85.71400945217201
- type: nauc_precision_at_100_diff1
value: 40.52716981002009
- type: nauc_precision_at_100_max
value: 65.51987173508483
- type: nauc_precision_at_10_diff1
value: 35.67963463267156
- type: nauc_precision_at_10_max
value: 46.155216936968856
- type: nauc_precision_at_1_diff1
value: 44.90184716266261
- type: nauc_precision_at_1_max
value: 34.452525158255284
- type: nauc_precision_at_20_diff1
value: 34.94608063839023
- type: nauc_precision_at_20_max
value: 52.447339810747174
- type: nauc_precision_at_3_diff1
value: 39.0332348086419
- type: nauc_precision_at_3_max
value: 39.83919369547502
- type: nauc_precision_at_5_diff1
value: 36.38511756252038
- type: nauc_precision_at_5_max
value: 41.375729851686486
- type: nauc_recall_at_1000_diff1
value: 46.824567335213885
- type: nauc_recall_at_1000_max
value: 85.71400945217061
- type: nauc_recall_at_100_diff1
value: 40.52716981002009
- type: nauc_recall_at_100_max
value: 65.51987173508483
- type: nauc_recall_at_10_diff1
value: 35.67963463267154
- type: nauc_recall_at_10_max
value: 46.15521693696879
- type: nauc_recall_at_1_diff1
value: 44.90184716266261
- type: nauc_recall_at_1_max
value: 34.452525158255284
- type: nauc_recall_at_20_diff1
value: 34.94608063839018
- type: nauc_recall_at_20_max
value: 52.44733981074723
- type: nauc_recall_at_3_diff1
value: 39.033234808641886
- type: nauc_recall_at_3_max
value: 39.83919369547505
- type: nauc_recall_at_5_diff1
value: 36.38511756252039
- type: nauc_recall_at_5_max
value: 41.37572985168646
- type: ndcg_at_1
value: 32.513
- type: ndcg_at_10
value: 48.796
- type: ndcg_at_100
value: 53.273
- type: ndcg_at_1000
value: 54.686
- type: ndcg_at_20
value: 50.702000000000005
- type: ndcg_at_3
value: 42.721
- type: ndcg_at_5
value: 45.9
- type: precision_at_1
value: 32.513
- type: precision_at_10
value: 6.662
- type: precision_at_100
value: 0.88
- type: precision_at_1000
value: 0.099
- type: precision_at_20
value: 3.707
- type: precision_at_3
value: 16.667
- type: precision_at_5
value: 11.537
- type: recall_at_1
value: 32.513
- type: recall_at_10
value: 66.623
- type: recall_at_100
value: 87.953
- type: recall_at_1000
value: 99.136
- type: recall_at_20
value: 74.136
- type: recall_at_3
value: 50
- type: recall_at_5
value: 57.68600000000001
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (fr)
config: fr
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 43.48599999999999
- type: f1
value: 41.52411498679777
- type: f1_weighted
value: 41.524114986797784
- task:
type: Retrieval
dataset:
type: maastrichtlawtech/bsard
name: MTEB BSARDRetrieval
config: default
split: test
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59
metrics:
- type: map_at_1
value: 7.6579999999999995
- type: map_at_10
value: 11.706
- type: map_at_100
value: 12.948
- type: map_at_1000
value: 13.062000000000001
- type: map_at_20
value: 12.342
- type: map_at_3
value: 9.76
- type: map_at_5
value: 10.683
- type: mrr_at_1
value: 7.657657657657657
- type: mrr_at_10
value: 11.706170456170454
- type: mrr_at_100
value: 12.947870943304876
- type: mrr_at_1000
value: 13.06189894235417
- type: mrr_at_20
value: 12.342441460088518
- type: mrr_at_3
value: 9.75975975975976
- type: mrr_at_5
value: 10.683183183183182
- type: nauc_map_at_1000_diff1
value: 1.8612167614294672
- type: nauc_map_at_1000_max
value: 1.586408397411475
- type: nauc_map_at_100_diff1
value: 1.9210527499420051
- type: nauc_map_at_100_max
value: 1.6875048679362115
- type: nauc_map_at_10_diff1
value: 1.5262203122571876
- type: nauc_map_at_10_max
value: 1.2218645063194111
- type: nauc_map_at_1_diff1
value: -2.9465131557421675
- type: nauc_map_at_1_max
value: -1.8953694303529376
- type: nauc_map_at_20_diff1
value: 1.8980212905989484
- type: nauc_map_at_20_max
value: 1.1460175145386533
- type: nauc_map_at_3_diff1
value: 1.8300766192776097
- type: nauc_map_at_3_max
value: -1.8234185626649715
- type: nauc_map_at_5_diff1
value: 1.9632596967629419
- type: nauc_map_at_5_max
value: -1.0055447005584437
- type: nauc_mrr_at_1000_diff1
value: 1.8612167614294672
- type: nauc_mrr_at_1000_max
value: 1.586408397411475
- type: nauc_mrr_at_100_diff1
value: 1.9210527499420051
- type: nauc_mrr_at_100_max
value: 1.6875048679362115
- type: nauc_mrr_at_10_diff1
value: 1.5262203122571876
- type: nauc_mrr_at_10_max
value: 1.2218645063194111
- type: nauc_mrr_at_1_diff1
value: -2.9465131557421675
- type: nauc_mrr_at_1_max
value: -1.8953694303529376
- type: nauc_mrr_at_20_diff1
value: 1.8980212905989484
- type: nauc_mrr_at_20_max
value: 1.1460175145386533
- type: nauc_mrr_at_3_diff1
value: 1.8300766192776097
- type: nauc_mrr_at_3_max
value: -1.8234185626649715
- type: nauc_mrr_at_5_diff1
value: 1.9632596967629419
- type: nauc_mrr_at_5_max
value: -1.0055447005584437
- type: nauc_ndcg_at_1000_diff1
value: 2.886203742022491
- type: nauc_ndcg_at_1000_max
value: 5.34835634942694
- type: nauc_ndcg_at_100_diff1
value: 3.5183102844141443
- type: nauc_ndcg_at_100_max
value: 7.479214430443089
- type: nauc_ndcg_at_10_diff1
value: 1.8765330789166412
- type: nauc_ndcg_at_10_max
value: 4.450826426093314
- type: nauc_ndcg_at_1_diff1
value: -2.9465131557421675
- type: nauc_ndcg_at_1_max
value: -1.8953694303529376
- type: nauc_ndcg_at_20_diff1
value: 2.7436758637624705
- type: nauc_ndcg_at_20_max
value: 3.713084041742973
- type: nauc_ndcg_at_3_diff1
value: 2.84641225972613
- type: nauc_ndcg_at_3_max
value: -1.6797424196225121
- type: nauc_ndcg_at_5_diff1
value: 3.0652301308463192
- type: nauc_ndcg_at_5_max
value: -0.2333717294229873
- type: nauc_precision_at_1000_diff1
value: 6.795054101803062
- type: nauc_precision_at_1000_max
value: 17.30874103781348
- type: nauc_precision_at_100_diff1
value: 7.527516539082746
- type: nauc_precision_at_100_max
value: 22.74844672263555
- type: nauc_precision_at_10_diff1
value: 2.0761049260177407
- type: nauc_precision_at_10_max
value: 11.360023168126489
- type: nauc_precision_at_1_diff1
value: -2.9465131557421675
- type: nauc_precision_at_1_max
value: -1.8953694303529376
- type: nauc_precision_at_20_diff1
value: 3.923410121079766
- type: nauc_precision_at_20_max
value: 8.296820719888059
- type: nauc_precision_at_3_diff1
value: 5.1715442640514215
- type: nauc_precision_at_3_max
value: -1.3116999013605417
- type: nauc_precision_at_5_diff1
value: 5.331309068134777
- type: nauc_precision_at_5_max
value: 1.5638174487988539
- type: nauc_recall_at_1000_diff1
value: 6.795054101803136
- type: nauc_recall_at_1000_max
value: 17.308741037813558
- type: nauc_recall_at_100_diff1
value: 7.527516539082746
- type: nauc_recall_at_100_max
value: 22.748446722635553
- type: nauc_recall_at_10_diff1
value: 2.076104926017711
- type: nauc_recall_at_10_max
value: 11.360023168126451
- type: nauc_recall_at_1_diff1
value: -2.9465131557421675
- type: nauc_recall_at_1_max
value: -1.8953694303529376
- type: nauc_recall_at_20_diff1
value: 3.9234101210797143
- type: nauc_recall_at_20_max
value: 8.296820719888002
- type: nauc_recall_at_3_diff1
value: 5.17154426405143
- type: nauc_recall_at_3_max
value: -1.311699901360526
- type: nauc_recall_at_5_diff1
value: 5.331309068134779
- type: nauc_recall_at_5_max
value: 1.5638174487988667
- type: ndcg_at_1
value: 7.6579999999999995
- type: ndcg_at_10
value: 14.633
- type: ndcg_at_100
value: 21.199
- type: ndcg_at_1000
value: 24.505
- type: ndcg_at_20
value: 16.849
- type: ndcg_at_3
value: 10.488999999999999
- type: ndcg_at_5
value: 12.156
- type: precision_at_1
value: 7.6579999999999995
- type: precision_at_10
value: 2.432
- type: precision_at_100
value: 0.563
- type: precision_at_1000
value: 0.083
- type: precision_at_20
value: 1.644
- type: precision_at_3
value: 4.204
- type: precision_at_5
value: 3.3329999999999997
- type: recall_at_1
value: 7.6579999999999995
- type: recall_at_10
value: 24.324
- type: recall_at_100
value: 56.306
- type: recall_at_1000
value: 82.883
- type: recall_at_20
value: 32.883
- type: recall_at_3
value: 12.613
- type: recall_at_5
value: 16.667
- task:
type: Clustering
dataset:
type: lyon-nlp/clustering-hal-s2s
name: MTEB HALClusteringS2S
config: default
split: test
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915
metrics:
- type: v_measure
value: 24.87943546753088
- type: v_measures
value:
- 0.278272502518604
- 0.25921772339921395
- 0.2641171251066139
- 0.2663752999094091
- 0.23649418885985485
- task:
type: Clustering
dataset:
type: reciTAL/mlsum
name: MTEB MLSUMClusteringP2P
config: fr
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 44.175558680182796
- type: v_measures
value:
- 0.44382305997614757
- 0.45849638769110745
- 0.45186964282579195
- 0.44407241104469836
- 0.39488181195438643
- task:
type: Clustering
dataset:
type: reciTAL/mlsum
name: MTEB MLSUMClusteringS2S
config: fr
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 44.649498161719784
- type: v_measures
value:
- 0.44293968862639355
- 0.45358259404927
- 0.4544509656034716
- 0.4464804623311193
- 0.400104460457011
- task:
type: Classification
dataset:
type: mteb/mtop_domain
name: MTEB MTOPDomainClassification (fr)
config: fr
split: test
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
metrics:
- type: accuracy
value: 88.46226119636705
- type: f1
value: 88.37561423387648
- type: f1_weighted
value: 88.38817570958008
- task:
type: Classification
dataset:
type: mteb/mtop_intent
name: MTEB MTOPIntentClassification (fr)
config: fr
split: test
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
metrics:
- type: accuracy
value: 60.27247103037895
- type: f1
value: 43.36800798113768
- type: f1_weighted
value: 62.65127593999621
- task:
type: Classification
dataset:
type: mteb/masakhanews
name: MTEB MasakhaNEWSClassification (fra)
config: fra
split: test
revision: 18193f187b92da67168c655c9973a165ed9593dd
metrics:
- type: accuracy
value: 78.0094786729858
- type: f1
value: 74.34441973526405
- type: f1_weighted
value: 78.20439089386724
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringP2P (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 69.98688791220088
- type: v_measures
value:
- 1
- 0.11908920120641955
- 0.7679216739314454
- 0.8367645040119921
- 0.7755690164601873
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringS2S (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 36.65801636831311
- type: v_measures
value:
- 1
- 0.017508140483218165
- 0.4153261241535689
- 0.21327667744326673
- 0.18678987633560207
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (fr)
config: fr
split: test
revision: 4672e20407010da34463acc759c162ca9734bca6
metrics:
- type: accuracy
value: 67.39408204438466
- type: f1
value: 65.4548720535735
- type: f1_weighted
value: 66.30814406163043
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (fr)
config: fr
split: test
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
metrics:
- type: accuracy
value: 74.12239408204438
- type: f1
value: 73.59473076543576
- type: f1_weighted
value: 73.70038947164628
- task:
type: Retrieval
dataset:
type: jinaai/mintakaqa
name: MTEB MintakaRetrieval (fr)
config: fr
split: test
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e
metrics:
- type: map_at_1
value: 13.062999999999999
- type: map_at_10
value: 20.28
- type: map_at_100
value: 21.271
- type: map_at_1000
value: 21.384
- type: map_at_20
value: 20.822
- type: map_at_3
value: 18.195
- type: map_at_5
value: 19.293
- type: mrr_at_1
value: 13.063063063063062
- type: mrr_at_10
value: 20.280426530426514
- type: mrr_at_100
value: 21.27068193401496
- type: mrr_at_1000
value: 21.38419408143633
- type: mrr_at_20
value: 20.821814700930886
- type: mrr_at_3
value: 18.19546819546817
- type: mrr_at_5
value: 19.292929292929255
- type: nauc_map_at_1000_diff1
value: 19.722064275802
- type: nauc_map_at_1000_max
value: 31.384556319729395
- type: nauc_map_at_100_diff1
value: 19.71033532515874
- type: nauc_map_at_100_max
value: 31.402828787179143
- type: nauc_map_at_10_diff1
value: 20.001133718713536
- type: nauc_map_at_10_max
value: 31.806014785924102
- type: nauc_map_at_1_diff1
value: 29.69565140559411
- type: nauc_map_at_1_max
value: 30.683216020589533
- type: nauc_map_at_20_diff1
value: 19.821797788715696
- type: nauc_map_at_20_max
value: 31.59711268659909
- type: nauc_map_at_3_diff1
value: 21.615605640070964
- type: nauc_map_at_3_max
value: 31.966650937266305
- type: nauc_map_at_5_diff1
value: 20.505641463837247
- type: nauc_map_at_5_max
value: 31.950326449610333
- type: nauc_mrr_at_1000_diff1
value: 19.722064275802
- type: nauc_mrr_at_1000_max
value: 31.384556319729395
- type: nauc_mrr_at_100_diff1
value: 19.71033532515874
- type: nauc_mrr_at_100_max
value: 31.402828787179143
- type: nauc_mrr_at_10_diff1
value: 20.001133718713536
- type: nauc_mrr_at_10_max
value: 31.806014785924102
- type: nauc_mrr_at_1_diff1
value: 29.69565140559411
- type: nauc_mrr_at_1_max
value: 30.683216020589533
- type: nauc_mrr_at_20_diff1
value: 19.821797788715696
- type: nauc_mrr_at_20_max
value: 31.59711268659909
- type: nauc_mrr_at_3_diff1
value: 21.615605640070964
- type: nauc_mrr_at_3_max
value: 31.966650937266305
- type: nauc_mrr_at_5_diff1
value: 20.505641463837247
- type: nauc_mrr_at_5_max
value: 31.950326449610333
- type: nauc_ndcg_at_1000_diff1
value: 16.644876374984612
- type: nauc_ndcg_at_1000_max
value: 30.00552722677877
- type: nauc_ndcg_at_100_diff1
value: 15.707910701262051
- type: nauc_ndcg_at_100_max
value: 29.581303411340663
- type: nauc_ndcg_at_10_diff1
value: 16.76054369006531
- type: nauc_ndcg_at_10_max
value: 31.603443500691675
- type: nauc_ndcg_at_1_diff1
value: 29.69565140559411
- type: nauc_ndcg_at_1_max
value: 30.683216020589533
- type: nauc_ndcg_at_20_diff1
value: 16.269251917194648
- type: nauc_ndcg_at_20_max
value: 30.935281233489686
- type: nauc_ndcg_at_3_diff1
value: 19.672433215100494
- type: nauc_ndcg_at_3_max
value: 32.07848616783397
- type: nauc_ndcg_at_5_diff1
value: 17.88855855774221
- type: nauc_ndcg_at_5_max
value: 32.01468420337384
- type: nauc_precision_at_1000_diff1
value: -2.7987428835555157
- type: nauc_precision_at_1000_max
value: 13.9766188144417
- type: nauc_precision_at_100_diff1
value: 3.9597929189458183
- type: nauc_precision_at_100_max
value: 21.581900275188854
- type: nauc_precision_at_10_diff1
value: 9.174898767869335
- type: nauc_precision_at_10_max
value: 30.88927862766609
- type: nauc_precision_at_1_diff1
value: 29.69565140559411
- type: nauc_precision_at_1_max
value: 30.683216020589533
- type: nauc_precision_at_20_diff1
value: 7.774469550439256
- type: nauc_precision_at_20_max
value: 28.801273985757952
- type: nauc_precision_at_3_diff1
value: 15.058108507548344
- type: nauc_precision_at_3_max
value: 32.28970787769507
- type: nauc_precision_at_5_diff1
value: 11.81883670502361
- type: nauc_precision_at_5_max
value: 32.08267698057494
- type: nauc_recall_at_1000_diff1
value: -2.7987428835554455
- type: nauc_recall_at_1000_max
value: 13.976618814441693
- type: nauc_recall_at_100_diff1
value: 3.9597929189458183
- type: nauc_recall_at_100_max
value: 21.581900275188858
- type: nauc_recall_at_10_diff1
value: 9.17489876786934
- type: nauc_recall_at_10_max
value: 30.889278627666112
- type: nauc_recall_at_1_diff1
value: 29.69565140559411
- type: nauc_recall_at_1_max
value: 30.683216020589533
- type: nauc_recall_at_20_diff1
value: 7.774469550439271
- type: nauc_recall_at_20_max
value: 28.80127398575797
- type: nauc_recall_at_3_diff1
value: 15.058108507548361
- type: nauc_recall_at_3_max
value: 32.2897078776951
- type: nauc_recall_at_5_diff1
value: 11.818836705023593
- type: nauc_recall_at_5_max
value: 32.08267698057489
- type: ndcg_at_1
value: 13.062999999999999
- type: ndcg_at_10
value: 24.166
- type: ndcg_at_100
value: 29.48
- type: ndcg_at_1000
value: 33.236
- type: ndcg_at_20
value: 26.168999999999997
- type: ndcg_at_3
value: 19.796
- type: ndcg_at_5
value: 21.762999999999998
- type: precision_at_1
value: 13.062999999999999
- type: precision_at_10
value: 3.6609999999999996
- type: precision_at_100
value: 0.626
- type: precision_at_1000
value: 0.094
- type: precision_at_20
value: 2.23
- type: precision_at_3
value: 8.135
- type: precision_at_5
value: 5.831
- type: recall_at_1
value: 13.062999999999999
- type: recall_at_10
value: 36.609
- type: recall_at_100
value: 62.572
- type: recall_at_1000
value: 93.735
- type: recall_at_20
value: 44.595
- type: recall_at_3
value: 24.406
- type: recall_at_5
value: 29.156
- task:
type: PairClassification
dataset:
type: GEM/opusparcus
name: MTEB OpusparcusPC (fr)
config: fr
split: test
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a
metrics:
- type: cos_sim_accuracy
value: 81.94822888283377
- type: cos_sim_accuracy_threshold
value: 59.67133641242981
- type: cos_sim_ap
value: 93.77568000367297
- type: cos_sim_f1
value: 87.33944954128441
- type: cos_sim_f1_threshold
value: 48.620444536209106
- type: cos_sim_precision
value: 81.15942028985508
- type: cos_sim_recall
value: 94.5382323733863
- type: dot_accuracy
value: 81.94822888283377
- type: dot_accuracy_threshold
value: 59.67133045196533
- type: dot_ap
value: 93.77568000367297
- type: dot_f1
value: 87.33944954128441
- type: dot_f1_threshold
value: 48.620444536209106
- type: dot_precision
value: 81.15942028985508
- type: dot_recall
value: 94.5382323733863
- type: euclidean_accuracy
value: 81.94822888283377
- type: euclidean_accuracy_threshold
value: 89.80941772460938
- type: euclidean_ap
value: 93.77568000367297
- type: euclidean_f1
value: 87.33944954128441
- type: euclidean_f1_threshold
value: 101.37012004852295
- type: euclidean_precision
value: 81.15942028985508
- type: euclidean_recall
value: 94.5382323733863
- type: manhattan_accuracy
value: 81.94822888283377
- type: manhattan_accuracy_threshold
value: 2278.3992767333984
- type: manhattan_ap
value: 93.736221809257
- type: manhattan_f1
value: 87.24319159101768
- type: manhattan_f1_threshold
value: 2442.0352935791016
- type: manhattan_precision
value: 84.06998158379373
- type: manhattan_recall
value: 90.66534260178749
- type: max_accuracy
value: 81.94822888283377
- type: max_ap
value: 93.77568000367297
- type: max_f1
value: 87.33944954128441
- task:
type: PairClassification
dataset:
type: google-research-datasets/paws-x
name: MTEB PawsX (fr)
config: fr
split: test
revision: 8a04d940a42cd40658986fdd8e3da561533a3646
metrics:
- type: cos_sim_accuracy
value: 63
- type: cos_sim_ap
value: 62.8421811357794
- type: cos_sim_f1
value: 62.491349480968864
- type: cos_sim_precision
value: 45.44539506794162
- type: cos_sim_recall
value: 100
- type: dot_accuracy
value: 63
- type: dot_ap
value: 62.83128860568098
- type: dot_f1
value: 62.491349480968864
- type: dot_precision
value: 45.44539506794162
- type: dot_recall
value: 100
- type: euclidean_accuracy
value: 63
- type: euclidean_ap
value: 62.842229411681984
- type: euclidean_f1
value: 62.491349480968864
- type: euclidean_precision
value: 45.44539506794162
- type: euclidean_recall
value: 100
- type: manhattan_accuracy
value: 63
- type: manhattan_ap
value: 62.83631065292994
- type: manhattan_f1
value: 62.491349480968864
- type: manhattan_precision
value: 45.44539506794162
- type: manhattan_recall
value: 100
- type: max_accuracy
value: 63
- type: max_ap
value: 62.842229411681984
- type: max_f1
value: 62.491349480968864
- task:
type: STS
dataset:
type: Lajavaness/SICK-fr
name: MTEB SICKFr
config: default
split: test
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a
metrics:
- type: cos_sim_pearson
value: 84.65226798174751
- type: cos_sim_spearman
value: 78.46069171893217
- type: euclidean_pearson
value: 82.24338215489338
- type: euclidean_spearman
value: 78.46069230414263
- type: manhattan_pearson
value: 82.19430457441406
- type: manhattan_spearman
value: 78.39600534130474
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (fr)
config: fr
split: test
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
metrics:
- type: cos_sim_pearson
value: 84.34356343286501
- type: cos_sim_spearman
value: 83.82441862674773
- type: euclidean_pearson
value: 83.36025657327927
- type: euclidean_spearman
value: 83.82441862674773
- type: manhattan_pearson
value: 83.28632889698486
- type: manhattan_spearman
value: 83.72086058674401
- task:
type: STS
dataset:
type: mteb/stsb_multi_mt
name: MTEB STSBenchmarkMultilingualSTS (fr)
config: fr
split: test
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c
metrics:
- type: cos_sim_pearson
value: 85.61138706775607
- type: cos_sim_spearman
value: 86.79352172029321
- type: euclidean_pearson
value: 85.83986489860736
- type: euclidean_spearman
value: 86.79352162100042
- type: manhattan_pearson
value: 85.7236873261734
- type: manhattan_spearman
value: 86.66968689546516
- task:
type: Summarization
dataset:
type: lyon-nlp/summarization-summeval-fr-p2p
name: MTEB SummEvalFr
config: default
split: test
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054
metrics:
- type: cos_sim_pearson
value: 30.185028827409205
- type: cos_sim_spearman
value: 30.13786083775273
- type: dot_pearson
value: 30.18503030285526
- type: dot_spearman
value: 30.13786083775273
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-syntec-s2p
name: MTEB SyntecReranking
config: default
split: test
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad
metrics:
- type: map
value: 88.89444444444443
- type: mrr
value: 88.89444444444443
- type: nAUC_map_diff1
value: 65.57681789015096
- type: nAUC_map_max
value: 11.774011617096468
- type: nAUC_mrr_diff1
value: 65.57681789015096
- type: nAUC_mrr_max
value: 11.774011617096468
- task:
type: Retrieval
dataset:
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p
name: MTEB SyntecRetrieval
config: default
split: test
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9
metrics:
- type: map_at_1
value: 71
- type: map_at_10
value: 81.3
- type: map_at_100
value: 81.407
- type: map_at_1000
value: 81.407
- type: map_at_20
value: 81.353
- type: map_at_3
value: 80.333
- type: map_at_5
value: 81.033
- type: mrr_at_1
value: 71
- type: mrr_at_10
value: 81.29999999999998
- type: mrr_at_100
value: 81.40672514619881
- type: mrr_at_1000
value: 81.40672514619881
- type: mrr_at_20
value: 81.35263157894735
- type: mrr_at_3
value: 80.33333333333333
- type: mrr_at_5
value: 81.03333333333333
- type: nauc_map_at_1000_diff1
value: 65.56551939236816
- type: nauc_map_at_1000_max
value: 21.060745704748204
- type: nauc_map_at_100_diff1
value: 65.56551939236816
- type: nauc_map_at_100_max
value: 21.060745704748204
- type: nauc_map_at_10_diff1
value: 65.55980069698242
- type: nauc_map_at_10_max
value: 21.3190442929788
- type: nauc_map_at_1_diff1
value: 67.21642606971449
- type: nauc_map_at_1_max
value: 19.793191631302918
- type: nauc_map_at_20_diff1
value: 65.538721219245
- type: nauc_map_at_20_max
value: 21.070102756046573
- type: nauc_map_at_3_diff1
value: 66.48655081074173
- type: nauc_map_at_3_max
value: 22.33715748971969
- type: nauc_map_at_5_diff1
value: 65.4498092196869
- type: nauc_map_at_5_max
value: 22.041207079018868
- type: nauc_mrr_at_1000_diff1
value: 65.56551939236816
- type: nauc_mrr_at_1000_max
value: 21.060745704748204
- type: nauc_mrr_at_100_diff1
value: 65.56551939236816
- type: nauc_mrr_at_100_max
value: 21.060745704748204
- type: nauc_mrr_at_10_diff1
value: 65.55980069698242
- type: nauc_mrr_at_10_max
value: 21.3190442929788
- type: nauc_mrr_at_1_diff1
value: 67.21642606971449
- type: nauc_mrr_at_1_max
value: 19.793191631302918
- type: nauc_mrr_at_20_diff1
value: 65.538721219245
- type: nauc_mrr_at_20_max
value: 21.070102756046573
- type: nauc_mrr_at_3_diff1
value: 66.48655081074173
- type: nauc_mrr_at_3_max
value: 22.33715748971969
- type: nauc_mrr_at_5_diff1
value: 65.4498092196869
- type: nauc_mrr_at_5_max
value: 22.041207079018868
- type: nauc_ndcg_at_1000_diff1
value: 65.37799652661094
- type: nauc_ndcg_at_1000_max
value: 21.618775539952175
- type: nauc_ndcg_at_100_diff1
value: 65.37799652661094
- type: nauc_ndcg_at_100_max
value: 21.618775539952175
- type: nauc_ndcg_at_10_diff1
value: 65.24121131711044
- type: nauc_ndcg_at_10_max
value: 23.00629044068508
- type: nauc_ndcg_at_1_diff1
value: 67.21642606971449
- type: nauc_ndcg_at_1_max
value: 19.793191631302918
- type: nauc_ndcg_at_20_diff1
value: 65.11745065699384
- type: nauc_ndcg_at_20_max
value: 21.64133163322825
- type: nauc_ndcg_at_3_diff1
value: 66.99908176973135
- type: nauc_ndcg_at_3_max
value: 25.59125363095015
- type: nauc_ndcg_at_5_diff1
value: 64.80888193232458
- type: nauc_ndcg_at_5_max
value: 25.161787586855322
- type: nauc_precision_at_1000_diff1
value: nan
- type: nauc_precision_at_1000_max
value: nan
- type: nauc_precision_at_100_diff1
value: nan
- type: nauc_precision_at_100_max
value: nan
- type: nauc_precision_at_10_diff1
value: 61.50015561780299
- type: nauc_precision_at_10_max
value: 47.88359788359829
- type: nauc_precision_at_1_diff1
value: 67.21642606971449
- type: nauc_precision_at_1_max
value: 19.793191631302918
- type: nauc_precision_at_20_diff1
value: 56.13912231559286
- type: nauc_precision_at_20_max
value: 21.82539682539744
- type: nauc_precision_at_3_diff1
value: 70.79831932773126
- type: nauc_precision_at_3_max
value: 47.46148459383747
- type: nauc_precision_at_5_diff1
value: 58.50606909430468
- type: nauc_precision_at_5_max
value: 57.19887955182096
- type: nauc_recall_at_1000_diff1
value: nan
- type: nauc_recall_at_1000_max
value: nan
- type: nauc_recall_at_100_diff1
value: nan
- type: nauc_recall_at_100_max
value: nan
- type: nauc_recall_at_10_diff1
value: 61.500155617802555
- type: nauc_recall_at_10_max
value: 47.88359788359823
- type: nauc_recall_at_1_diff1
value: 67.21642606971449
- type: nauc_recall_at_1_max
value: 19.793191631302918
- type: nauc_recall_at_20_diff1
value: 56.13912231559305
- type: nauc_recall_at_20_max
value: 21.825396825396858
- type: nauc_recall_at_3_diff1
value: 70.79831932773116
- type: nauc_recall_at_3_max
value: 47.461484593837426
- type: nauc_recall_at_5_diff1
value: 58.506069094304394
- type: nauc_recall_at_5_max
value: 57.19887955182054
- type: ndcg_at_1
value: 71
- type: ndcg_at_10
value: 85.226
- type: ndcg_at_100
value: 85.839
- type: ndcg_at_1000
value: 85.839
- type: ndcg_at_20
value: 85.458
- type: ndcg_at_3
value: 83.333
- type: ndcg_at_5
value: 84.58099999999999
- type: precision_at_1
value: 71
- type: precision_at_10
value: 9.700000000000001
- type: precision_at_100
value: 1
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 4.9
- type: precision_at_3
value: 30.667
- type: precision_at_5
value: 19
- type: recall_at_1
value: 71
- type: recall_at_10
value: 97
- type: recall_at_100
value: 100
- type: recall_at_1000
value: 100
- type: recall_at_20
value: 98
- type: recall_at_3
value: 92
- type: recall_at_5
value: 95
- task:
type: Retrieval
dataset:
type: jinaai/xpqa
name: MTEB XPQARetrieval (fr)
config: fr
split: test
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f
metrics:
- type: map_at_1
value: 40.668
- type: map_at_10
value: 63.29900000000001
- type: map_at_100
value: 64.628
- type: map_at_1000
value: 64.683
- type: map_at_20
value: 64.156
- type: map_at_3
value: 56.858
- type: map_at_5
value: 61.072
- type: mrr_at_1
value: 63.28437917222964
- type: mrr_at_10
value: 71.24700659079828
- type: mrr_at_100
value: 71.73622475819593
- type: mrr_at_1000
value: 71.7489306936674
- type: mrr_at_20
value: 71.54825584541467
- type: mrr_at_3
value: 69.55941255006672
- type: mrr_at_5
value: 70.47396528704935
- type: nauc_map_at_1000_diff1
value: 43.35620080035366
- type: nauc_map_at_1000_max
value: 50.28640921325736
- type: nauc_map_at_100_diff1
value: 43.31973658913103
- type: nauc_map_at_100_max
value: 50.273258626884484
- type: nauc_map_at_10_diff1
value: 43.027049566983536
- type: nauc_map_at_10_max
value: 49.577710318540966
- type: nauc_map_at_1_diff1
value: 54.26507755550101
- type: nauc_map_at_1_max
value: 29.3055004033253
- type: nauc_map_at_20_diff1
value: 43.21822622085122
- type: nauc_map_at_20_max
value: 50.055159148215544
- type: nauc_map_at_3_diff1
value: 46.17179722912072
- type: nauc_map_at_3_max
value: 43.098861622889245
- type: nauc_map_at_5_diff1
value: 43.417118302901045
- type: nauc_map_at_5_max
value: 47.855182277192995
- type: nauc_mrr_at_1000_diff1
value: 53.264166874886484
- type: nauc_mrr_at_1000_max
value: 60.06399045079078
- type: nauc_mrr_at_100_diff1
value: 53.25723295738035
- type: nauc_mrr_at_100_max
value: 60.064446692426365
- type: nauc_mrr_at_10_diff1
value: 53.27175189594254
- type: nauc_mrr_at_10_max
value: 60.0620551274014
- type: nauc_mrr_at_1_diff1
value: 55.382898728149954
- type: nauc_mrr_at_1_max
value: 59.47364922562707
- type: nauc_mrr_at_20_diff1
value: 53.101546449165404
- type: nauc_mrr_at_20_max
value: 59.98535813727071
- type: nauc_mrr_at_3_diff1
value: 53.83121615715132
- type: nauc_mrr_at_3_max
value: 60.86140499580485
- type: nauc_mrr_at_5_diff1
value: 53.17340169131113
- type: nauc_mrr_at_5_max
value: 60.323733961935865
- type: nauc_ndcg_at_1000_diff1
value: 45.529862481919835
- type: nauc_ndcg_at_1000_max
value: 54.19889340138254
- type: nauc_ndcg_at_100_diff1
value: 45.03283772116745
- type: nauc_ndcg_at_100_max
value: 54.014933886963036
- type: nauc_ndcg_at_10_diff1
value: 43.69847706677576
- type: nauc_ndcg_at_10_max
value: 51.997083339083474
- type: nauc_ndcg_at_1_diff1
value: 55.382898728149954
- type: nauc_ndcg_at_1_max
value: 59.47364922562707
- type: nauc_ndcg_at_20_diff1
value: 43.97031810457665
- type: nauc_ndcg_at_20_max
value: 52.75113969394979
- type: nauc_ndcg_at_3_diff1
value: 45.15249621607577
- type: nauc_ndcg_at_3_max
value: 51.97757108163661
- type: nauc_ndcg_at_5_diff1
value: 44.01197180455844
- type: nauc_ndcg_at_5_max
value: 50.4940600552972
- type: nauc_precision_at_1000_diff1
value: -21.744958100458017
- type: nauc_precision_at_1000_max
value: 17.992122779928053
- type: nauc_precision_at_100_diff1
value: -19.676955126243957
- type: nauc_precision_at_100_max
value: 21.92261529052923
- type: nauc_precision_at_10_diff1
value: -12.153879041711848
- type: nauc_precision_at_10_max
value: 30.632660221696995
- type: nauc_precision_at_1_diff1
value: 55.382898728149954
- type: nauc_precision_at_1_max
value: 59.47364922562707
- type: nauc_precision_at_20_diff1
value: -15.083687263517998
- type: nauc_precision_at_20_max
value: 26.855087773361202
- type: nauc_precision_at_3_diff1
value: 2.4635804150765113
- type: nauc_precision_at_3_max
value: 41.11369929685033
- type: nauc_precision_at_5_diff1
value: -6.912714357985636
- type: nauc_precision_at_5_max
value: 35.72995297460379
- type: nauc_recall_at_1000_diff1
value: 71.02370020243924
- type: nauc_recall_at_1000_max
value: 27.48289323103369
- type: nauc_recall_at_100_diff1
value: 29.646214405433696
- type: nauc_recall_at_100_max
value: 44.07221611142022
- type: nauc_recall_at_10_diff1
value: 31.939036367001002
- type: nauc_recall_at_10_max
value: 41.20048321364925
- type: nauc_recall_at_1_diff1
value: 54.26507755550101
- type: nauc_recall_at_1_max
value: 29.3055004033253
- type: nauc_recall_at_20_diff1
value: 29.698861624429636
- type: nauc_recall_at_20_max
value: 41.33416829563071
- type: nauc_recall_at_3_diff1
value: 41.73527831566349
- type: nauc_recall_at_3_max
value: 38.73426347266254
- type: nauc_recall_at_5_diff1
value: 35.44302402135149
- type: nauc_recall_at_5_max
value: 42.141691917800586
- type: ndcg_at_1
value: 63.284
- type: ndcg_at_10
value: 69.503
- type: ndcg_at_100
value: 73.687
- type: ndcg_at_1000
value: 74.52499999999999
- type: ndcg_at_20
value: 71.50800000000001
- type: ndcg_at_3
value: 64.434
- type: ndcg_at_5
value: 65.996
- type: precision_at_1
value: 63.284
- type: precision_at_10
value: 16.048000000000002
- type: precision_at_100
value: 1.955
- type: precision_at_1000
value: 0.20600000000000002
- type: precision_at_20
value: 8.778
- type: precision_at_3
value: 39.163
- type: precision_at_5
value: 28.037
- type: recall_at_1
value: 40.668
- type: recall_at_10
value: 78.956
- type: recall_at_100
value: 94.504
- type: recall_at_1000
value: 99.833
- type: recall_at_20
value: 85.085
- type: recall_at_3
value: 62.379
- type: recall_at_5
value: 70.254
license: apache-2.0
# The model embeds both French and English text, so both languages are listed.
language:
- fr
- en
metrics:
- pearsonr
- spearmanr
bilingual-document-embedding
bilingual-document-embedding is an embedding model for documents in two languages, French and English, with a context length of up to 8096 tokens. It is a specialized sentence-embedding model trained specifically for this language pair, leveraging the robust capabilities of BGE M3: it is a pre-trained language model based on the BGE M3 architecture. The model uses XLM-RoBERTa to encode English and French sentences into a 1024-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language.
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
Training and Fine-tuning process
Stage 1: NLI Training
- Dataset: [(SNLI+XNLI) for English+French]
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics.
Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark
- Dataset: [STSB-fr and en]
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library.
Stage 4: Advanced Augmentation Fine-tuning
- Dataset: STSB, with silver samples generated from the gold samples
- Method: Employed an advanced strategy using Augmented SBERT with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy.
Usage:
Using this model becomes easy when you have sentence-transformers installed:
pip install -U sentence-transformers
Then you can use the model like this:
from sentence_transformers import SentenceTransformer
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"]  # one French and one English input
model = SentenceTransformer('Lajavaness/bilingual-document-embedding', trust_remote_code=True)  # trust_remote_code allows loading the custom BilingualModel code shipped with the model
embeddings = model.encode(sentences)  # compute one embedding per input sentence
print(embeddings)
Evaluation
TODO
Citation
@article{chen2024bge,
title={Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation},
author={Chen, Jianlv and Xiao, Shitao and Zhang, Peitian and Luo, Kun and Lian, Defu and Liu, Zheng},
journal={arXiv preprint arXiv:2402.03216},
year={2024}
}
@article{conneau2019unsupervised,
title={Unsupervised cross-lingual representation learning at scale},
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
journal={arXiv preprint arXiv:1911.02116},
year={2019}
}
@article{reimers2019sentence,
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
author={Reimers, Nils and Gurevych, Iryna},
journal={arXiv preprint arXiv:1908.10084},
year={2019}
}
@article{thakur2020augmented,
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks},
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna},
journal={arXiv e-prints},
pages={arXiv--2010},
year={2020}