dangvantuan's picture
Update README.md
4f99da7 verified
---
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers
- french
- english
- sentence-embedding
- mteb
model-index:
- name: 7eff199d41ff669fad99d83cad9249c393c3f14b
results:
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringP2P
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 59.69196295449414
- type: v_measures
value: [0.6355772777559684, 0.4980707615440343, 0.5851538838323186, 0.6567709175938427, 0.5712405288636999]
- task:
type: Clustering
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloProfClusteringS2S
config: default
split: test
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b
metrics:
- type: v_measure
value: 45.607106996926426
- type: v_measures
value: [0.45846869913649535, 0.42657120373128293, 0.45507356125930876, 0.4258913306353704, 0.4779122207000794]
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p
name: MTEB AlloprofReranking
config: default
split: test
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd
metrics:
- type: map
value: 73.51836428087765
- type: mrr
value: 74.8550285111166
- type: nAUC_map_diff1
value: 56.006169898728466
- type: nAUC_map_max
value: 27.886037223407506
- type: nAUC_mrr_diff1
value: 56.68072778248672
- type: nAUC_mrr_max
value: 29.362681962243276
- task:
type: Retrieval
dataset:
type: lyon-nlp/alloprof
name: MTEB AlloprofRetrieval
config: default
split: test
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd
metrics:
- type: map_at_1
value: 32.080999999999996
- type: map_at_10
value: 43.582
- type: map_at_100
value: 44.381
- type: map_at_1000
value: 44.426
- type: map_at_20
value: 44.061
- type: map_at_3
value: 40.602
- type: map_at_5
value: 42.381
- type: mrr_at_1
value: 32.08117443868739
- type: mrr_at_10
value: 43.5823429832498
- type: mrr_at_100
value: 44.38068560877513
- type: mrr_at_1000
value: 44.426194305504026
- type: mrr_at_20
value: 44.06128094655753
- type: mrr_at_3
value: 40.60161197466903
- type: mrr_at_5
value: 42.380541162924715
- type: nauc_map_at_1000_diff1
value: 37.22997629352391
- type: nauc_map_at_1000_max
value: 38.65090969900466
- type: nauc_map_at_100_diff1
value: 37.22644507166512
- type: nauc_map_at_100_max
value: 38.67447923917633
- type: nauc_map_at_10_diff1
value: 37.02440573022942
- type: nauc_map_at_10_max
value: 38.52972171430789
- type: nauc_map_at_1_diff1
value: 41.18101653444774
- type: nauc_map_at_1_max
value: 34.87383192583458
- type: nauc_map_at_20_diff1
value: 37.14172285932024
- type: nauc_map_at_20_max
value: 38.66753159239803
- type: nauc_map_at_3_diff1
value: 37.53556306862998
- type: nauc_map_at_3_max
value: 37.86008195327724
- type: nauc_map_at_5_diff1
value: 37.14904081229067
- type: nauc_map_at_5_max
value: 38.267819714061105
- type: nauc_mrr_at_1000_diff1
value: 37.22997629352391
- type: nauc_mrr_at_1000_max
value: 38.65090969900466
- type: nauc_mrr_at_100_diff1
value: 37.22644507166512
- type: nauc_mrr_at_100_max
value: 38.67447923917633
- type: nauc_mrr_at_10_diff1
value: 37.02440573022942
- type: nauc_mrr_at_10_max
value: 38.52972171430789
- type: nauc_mrr_at_1_diff1
value: 41.18101653444774
- type: nauc_mrr_at_1_max
value: 34.87383192583458
- type: nauc_mrr_at_20_diff1
value: 37.14172285932024
- type: nauc_mrr_at_20_max
value: 38.66753159239803
- type: nauc_mrr_at_3_diff1
value: 37.53556306862998
- type: nauc_mrr_at_3_max
value: 37.86008195327724
- type: nauc_mrr_at_5_diff1
value: 37.14904081229067
- type: nauc_mrr_at_5_max
value: 38.267819714061105
- type: nauc_ndcg_at_1000_diff1
value: 36.313082263552204
- type: nauc_ndcg_at_1000_max
value: 40.244406213773765
- type: nauc_ndcg_at_100_diff1
value: 36.17060946689135
- type: nauc_ndcg_at_100_max
value: 41.069278488584416
- type: nauc_ndcg_at_10_diff1
value: 35.2775471480974
- type: nauc_ndcg_at_10_max
value: 40.33902753007036
- type: nauc_ndcg_at_1_diff1
value: 41.18101653444774
- type: nauc_ndcg_at_1_max
value: 34.87383192583458
- type: nauc_ndcg_at_20_diff1
value: 35.71067272175871
- type: nauc_ndcg_at_20_max
value: 40.94374381572908
- type: nauc_ndcg_at_3_diff1
value: 36.45082651868188
- type: nauc_ndcg_at_3_max
value: 38.87195110158222
- type: nauc_ndcg_at_5_diff1
value: 35.683568481780505
- type: nauc_ndcg_at_5_max
value: 39.606933866599
- type: nauc_precision_at_1000_diff1
value: 15.489726515767439
- type: nauc_precision_at_1000_max
value: 75.94259161180715
- type: nauc_precision_at_100_diff1
value: 30.033605095284656
- type: nauc_precision_at_100_max
value: 62.40786465750442
- type: nauc_precision_at_10_diff1
value: 28.617170969915
- type: nauc_precision_at_10_max
value: 47.35884745487521
- type: nauc_precision_at_1_diff1
value: 41.18101653444774
- type: nauc_precision_at_1_max
value: 34.87383192583458
- type: nauc_precision_at_20_diff1
value: 29.730952749557144
- type: nauc_precision_at_20_max
value: 52.09696741873719
- type: nauc_precision_at_3_diff1
value: 33.30844921569695
- type: nauc_precision_at_3_max
value: 41.84496633792437
- type: nauc_precision_at_5_diff1
value: 31.000246292430838
- type: nauc_precision_at_5_max
value: 43.88721507465343
- type: nauc_recall_at_1000_diff1
value: 15.48972651576705
- type: nauc_recall_at_1000_max
value: 75.94259161180725
- type: nauc_recall_at_100_diff1
value: 30.033605095284816
- type: nauc_recall_at_100_max
value: 62.40786465750426
- type: nauc_recall_at_10_diff1
value: 28.617170969914984
- type: nauc_recall_at_10_max
value: 47.35884745487525
- type: nauc_recall_at_1_diff1
value: 41.18101653444774
- type: nauc_recall_at_1_max
value: 34.87383192583458
- type: nauc_recall_at_20_diff1
value: 29.730952749557087
- type: nauc_recall_at_20_max
value: 52.09696741873715
- type: nauc_recall_at_3_diff1
value: 33.30844921569694
- type: nauc_recall_at_3_max
value: 41.84496633792433
- type: nauc_recall_at_5_diff1
value: 31.000246292430838
- type: nauc_recall_at_5_max
value: 43.88721507465339
- type: ndcg_at_1
value: 32.080999999999996
- type: ndcg_at_10
value: 49.502
- type: ndcg_at_100
value: 53.52
- type: ndcg_at_1000
value: 54.842
- type: ndcg_at_20
value: 51.219
- type: ndcg_at_3
value: 43.381
- type: ndcg_at_5
value: 46.603
- type: precision_at_1
value: 32.080999999999996
- type: precision_at_10
value: 6.822
- type: precision_at_100
value: 0.873
- type: precision_at_1000
value: 0.098
- type: precision_at_20
value: 3.7479999999999998
- type: precision_at_3
value: 17.142
- type: precision_at_5
value: 11.857
- type: recall_at_1
value: 32.080999999999996
- type: recall_at_10
value: 68.221
- type: recall_at_100
value: 87.349
- type: recall_at_1000
value: 98.014
- type: recall_at_20
value: 74.957
- type: recall_at_3
value: 51.425
- type: recall_at_5
value: 59.282999999999994
- task:
type: Classification
dataset:
type: mteb/amazon_reviews_multi
name: MTEB AmazonReviewsClassification (fr)
config: fr
split: test
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
metrics:
- type: accuracy
value: 39.892
- type: f1
value: 38.38126304364462
- type: f1_weighted
value: 38.38126304364462
- task:
type: Retrieval
dataset:
type: maastrichtlawtech/bsard
name: MTEB BSARDRetrieval
config: default
split: test
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59
metrics:
- type: map_at_1
value: 10.811
- type: map_at_10
value: 16.414
- type: map_at_100
value: 17.647
- type: map_at_1000
value: 17.742
- type: map_at_20
value: 17.22
- type: map_at_3
value: 14.188999999999998
- type: map_at_5
value: 15.113
- type: mrr_at_1
value: 10.81081081081081
- type: mrr_at_10
value: 16.41427141427142
- type: mrr_at_100
value: 17.647339314041712
- type: mrr_at_1000
value: 17.74213263983212
- type: mrr_at_20
value: 17.219989884463573
- type: mrr_at_3
value: 14.18918918918919
- type: mrr_at_5
value: 15.112612612612612
- type: nauc_map_at_1000_diff1
value: 13.07108195916555
- type: nauc_map_at_1000_max
value: 14.000521014179807
- type: nauc_map_at_100_diff1
value: 13.087117094079332
- type: nauc_map_at_100_max
value: 13.99712558752583
- type: nauc_map_at_10_diff1
value: 13.452029501381165
- type: nauc_map_at_10_max
value: 13.3341655571542
- type: nauc_map_at_1_diff1
value: 14.990419981155167
- type: nauc_map_at_1_max
value: 8.812519082504037
- type: nauc_map_at_20_diff1
value: 12.80321357992737
- type: nauc_map_at_20_max
value: 14.020962859032371
- type: nauc_map_at_3_diff1
value: 14.84230805712973
- type: nauc_map_at_3_max
value: 11.644032755353722
- type: nauc_map_at_5_diff1
value: 15.100168959732835
- type: nauc_map_at_5_max
value: 13.634801099074355
- type: nauc_mrr_at_1000_diff1
value: 13.07108195916555
- type: nauc_mrr_at_1000_max
value: 14.000521014179807
- type: nauc_mrr_at_100_diff1
value: 13.087117094079332
- type: nauc_mrr_at_100_max
value: 13.99712558752583
- type: nauc_mrr_at_10_diff1
value: 13.452029501381165
- type: nauc_mrr_at_10_max
value: 13.3341655571542
- type: nauc_mrr_at_1_diff1
value: 14.990419981155167
- type: nauc_mrr_at_1_max
value: 8.812519082504037
- type: nauc_mrr_at_20_diff1
value: 12.80321357992737
- type: nauc_mrr_at_20_max
value: 14.020962859032371
- type: nauc_mrr_at_3_diff1
value: 14.84230805712973
- type: nauc_mrr_at_3_max
value: 11.644032755353722
- type: nauc_mrr_at_5_diff1
value: 15.100168959732835
- type: nauc_mrr_at_5_max
value: 13.634801099074355
- type: nauc_ndcg_at_1000_diff1
value: 11.335350893370972
- type: nauc_ndcg_at_1000_max
value: 16.09665875369169
- type: nauc_ndcg_at_100_diff1
value: 11.499643600969176
- type: nauc_ndcg_at_100_max
value: 15.967105414704186
- type: nauc_ndcg_at_10_diff1
value: 12.093263549786606
- type: nauc_ndcg_at_10_max
value: 14.605821897766461
- type: nauc_ndcg_at_1_diff1
value: 14.990419981155167
- type: nauc_ndcg_at_1_max
value: 8.812519082504037
- type: nauc_ndcg_at_20_diff1
value: 10.197380043193812
- type: nauc_ndcg_at_20_max
value: 16.332533239525365
- type: nauc_ndcg_at_3_diff1
value: 14.835825175950765
- type: nauc_ndcg_at_3_max
value: 11.898757954417214
- type: nauc_ndcg_at_5_diff1
value: 15.278603386081823
- type: nauc_ndcg_at_5_max
value: 15.007133861218167
- type: nauc_precision_at_1000_diff1
value: 2.7469897420865195
- type: nauc_precision_at_1000_max
value: 26.874535278616346
- type: nauc_precision_at_100_diff1
value: 7.600735526139776
- type: nauc_precision_at_100_max
value: 20.7203382946415
- type: nauc_precision_at_10_diff1
value: 8.938642089366768
- type: nauc_precision_at_10_max
value: 17.320961743140874
- type: nauc_precision_at_1_diff1
value: 14.990419981155167
- type: nauc_precision_at_1_max
value: 8.812519082504037
- type: nauc_precision_at_20_diff1
value: 3.733877816322278
- type: nauc_precision_at_20_max
value: 21.581173305923002
- type: nauc_precision_at_3_diff1
value: 14.828850401790316
- type: nauc_precision_at_3_max
value: 12.369943286612463
- type: nauc_precision_at_5_diff1
value: 15.728617939150672
- type: nauc_precision_at_5_max
value: 18.103783411900697
- type: nauc_recall_at_1000_diff1
value: 2.746989742086615
- type: nauc_recall_at_1000_max
value: 26.874535278616367
- type: nauc_recall_at_100_diff1
value: 7.600735526139775
- type: nauc_recall_at_100_max
value: 20.720338294641536
- type: nauc_recall_at_10_diff1
value: 8.93864208936673
- type: nauc_recall_at_10_max
value: 17.32096174314083
- type: nauc_recall_at_1_diff1
value: 14.990419981155167
- type: nauc_recall_at_1_max
value: 8.812519082504037
- type: nauc_recall_at_20_diff1
value: 3.733877816322231
- type: nauc_recall_at_20_max
value: 21.58117330592295
- type: nauc_recall_at_3_diff1
value: 14.828850401790339
- type: nauc_recall_at_3_max
value: 12.369943286612509
- type: nauc_recall_at_5_diff1
value: 15.72861793915063
- type: nauc_recall_at_5_max
value: 18.103783411900658
- type: ndcg_at_1
value: 10.811
- type: ndcg_at_10
value: 20.244
- type: ndcg_at_100
value: 26.526
- type: ndcg_at_1000
value: 29.217
- type: ndcg_at_20
value: 23.122
- type: ndcg_at_3
value: 15.396
- type: ndcg_at_5
value: 17.063
- type: precision_at_1
value: 10.811
- type: precision_at_10
value: 3.288
- type: precision_at_100
value: 0.631
- type: precision_at_1000
value: 0.08499999999999999
- type: precision_at_20
value: 2.207
- type: precision_at_3
value: 6.306000000000001
- type: precision_at_5
value: 4.595
- type: recall_at_1
value: 10.811
- type: recall_at_10
value: 32.883
- type: recall_at_100
value: 63.063
- type: recall_at_1000
value: 84.685
- type: recall_at_20
value: 44.144
- type: recall_at_3
value: 18.919
- type: recall_at_5
value: 22.973
- task:
type: Clustering
dataset:
type: lyon-nlp/clustering-hal-s2s
name: MTEB HALClusteringS2S
config: default
split: test
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915
metrics:
- type: v_measure
value: 25.209561281028435
- type: v_measures
value: [0.28558356565178666, 0.2707322246129254, 0.2683693125038299, 0.2703937853835602, 0.22057190525667872]
- task:
type: Clustering
dataset:
type: reciTAL/mlsum
name: MTEB MLSUMClusteringP2P
config: default
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 42.82528809996964
- type: v_measures
value: [0.43465029372260205, 0.42821098223656917, 0.43537879149583325, 0.4289578694928627, 0.3794307754465835]
- task:
type: Clustering
dataset:
type: reciTAL/mlsum
name: MTEB MLSUMClusteringS2S
config: default
split: test
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7
metrics:
- type: v_measure
value: 43.44172295073941
- type: v_measures
value: [0.4294163918345751, 0.46229994906725164, 0.44188446196569603, 0.43839320352264155, 0.3866853445120933]
- task:
type: Classification
dataset:
type: mteb/mtop_domain
name: MTEB MTOPDomainClassification (fr)
config: fr
split: test
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
metrics:
- type: accuracy
value: 88.33072345756342
- type: f1
value: 88.11780476022122
- type: f1_weighted
value: 88.28188145087299
- task:
type: Classification
dataset:
type: mteb/mtop_intent
name: MTEB MTOPIntentClassification (fr)
config: fr
split: test
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
metrics:
- type: accuracy
value: 57.854682117131226
- type: f1
value: 41.121569078191996
- type: f1_weighted
value: 60.04845437480532
- task:
type: Classification
dataset:
type: mteb/masakhanews
name: MTEB MasakhaNEWSClassification (fra)
config: fra
split: test
revision: 18193f187b92da67168c655c9973a165ed9593dd
metrics:
- type: accuracy
value: 76.87203791469194
- type: f1
value: 72.94847557303437
- type: f1_weighted
value: 76.9128173959562
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringP2P (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 61.32006896333715
- type: v_measures
value: [1.0, 0.6446188396257355, 0.28995363026757603, 0.40898735994696084, 0.7224436183265853]
- task:
type: Clustering
dataset:
type: masakhane/masakhanews
name: MTEB MasakhaNEWSClusteringS2S (fra)
config: fra
split: test
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60
metrics:
- type: v_measure
value: 60.509887123660256
- type: v_measures
value: [1.0, 0.022472587992562534, 0.4686320087689936, 0.811946141094871, 0.7224436183265853]
- task:
type: Classification
dataset:
type: mteb/amazon_massive_intent
name: MTEB MassiveIntentClassification (fr)
config: fr
split: test
revision: 4672e20407010da34463acc759c162ca9734bca6
metrics:
- type: accuracy
value: 64.14256893073302
- type: f1
value: 61.33068109342782
- type: f1_weighted
value: 62.74292948992287
- task:
type: Classification
dataset:
type: mteb/amazon_massive_scenario
name: MTEB MassiveScenarioClassification (fr)
config: fr
split: test
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8
metrics:
- type: accuracy
value: 70.68930733019502
- type: f1
value: 70.26641874846638
- type: f1_weighted
value: 70.35250466465047
- task:
type: Retrieval
dataset:
type: jinaai/mintakaqa
name: MTEB MintakaRetrieval (fr)
config: fr
split: test
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e
metrics:
- type: map_at_1
value: 19.165
- type: map_at_10
value: 28.663
- type: map_at_100
value: 29.737000000000002
- type: map_at_1000
value: 29.826000000000004
- type: map_at_20
value: 29.266
- type: map_at_3
value: 26.024
- type: map_at_5
value: 27.486
- type: mrr_at_1
value: 19.164619164619165
- type: mrr_at_10
value: 28.66298116298116
- type: mrr_at_100
value: 29.737423308510476
- type: mrr_at_1000
value: 29.825744096186796
- type: mrr_at_20
value: 29.26593905045215
- type: mrr_at_3
value: 26.023751023751025
- type: mrr_at_5
value: 27.48566748566751
- type: nauc_map_at_1000_diff1
value: 23.682512151202967
- type: nauc_map_at_1000_max
value: 25.78708364723919
- type: nauc_map_at_100_diff1
value: 23.647360144907324
- type: nauc_map_at_100_max
value: 25.812420160707074
- type: nauc_map_at_10_diff1
value: 23.658224717435765
- type: nauc_map_at_10_max
value: 25.845198626323217
- type: nauc_map_at_1_diff1
value: 30.56830621718086
- type: nauc_map_at_1_max
value: 19.931526248650147
- type: nauc_map_at_20_diff1
value: 23.69662048930091
- type: nauc_map_at_20_max
value: 25.936653022318403
- type: nauc_map_at_3_diff1
value: 24.663221072349817
- type: nauc_map_at_3_max
value: 24.634011858800275
- type: nauc_map_at_5_diff1
value: 24.3650772668551
- type: nauc_map_at_5_max
value: 25.75222318469224
- type: nauc_mrr_at_1000_diff1
value: 23.682512151202967
- type: nauc_mrr_at_1000_max
value: 25.78708364723919
- type: nauc_mrr_at_100_diff1
value: 23.647360144907324
- type: nauc_mrr_at_100_max
value: 25.812420160707074
- type: nauc_mrr_at_10_diff1
value: 23.658224717435765
- type: nauc_mrr_at_10_max
value: 25.845198626323217
- type: nauc_mrr_at_1_diff1
value: 30.56830621718086
- type: nauc_mrr_at_1_max
value: 19.931526248650147
- type: nauc_mrr_at_20_diff1
value: 23.69662048930091
- type: nauc_mrr_at_20_max
value: 25.936653022318403
- type: nauc_mrr_at_3_diff1
value: 24.663221072349817
- type: nauc_mrr_at_3_max
value: 24.634011858800275
- type: nauc_mrr_at_5_diff1
value: 24.3650772668551
- type: nauc_mrr_at_5_max
value: 25.75222318469224
- type: nauc_ndcg_at_1000_diff1
value: 21.68690756038845
- type: nauc_ndcg_at_1000_max
value: 27.168575101114893
- type: nauc_ndcg_at_100_diff1
value: 20.484812648526646
- type: nauc_ndcg_at_100_max
value: 27.79987215383081
- type: nauc_ndcg_at_10_diff1
value: 20.791330920997765
- type: nauc_ndcg_at_10_max
value: 28.272774035036935
- type: nauc_ndcg_at_1_diff1
value: 30.56830621718086
- type: nauc_ndcg_at_1_max
value: 19.931526248650147
- type: nauc_ndcg_at_20_diff1
value: 20.88342749790573
- type: nauc_ndcg_at_20_max
value: 28.627184419546825
- type: nauc_ndcg_at_3_diff1
value: 22.987235018840494
- type: nauc_ndcg_at_3_max
value: 26.054144215976482
- type: nauc_ndcg_at_5_diff1
value: 22.497863289090464
- type: nauc_ndcg_at_5_max
value: 27.98879570850259
- type: nauc_precision_at_1000_diff1
value: -0.6707404502167996
- type: nauc_precision_at_1000_max
value: 31.987217077673346
- type: nauc_precision_at_100_diff1
value: 5.079765403021014
- type: nauc_precision_at_100_max
value: 34.857053312543194
- type: nauc_precision_at_10_diff1
value: 12.628771618059472
- type: nauc_precision_at_10_max
value: 35.009564954169896
- type: nauc_precision_at_1_diff1
value: 30.56830621718086
- type: nauc_precision_at_1_max
value: 19.931526248650147
- type: nauc_precision_at_20_diff1
value: 12.28251326261041
- type: nauc_precision_at_20_max
value: 36.942629359432075
- type: nauc_precision_at_3_diff1
value: 18.663775283519335
- type: nauc_precision_at_3_max
value: 29.741315837492472
- type: nauc_precision_at_5_diff1
value: 17.70442691217025
- type: nauc_precision_at_5_max
value: 33.93438470540527
- type: nauc_recall_at_1000_diff1
value: -0.6707404502171719
- type: nauc_recall_at_1000_max
value: 31.987217077672607
- type: nauc_recall_at_100_diff1
value: 5.079765403021056
- type: nauc_recall_at_100_max
value: 34.85705331254323
- type: nauc_recall_at_10_diff1
value: 12.628771618059483
- type: nauc_recall_at_10_max
value: 35.00956495416992
- type: nauc_recall_at_1_diff1
value: 30.56830621718086
- type: nauc_recall_at_1_max
value: 19.931526248650147
- type: nauc_recall_at_20_diff1
value: 12.282513262610411
- type: nauc_recall_at_20_max
value: 36.94262935943207
- type: nauc_recall_at_3_diff1
value: 18.663775283519346
- type: nauc_recall_at_3_max
value: 29.741315837492465
- type: nauc_recall_at_5_diff1
value: 17.704426912170252
- type: nauc_recall_at_5_max
value: 33.934384705405286
- type: ndcg_at_1
value: 19.165
- type: ndcg_at_10
value: 33.674
- type: ndcg_at_100
value: 39.297
- type: ndcg_at_1000
value: 41.896
- type: ndcg_at_20
value: 35.842
- type: ndcg_at_3
value: 28.238999999999997
- type: ndcg_at_5
value: 30.863000000000003
- type: precision_at_1
value: 19.165
- type: precision_at_10
value: 4.9590000000000005
- type: precision_at_100
value: 0.768
- type: precision_at_1000
value: 0.098
- type: precision_at_20
value: 2.905
- type: precision_at_3
value: 11.548
- type: precision_at_5
value: 8.198
- type: recall_at_1
value: 19.165
- type: recall_at_10
value: 49.59
- type: recall_at_100
value: 76.822
- type: recall_at_1000
value: 97.83
- type: recall_at_20
value: 58.108000000000004
- type: recall_at_3
value: 34.644000000000005
- type: recall_at_5
value: 40.991
- task:
type: PairClassification
dataset:
type: GEM/opusparcus
name: MTEB OpusparcusPC (fr)
config: fr
split: test
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a
metrics:
- type: cos_sim_accuracy
value: 83.51498637602179
- type: cos_sim_ap
value: 94.18614574224773
- type: cos_sim_f1
value: 88.3564925730714
- type: cos_sim_precision
value: 85.37037037037037
- type: cos_sim_recall
value: 91.55908639523337
- type: dot_accuracy
value: 83.51498637602179
- type: dot_ap
value: 94.18614574224773
- type: dot_f1
value: 88.3564925730714
- type: dot_precision
value: 85.37037037037037
- type: dot_recall
value: 91.55908639523337
- type: euclidean_accuracy
value: 83.51498637602179
- type: euclidean_ap
value: 94.18614574224773
- type: euclidean_f1
value: 88.3564925730714
- type: euclidean_precision
value: 85.37037037037037
- type: euclidean_recall
value: 91.55908639523337
- type: manhattan_accuracy
value: 83.51498637602179
- type: manhattan_ap
value: 94.16717671332795
- type: manhattan_f1
value: 88.35418671799807
- type: manhattan_precision
value: 85.71428571428571
- type: manhattan_recall
value: 91.16186693147964
- type: max_accuracy
value: 83.51498637602179
- type: max_ap
value: 94.18614574224773
- type: max_f1
value: 88.3564925730714
- task:
type: PairClassification
dataset:
type: google-research-datasets/paws-x
name: MTEB PawsX (fr)
config: fr
split: test
revision: 8a04d940a42cd40658986fdd8e3da561533a3646
metrics:
- type: cos_sim_accuracy
value: 60.699999999999996
- type: cos_sim_ap
value: 60.20276173325004
- type: cos_sim_f1
value: 62.716429395921516
- type: cos_sim_precision
value: 48.05424528301887
- type: cos_sim_recall
value: 90.2547065337763
- type: dot_accuracy
value: 60.699999999999996
- type: dot_ap
value: 60.27996470746299
- type: dot_f1
value: 62.716429395921516
- type: dot_precision
value: 48.05424528301887
- type: dot_recall
value: 90.2547065337763
- type: euclidean_accuracy
value: 60.699999999999996
- type: euclidean_ap
value: 60.20276173325004
- type: euclidean_f1
value: 62.716429395921516
- type: euclidean_precision
value: 48.05424528301887
- type: euclidean_recall
value: 90.2547065337763
- type: manhattan_accuracy
value: 60.699999999999996
- type: manhattan_ap
value: 60.18010040913353
- type: manhattan_f1
value: 62.71056661562021
- type: manhattan_precision
value: 47.92276184903452
- type: manhattan_recall
value: 90.69767441860465
- type: max_accuracy
value: 60.699999999999996
- type: max_ap
value: 60.27996470746299
- type: max_f1
value: 62.716429395921516
- task:
type: STS
dataset:
type: Lajavaness/SICK-fr
name: MTEB SICKFr
config: default
split: test
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a
metrics:
- type: cos_sim_pearson
value: 84.24496945719946
- type: cos_sim_spearman
value: 78.10001513346513
- type: euclidean_pearson
value: 81.43570951228163
- type: euclidean_spearman
value: 78.0987784421045
- type: manhattan_pearson
value: 81.31986646517238
- type: manhattan_spearman
value: 78.09610194828534
- task:
type: STS
dataset:
type: mteb/sts22-crosslingual-sts
name: MTEB STS22 (fr)
config: fr
split: test
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
metrics:
- type: cos_sim_pearson
value: 83.07721141521425
- type: cos_sim_spearman
value: 83.19199466052186
- type: euclidean_pearson
value: 82.10672022294766
- type: euclidean_spearman
value: 83.19199466052186
- type: manhattan_pearson
value: 81.92531847793633
- type: manhattan_spearman
value: 83.20694689089673
- task:
type: STS
dataset:
type: mteb/stsb_multi_mt
name: MTEB STSBenchmarkMultilingualSTS (fr)
config: fr
split: test
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c
metrics:
- type: cos_sim_pearson
value: 83.957481748094
- type: cos_sim_spearman
value: 84.40492503459248
- type: euclidean_pearson
value: 83.8150014101056
- type: euclidean_spearman
value: 84.40686653864509
- type: manhattan_pearson
value: 83.6816837321264
- type: manhattan_spearman
value: 84.2678486368702
- task:
type: Summarization
dataset:
type: lyon-nlp/summarization-summeval-fr-p2p
name: MTEB SummEvalFr
config: default
split: test
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054
metrics:
- type: cos_sim_pearson
value: 32.06592630917136
- type: cos_sim_spearman
value: 30.94878864229808
- type: dot_pearson
value: 32.06591974515864
- type: dot_spearman
value: 30.925383080565222
- task:
type: Reranking
dataset:
type: lyon-nlp/mteb-fr-reranking-syntec-s2p
name: MTEB SyntecReranking
config: default
split: test
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad
metrics:
- type: map
value: 88.11666666666667
- type: mrr
value: 88.11666666666667
- type: nAUC_map_diff1
value: 66.27779227667267
- type: nAUC_map_max
value: 6.651414764738896
- type: nAUC_mrr_diff1
value: 66.27779227667267
- type: nAUC_mrr_max
value: 6.651414764738896
- task:
type: Retrieval
dataset:
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p
name: MTEB SyntecRetrieval
config: default
split: test
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9
metrics:
- type: map_at_1
value: 69.0
- type: map_at_10
value: 80.65
- type: map_at_100
value: 80.838
- type: map_at_1000
value: 80.838
- type: map_at_20
value: 80.838
- type: map_at_3
value: 79.833
- type: map_at_5
value: 80.483
- type: mrr_at_1
value: 69.0
- type: mrr_at_10
value: 80.64999999999999
- type: mrr_at_100
value: 80.83799019607844
- type: mrr_at_1000
value: 80.83799019607844
- type: mrr_at_20
value: 80.83799019607844
- type: mrr_at_3
value: 79.83333333333334
- type: mrr_at_5
value: 80.48333333333333
- type: nauc_map_at_1000_diff1
value: 61.46904865740055
- type: nauc_map_at_1000_max
value: 24.307826758747282
- type: nauc_map_at_100_diff1
value: 61.46904865740055
- type: nauc_map_at_100_max
value: 24.307826758747282
- type: nauc_map_at_10_diff1
value: 61.094194035098035
- type: nauc_map_at_10_max
value: 24.44687875369869
- type: nauc_map_at_1_diff1
value: 65.17628798701865
- type: nauc_map_at_1_max
value: 25.79501560929155
- type: nauc_map_at_20_diff1
value: 61.46904865740055
- type: nauc_map_at_20_max
value: 24.307826758747282
- type: nauc_map_at_3_diff1
value: 61.562719756100805
- type: nauc_map_at_3_max
value: 25.87804164282553
- type: nauc_map_at_5_diff1
value: 61.471976470716264
- type: nauc_map_at_5_max
value: 25.180513270581322
- type: nauc_mrr_at_1000_diff1
value: 61.46904865740055
- type: nauc_mrr_at_1000_max
value: 24.307826758747282
- type: nauc_mrr_at_100_diff1
value: 61.46904865740055
- type: nauc_mrr_at_100_max
value: 24.307826758747282
- type: nauc_mrr_at_10_diff1
value: 61.094194035098035
- type: nauc_mrr_at_10_max
value: 24.44687875369869
- type: nauc_mrr_at_1_diff1
value: 65.17628798701865
- type: nauc_mrr_at_1_max
value: 25.79501560929155
- type: nauc_mrr_at_20_diff1
value: 61.46904865740055
- type: nauc_mrr_at_20_max
value: 24.307826758747282
- type: nauc_mrr_at_3_diff1
value: 61.562719756100805
- type: nauc_mrr_at_3_max
value: 25.87804164282553
- type: nauc_mrr_at_5_diff1
value: 61.471976470716264
- type: nauc_mrr_at_5_max
value: 25.180513270581322
- type: nauc_ndcg_at_1000_diff1
value: 60.95477865546023
- type: nauc_ndcg_at_1000_max
value: 24.427553593893535
- type: nauc_ndcg_at_100_diff1
value: 60.95477865546023
- type: nauc_ndcg_at_100_max
value: 24.427553593893535
- type: nauc_ndcg_at_10_diff1
value: 59.101673931307396
- type: nauc_ndcg_at_10_max
value: 25.01155211084955
- type: nauc_ndcg_at_1_diff1
value: 65.17628798701865
- type: nauc_ndcg_at_1_max
value: 25.79501560929155
- type: nauc_ndcg_at_20_diff1
value: 60.95477865546023
- type: nauc_ndcg_at_20_max
value: 24.427553593893535
- type: nauc_ndcg_at_3_diff1
value: 60.333057480044616
- type: nauc_ndcg_at_3_max
value: 28.363238330232637
- type: nauc_ndcg_at_5_diff1
value: 60.15511994533307
- type: nauc_ndcg_at_5_max
value: 26.94308058940176
- type: nauc_precision_at_1000_diff1
value: nan
- type: nauc_precision_at_1000_max
value: nan
- type: nauc_precision_at_100_diff1
value: nan
- type: nauc_precision_at_100_max
value: nan
- type: nauc_precision_at_10_diff1
value: 26.657329598506518
- type: nauc_precision_at_10_max
value: 34.26704014939361
- type: nauc_precision_at_1_diff1
value: 65.17628798701865
- type: nauc_precision_at_1_max
value: 25.79501560929155
- type: nauc_precision_at_20_diff1
value: 100.0
- type: nauc_precision_at_20_max
value: 100.0
- type: nauc_precision_at_3_diff1
value: 51.834066960117276
- type: nauc_precision_at_3_max
value: 48.25930372148875
- type: nauc_precision_at_5_diff1
value: 44.992997198879706
- type: nauc_precision_at_5_max
value: 50.70028011204499
- type: nauc_recall_at_1000_diff1
value: nan
- type: nauc_recall_at_1000_max
value: nan
- type: nauc_recall_at_100_diff1
value: nan
- type: nauc_recall_at_100_max
value: nan
- type: nauc_recall_at_10_diff1
value: 26.657329598505903
- type: nauc_recall_at_10_max
value: 34.26704014939303
- type: nauc_recall_at_1_diff1
value: 65.17628798701865
- type: nauc_recall_at_1_max
value: 25.79501560929155
- type: nauc_recall_at_20_diff1
value: nan
- type: nauc_recall_at_20_max
value: nan
- type: nauc_recall_at_3_diff1
value: 51.834066960117376
- type: nauc_recall_at_3_max
value: 48.25930372148865
- type: nauc_recall_at_5_diff1
value: 44.99299719887955
- type: nauc_recall_at_5_max
value: 50.70028011204488
- type: ndcg_at_1
value: 69.0
- type: ndcg_at_10
value: 84.786
- type: ndcg_at_100
value: 85.521
- type: ndcg_at_1000
value: 85.521
- type: ndcg_at_20
value: 85.521
- type: ndcg_at_3
value: 83.226
- type: ndcg_at_5
value: 84.43
- type: precision_at_1
value: 69.0
- type: precision_at_10
value: 9.700000000000001
- type: precision_at_100
value: 1.0
- type: precision_at_1000
value: 0.1
- type: precision_at_20
value: 5.0
- type: precision_at_3
value: 31.0
- type: precision_at_5
value: 19.2
- type: recall_at_1
value: 69.0
- type: recall_at_10
value: 97.0
- type: recall_at_100
value: 100.0
- type: recall_at_1000
value: 100.0
- type: recall_at_20
value: 100.0
- type: recall_at_3
value: 93.0
- type: recall_at_5
value: 96.0
- task:
type: Retrieval
dataset:
type: jinaai/xpqa
name: MTEB XPQARetrieval (fr)
config: fr
split: test
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f
metrics:
- type: map_at_1
value: 40.797
- type: map_at_10
value: 62.71099999999999
- type: map_at_100
value: 64.261
- type: map_at_1000
value: 64.306
- type: map_at_20
value: 63.693
- type: map_at_3
value: 56.686
- type: map_at_5
value: 60.653999999999996
- type: mrr_at_1
value: 64.08544726301736
- type: mrr_at_10
value: 71.24790726259349
- type: mrr_at_100
value: 71.7835679704396
- type: mrr_at_1000
value: 71.79095567140973
- type: mrr_at_20
value: 71.5854708410262
- type: mrr_at_3
value: 69.55941255006672
- type: mrr_at_5
value: 70.60747663551396
- type: nauc_map_at_1000_diff1
value: 47.803181417639365
- type: nauc_map_at_1000_max
value: 51.22073368230412
- type: nauc_map_at_100_diff1
value: 47.771573391555755
- type: nauc_map_at_100_max
value: 51.20370234778812
- type: nauc_map_at_10_diff1
value: 47.340833389771625
- type: nauc_map_at_10_max
value: 50.41256517180715
- type: nauc_map_at_1_diff1
value: 55.14983744702445
- type: nauc_map_at_1_max
value: 31.104750896985728
- type: nauc_map_at_20_diff1
value: 47.64026863999484
- type: nauc_map_at_20_max
value: 50.87670909266768
- type: nauc_map_at_3_diff1
value: 47.681906747352635
- type: nauc_map_at_3_max
value: 43.47246277661219
- type: nauc_map_at_5_diff1
value: 46.874943002794815
- type: nauc_map_at_5_max
value: 48.469495140739724
- type: nauc_mrr_at_1000_diff1
value: 57.34098736669957
- type: nauc_mrr_at_1000_max
value: 60.179095583193444
- type: nauc_mrr_at_100_diff1
value: 57.339862158018796
- type: nauc_mrr_at_100_max
value: 60.18082273539442
- type: nauc_mrr_at_10_diff1
value: 57.210874058908814
- type: nauc_mrr_at_10_max
value: 60.043680803697086
- type: nauc_mrr_at_1_diff1
value: 59.69074056197331
- type: nauc_mrr_at_1_max
value: 60.90082316300324
- type: nauc_mrr_at_20_diff1
value: 57.35434243512763
- type: nauc_mrr_at_20_max
value: 60.18873377253912
- type: nauc_mrr_at_3_diff1
value: 57.26933631425754
- type: nauc_mrr_at_3_max
value: 60.05458089795687
- type: nauc_mrr_at_5_diff1
value: 57.045411517214276
- type: nauc_mrr_at_5_max
value: 59.981421712413685
- type: nauc_ndcg_at_1000_diff1
value: 50.232929738614814
- type: nauc_ndcg_at_1000_max
value: 55.01594185277396
- type: nauc_ndcg_at_100_diff1
value: 49.876825728406786
- type: nauc_ndcg_at_100_max
value: 54.87898182661215
- type: nauc_ndcg_at_10_diff1
value: 48.40787615482867
- type: nauc_ndcg_at_10_max
value: 52.84877289626636
- type: nauc_ndcg_at_1_diff1
value: 59.69074056197331
- type: nauc_ndcg_at_1_max
value: 60.90082316300324
- type: nauc_ndcg_at_20_diff1
value: 49.08453974591539
- type: nauc_ndcg_at_20_max
value: 53.80319392912378
- type: nauc_ndcg_at_3_diff1
value: 48.21830414023458
- type: nauc_ndcg_at_3_max
value: 51.321799626032714
- type: nauc_ndcg_at_5_diff1
value: 47.614495954542605
- type: nauc_ndcg_at_5_max
value: 50.803800463597405
- type: nauc_precision_at_1000_diff1
value: -15.87250509394414
- type: nauc_precision_at_1000_max
value: 16.09830137145176
- type: nauc_precision_at_100_diff1
value: -13.720930651556534
- type: nauc_precision_at_100_max
value: 19.94363871765946
- type: nauc_precision_at_10_diff1
value: -3.9626074014054136
- type: nauc_precision_at_10_max
value: 30.48732389685921
- type: nauc_precision_at_1_diff1
value: 59.69074056197331
- type: nauc_precision_at_1_max
value: 60.90082316300324
- type: nauc_precision_at_20_diff1
value: -8.144148640034853
- type: nauc_precision_at_20_max
value: 26.183545158653338
- type: nauc_precision_at_3_diff1
value: 7.1166818076254605
- type: nauc_precision_at_3_max
value: 37.64665636029093
- type: nauc_precision_at_5_diff1
value: 0.3455996928663316
- type: nauc_precision_at_5_max
value: 34.95245204298077
- type: nauc_recall_at_1000_diff1
value: 47.93171740380228
- type: nauc_recall_at_1000_max
value: 89.21354057542635
- type: nauc_recall_at_100_diff1
value: 34.93973412699365
- type: nauc_recall_at_100_max
value: 47.89216950421148
- type: nauc_recall_at_10_diff1
value: 38.58556368247737
- type: nauc_recall_at_10_max
value: 45.13227163006313
- type: nauc_recall_at_1_diff1
value: 55.14983744702445
- type: nauc_recall_at_1_max
value: 31.104750896985728
- type: nauc_recall_at_20_diff1
value: 38.53568097509877
- type: nauc_recall_at_20_max
value: 46.37328875121808
- type: nauc_recall_at_3_diff1
value: 41.49659886305561
- type: nauc_recall_at_3_max
value: 38.59476562231703
- type: nauc_recall_at_5_diff1
value: 38.489499442628016
- type: nauc_recall_at_5_max
value: 43.06848825600403
- type: ndcg_at_1
value: 64.08500000000001
- type: ndcg_at_10
value: 68.818
- type: ndcg_at_100
value: 73.66
- type: ndcg_at_1000
value: 74.309
- type: ndcg_at_20
value: 71.147
- type: ndcg_at_3
value: 64.183
- type: ndcg_at_5
value: 65.668
- type: precision_at_1
value: 64.08500000000001
- type: precision_at_10
value: 15.728
- type: precision_at_100
value: 1.9720000000000002
- type: precision_at_1000
value: 0.207
- type: precision_at_20
value: 8.705
- type: precision_at_3
value: 39.03
- type: precision_at_5
value: 27.717000000000002
- type: recall_at_1
value: 40.797
- type: recall_at_10
value: 77.432
- type: recall_at_100
value: 95.68100000000001
- type: recall_at_1000
value: 99.666
- type: recall_at_20
value: 84.773
- type: recall_at_3
value: 62.083
- type: recall_at_5
value: 69.786
license: apache-2.0
language:
- fr
- en
---
## Model Description:
[**french-document-embedding**](https://huggingface.co/dangvantuan/french-document-embedding) is a bilingual French-English document embedding model with a context length of up to 8192 tokens. It is a specialized text-embedding model trained specifically for French and English. It is built upon [gte-multilingual](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) and trained using SimilarityLoss, [Multiple Negatives Ranking Loss](https://arxiv.org/abs/1705.00652), [Matryoshka2dLoss](https://arxiv.org/html/2402.14776v1) and [GISTEmbedLoss](https://arxiv.org/abs/2402.16829), with [bilingual-embedding-large](https://huggingface.co/Lajavaness/bilingual-embedding-large) as the guide model. This model embeds long texts or documents into 768-dimensional vectors, making it useful for vector databases serving semantic search or RAG (Retrieval-Augmented Generation).
## Full Model Architecture
```
SentenceTransformer(
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: BilingualModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
(2): Normalize()
)
```
## Usage:
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
```
pip install -U sentence-transformers
```
Then you can use the model like this:
```python
from sentence_transformers import SentenceTransformer
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"]
model = SentenceTransformer('dangvantuan/french-document-embedding', trust_remote_code=True)
embeddings = model.encode(sentences)
print(embeddings)
```
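## Evaluation
The model is evaluated on the MTEB benchmark; detailed per-task scores are listed in the `model-index` metadata at the top of this model card.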
## Citation
```bibtex
@article{reimers2019sentence,
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks},
author={Nils Reimers and Iryna Gurevych},
journal={arXiv preprint arXiv:1908.10084},
url={https://arxiv.org/abs/1908.10084},
year={2019}
}
@article{zhang2024mgte,
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
journal={arXiv preprint arXiv:2407.19669},
year={2024}
}
@article{li2023towards,
title={Towards general text embeddings with multi-stage contrastive learning},
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
journal={arXiv preprint arXiv:2308.03281},
year={2023}
}
@article{li20242d,
title={2d matryoshka sentence embeddings},
author={Li, Xianming and Li, Zongxi and Li, Jing and Xie, Haoran and Li, Qing},
journal={arXiv preprint arXiv:2402.14776},
year={2024}
}
@misc{henderson2017efficient,
title={Efficient Natural Language Response Suggestion for Smart Reply},
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
year={2017},
eprint={1705.00652},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@misc{solatorio2024gistembed,
title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning},
author={Aivin V. Solatorio},
year={2024},
eprint={2402.16829},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
```