--- library_name: sentence-transformers pipeline_tag: sentence-similarity tags: - sentence-transformers - feature-extraction - sentence-similarity - transformers - french - english - sentence-embedding - mteb model-index: - name: 7eff199d41ff669fad99d83cad9249c393c3f14b results: - task: type: Clustering dataset: type: lyon-nlp/alloprof name: MTEB AlloProfClusteringP2P config: default split: test revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b metrics: - type: v_measure value: 59.69196295449414 - type: v_measures value: [0.6355772777559684, 0.4980707615440343, 0.5851538838323186, 0.6567709175938427, 0.5712405288636999] - task: type: Clustering dataset: type: lyon-nlp/alloprof name: MTEB AlloProfClusteringS2S config: default split: test revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b metrics: - type: v_measure value: 45.607106996926426 - type: v_measures value: [0.45846869913649535, 0.42657120373128293, 0.45507356125930876, 0.4258913306353704, 0.4779122207000794] - task: type: Reranking dataset: type: lyon-nlp/mteb-fr-reranking-alloprof-s2p name: MTEB AlloprofReranking config: default split: test revision: 65393d0d7a08a10b4e348135e824f385d420b0fd metrics: - type: map value: 73.51836428087765 - type: mrr value: 74.8550285111166 - type: nAUC_map_diff1 value: 56.006169898728466 - type: nAUC_map_max value: 27.886037223407506 - type: nAUC_mrr_diff1 value: 56.68072778248672 - type: nAUC_mrr_max value: 29.362681962243276 - task: type: Retrieval dataset: type: lyon-nlp/alloprof name: MTEB AlloprofRetrieval config: default split: test revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd metrics: - type: map_at_1 value: 32.080999999999996 - type: map_at_10 value: 43.582 - type: map_at_100 value: 44.381 - type: map_at_1000 value: 44.426 - type: map_at_20 value: 44.061 - type: map_at_3 value: 40.602 - type: map_at_5 value: 42.381 - type: mrr_at_1 value: 32.08117443868739 - type: mrr_at_10 value: 43.5823429832498 - type: mrr_at_100 value: 44.38068560877513 - type: mrr_at_1000 
value: 44.426194305504026 - type: mrr_at_20 value: 44.06128094655753 - type: mrr_at_3 value: 40.60161197466903 - type: mrr_at_5 value: 42.380541162924715 - type: nauc_map_at_1000_diff1 value: 37.22997629352391 - type: nauc_map_at_1000_max value: 38.65090969900466 - type: nauc_map_at_100_diff1 value: 37.22644507166512 - type: nauc_map_at_100_max value: 38.67447923917633 - type: nauc_map_at_10_diff1 value: 37.02440573022942 - type: nauc_map_at_10_max value: 38.52972171430789 - type: nauc_map_at_1_diff1 value: 41.18101653444774 - type: nauc_map_at_1_max value: 34.87383192583458 - type: nauc_map_at_20_diff1 value: 37.14172285932024 - type: nauc_map_at_20_max value: 38.66753159239803 - type: nauc_map_at_3_diff1 value: 37.53556306862998 - type: nauc_map_at_3_max value: 37.86008195327724 - type: nauc_map_at_5_diff1 value: 37.14904081229067 - type: nauc_map_at_5_max value: 38.267819714061105 - type: nauc_mrr_at_1000_diff1 value: 37.22997629352391 - type: nauc_mrr_at_1000_max value: 38.65090969900466 - type: nauc_mrr_at_100_diff1 value: 37.22644507166512 - type: nauc_mrr_at_100_max value: 38.67447923917633 - type: nauc_mrr_at_10_diff1 value: 37.02440573022942 - type: nauc_mrr_at_10_max value: 38.52972171430789 - type: nauc_mrr_at_1_diff1 value: 41.18101653444774 - type: nauc_mrr_at_1_max value: 34.87383192583458 - type: nauc_mrr_at_20_diff1 value: 37.14172285932024 - type: nauc_mrr_at_20_max value: 38.66753159239803 - type: nauc_mrr_at_3_diff1 value: 37.53556306862998 - type: nauc_mrr_at_3_max value: 37.86008195327724 - type: nauc_mrr_at_5_diff1 value: 37.14904081229067 - type: nauc_mrr_at_5_max value: 38.267819714061105 - type: nauc_ndcg_at_1000_diff1 value: 36.313082263552204 - type: nauc_ndcg_at_1000_max value: 40.244406213773765 - type: nauc_ndcg_at_100_diff1 value: 36.17060946689135 - type: nauc_ndcg_at_100_max value: 41.069278488584416 - type: nauc_ndcg_at_10_diff1 value: 35.2775471480974 - type: nauc_ndcg_at_10_max value: 40.33902753007036 - type: 
nauc_ndcg_at_1_diff1 value: 41.18101653444774 - type: nauc_ndcg_at_1_max value: 34.87383192583458 - type: nauc_ndcg_at_20_diff1 value: 35.71067272175871 - type: nauc_ndcg_at_20_max value: 40.94374381572908 - type: nauc_ndcg_at_3_diff1 value: 36.45082651868188 - type: nauc_ndcg_at_3_max value: 38.87195110158222 - type: nauc_ndcg_at_5_diff1 value: 35.683568481780505 - type: nauc_ndcg_at_5_max value: 39.606933866599 - type: nauc_precision_at_1000_diff1 value: 15.489726515767439 - type: nauc_precision_at_1000_max value: 75.94259161180715 - type: nauc_precision_at_100_diff1 value: 30.033605095284656 - type: nauc_precision_at_100_max value: 62.40786465750442 - type: nauc_precision_at_10_diff1 value: 28.617170969915 - type: nauc_precision_at_10_max value: 47.35884745487521 - type: nauc_precision_at_1_diff1 value: 41.18101653444774 - type: nauc_precision_at_1_max value: 34.87383192583458 - type: nauc_precision_at_20_diff1 value: 29.730952749557144 - type: nauc_precision_at_20_max value: 52.09696741873719 - type: nauc_precision_at_3_diff1 value: 33.30844921569695 - type: nauc_precision_at_3_max value: 41.84496633792437 - type: nauc_precision_at_5_diff1 value: 31.000246292430838 - type: nauc_precision_at_5_max value: 43.88721507465343 - type: nauc_recall_at_1000_diff1 value: 15.48972651576705 - type: nauc_recall_at_1000_max value: 75.94259161180725 - type: nauc_recall_at_100_diff1 value: 30.033605095284816 - type: nauc_recall_at_100_max value: 62.40786465750426 - type: nauc_recall_at_10_diff1 value: 28.617170969914984 - type: nauc_recall_at_10_max value: 47.35884745487525 - type: nauc_recall_at_1_diff1 value: 41.18101653444774 - type: nauc_recall_at_1_max value: 34.87383192583458 - type: nauc_recall_at_20_diff1 value: 29.730952749557087 - type: nauc_recall_at_20_max value: 52.09696741873715 - type: nauc_recall_at_3_diff1 value: 33.30844921569694 - type: nauc_recall_at_3_max value: 41.84496633792433 - type: nauc_recall_at_5_diff1 value: 31.000246292430838 - type: 
nauc_recall_at_5_max value: 43.88721507465339 - type: ndcg_at_1 value: 32.080999999999996 - type: ndcg_at_10 value: 49.502 - type: ndcg_at_100 value: 53.52 - type: ndcg_at_1000 value: 54.842 - type: ndcg_at_20 value: 51.219 - type: ndcg_at_3 value: 43.381 - type: ndcg_at_5 value: 46.603 - type: precision_at_1 value: 32.080999999999996 - type: precision_at_10 value: 6.822 - type: precision_at_100 value: 0.873 - type: precision_at_1000 value: 0.098 - type: precision_at_20 value: 3.7479999999999998 - type: precision_at_3 value: 17.142 - type: precision_at_5 value: 11.857 - type: recall_at_1 value: 32.080999999999996 - type: recall_at_10 value: 68.221 - type: recall_at_100 value: 87.349 - type: recall_at_1000 value: 98.014 - type: recall_at_20 value: 74.957 - type: recall_at_3 value: 51.425 - type: recall_at_5 value: 59.282999999999994 - task: type: Classification dataset: type: mteb/amazon_reviews_multi name: MTEB AmazonReviewsClassification (fr) config: fr split: test revision: 1399c76144fd37290681b995c656ef9b2e06e26d metrics: - type: accuracy value: 39.892 - type: f1 value: 38.38126304364462 - type: f1_weighted value: 38.38126304364462 - task: type: Retrieval dataset: type: maastrichtlawtech/bsard name: MTEB BSARDRetrieval config: default split: test revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 metrics: - type: map_at_1 value: 10.811 - type: map_at_10 value: 16.414 - type: map_at_100 value: 17.647 - type: map_at_1000 value: 17.742 - type: map_at_20 value: 17.22 - type: map_at_3 value: 14.188999999999998 - type: map_at_5 value: 15.113 - type: mrr_at_1 value: 10.81081081081081 - type: mrr_at_10 value: 16.41427141427142 - type: mrr_at_100 value: 17.647339314041712 - type: mrr_at_1000 value: 17.74213263983212 - type: mrr_at_20 value: 17.219989884463573 - type: mrr_at_3 value: 14.18918918918919 - type: mrr_at_5 value: 15.112612612612612 - type: nauc_map_at_1000_diff1 value: 13.07108195916555 - type: nauc_map_at_1000_max value: 14.000521014179807 - type: 
nauc_map_at_100_diff1 value: 13.087117094079332 - type: nauc_map_at_100_max value: 13.99712558752583 - type: nauc_map_at_10_diff1 value: 13.452029501381165 - type: nauc_map_at_10_max value: 13.3341655571542 - type: nauc_map_at_1_diff1 value: 14.990419981155167 - type: nauc_map_at_1_max value: 8.812519082504037 - type: nauc_map_at_20_diff1 value: 12.80321357992737 - type: nauc_map_at_20_max value: 14.020962859032371 - type: nauc_map_at_3_diff1 value: 14.84230805712973 - type: nauc_map_at_3_max value: 11.644032755353722 - type: nauc_map_at_5_diff1 value: 15.100168959732835 - type: nauc_map_at_5_max value: 13.634801099074355 - type: nauc_mrr_at_1000_diff1 value: 13.07108195916555 - type: nauc_mrr_at_1000_max value: 14.000521014179807 - type: nauc_mrr_at_100_diff1 value: 13.087117094079332 - type: nauc_mrr_at_100_max value: 13.99712558752583 - type: nauc_mrr_at_10_diff1 value: 13.452029501381165 - type: nauc_mrr_at_10_max value: 13.3341655571542 - type: nauc_mrr_at_1_diff1 value: 14.990419981155167 - type: nauc_mrr_at_1_max value: 8.812519082504037 - type: nauc_mrr_at_20_diff1 value: 12.80321357992737 - type: nauc_mrr_at_20_max value: 14.020962859032371 - type: nauc_mrr_at_3_diff1 value: 14.84230805712973 - type: nauc_mrr_at_3_max value: 11.644032755353722 - type: nauc_mrr_at_5_diff1 value: 15.100168959732835 - type: nauc_mrr_at_5_max value: 13.634801099074355 - type: nauc_ndcg_at_1000_diff1 value: 11.335350893370972 - type: nauc_ndcg_at_1000_max value: 16.09665875369169 - type: nauc_ndcg_at_100_diff1 value: 11.499643600969176 - type: nauc_ndcg_at_100_max value: 15.967105414704186 - type: nauc_ndcg_at_10_diff1 value: 12.093263549786606 - type: nauc_ndcg_at_10_max value: 14.605821897766461 - type: nauc_ndcg_at_1_diff1 value: 14.990419981155167 - type: nauc_ndcg_at_1_max value: 8.812519082504037 - type: nauc_ndcg_at_20_diff1 value: 10.197380043193812 - type: nauc_ndcg_at_20_max value: 16.332533239525365 - type: nauc_ndcg_at_3_diff1 value: 14.835825175950765 - type: 
nauc_ndcg_at_3_max value: 11.898757954417214 - type: nauc_ndcg_at_5_diff1 value: 15.278603386081823 - type: nauc_ndcg_at_5_max value: 15.007133861218167 - type: nauc_precision_at_1000_diff1 value: 2.7469897420865195 - type: nauc_precision_at_1000_max value: 26.874535278616346 - type: nauc_precision_at_100_diff1 value: 7.600735526139776 - type: nauc_precision_at_100_max value: 20.7203382946415 - type: nauc_precision_at_10_diff1 value: 8.938642089366768 - type: nauc_precision_at_10_max value: 17.320961743140874 - type: nauc_precision_at_1_diff1 value: 14.990419981155167 - type: nauc_precision_at_1_max value: 8.812519082504037 - type: nauc_precision_at_20_diff1 value: 3.733877816322278 - type: nauc_precision_at_20_max value: 21.581173305923002 - type: nauc_precision_at_3_diff1 value: 14.828850401790316 - type: nauc_precision_at_3_max value: 12.369943286612463 - type: nauc_precision_at_5_diff1 value: 15.728617939150672 - type: nauc_precision_at_5_max value: 18.103783411900697 - type: nauc_recall_at_1000_diff1 value: 2.746989742086615 - type: nauc_recall_at_1000_max value: 26.874535278616367 - type: nauc_recall_at_100_diff1 value: 7.600735526139775 - type: nauc_recall_at_100_max value: 20.720338294641536 - type: nauc_recall_at_10_diff1 value: 8.93864208936673 - type: nauc_recall_at_10_max value: 17.32096174314083 - type: nauc_recall_at_1_diff1 value: 14.990419981155167 - type: nauc_recall_at_1_max value: 8.812519082504037 - type: nauc_recall_at_20_diff1 value: 3.733877816322231 - type: nauc_recall_at_20_max value: 21.58117330592295 - type: nauc_recall_at_3_diff1 value: 14.828850401790339 - type: nauc_recall_at_3_max value: 12.369943286612509 - type: nauc_recall_at_5_diff1 value: 15.72861793915063 - type: nauc_recall_at_5_max value: 18.103783411900658 - type: ndcg_at_1 value: 10.811 - type: ndcg_at_10 value: 20.244 - type: ndcg_at_100 value: 26.526 - type: ndcg_at_1000 value: 29.217 - type: ndcg_at_20 value: 23.122 - type: ndcg_at_3 value: 15.396 - type: ndcg_at_5 value: 
17.063 - type: precision_at_1 value: 10.811 - type: precision_at_10 value: 3.288 - type: precision_at_100 value: 0.631 - type: precision_at_1000 value: 0.08499999999999999 - type: precision_at_20 value: 2.207 - type: precision_at_3 value: 6.306000000000001 - type: precision_at_5 value: 4.595 - type: recall_at_1 value: 10.811 - type: recall_at_10 value: 32.883 - type: recall_at_100 value: 63.063 - type: recall_at_1000 value: 84.685 - type: recall_at_20 value: 44.144 - type: recall_at_3 value: 18.919 - type: recall_at_5 value: 22.973 - task: type: Clustering dataset: type: lyon-nlp/clustering-hal-s2s name: MTEB HALClusteringS2S config: default split: test revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 metrics: - type: v_measure value: 25.209561281028435 - type: v_measures value: [0.28558356565178666, 0.2707322246129254, 0.2683693125038299, 0.2703937853835602, 0.22057190525667872] - task: type: Clustering dataset: type: reciTAL/mlsum name: MTEB MLSUMClusteringP2P config: default split: test revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 metrics: - type: v_measure value: 42.82528809996964 - type: v_measures value: [0.43465029372260205, 0.42821098223656917, 0.43537879149583325, 0.4289578694928627, 0.3794307754465835] - task: type: Clustering dataset: type: reciTAL/mlsum name: MTEB MLSUMClusteringS2S config: default split: test revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 metrics: - type: v_measure value: 43.44172295073941 - type: v_measures value: [0.4294163918345751, 0.46229994906725164, 0.44188446196569603, 0.43839320352264155, 0.3866853445120933] - task: type: Classification dataset: type: mteb/mtop_domain name: MTEB MTOPDomainClassification (fr) config: fr split: test revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf metrics: - type: accuracy value: 88.33072345756342 - type: f1 value: 88.11780476022122 - type: f1_weighted value: 88.28188145087299 - task: type: Classification dataset: type: mteb/mtop_intent name: MTEB MTOPIntentClassification (fr) 
config: fr split: test revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba metrics: - type: accuracy value: 57.854682117131226 - type: f1 value: 41.121569078191996 - type: f1_weighted value: 60.04845437480532 - task: type: Classification dataset: type: mteb/masakhanews name: MTEB MasakhaNEWSClassification (fra) config: fra split: test revision: 18193f187b92da67168c655c9973a165ed9593dd metrics: - type: accuracy value: 76.87203791469194 - type: f1 value: 72.94847557303437 - type: f1_weighted value: 76.9128173959562 - task: type: Clustering dataset: type: masakhane/masakhanews name: MTEB MasakhaNEWSClusteringP2P (fra) config: fra split: test revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 metrics: - type: v_measure value: 61.32006896333715 - type: v_measures value: [1.0, 0.6446188396257355, 0.28995363026757603, 0.40898735994696084, 0.7224436183265853] - task: type: Clustering dataset: type: masakhane/masakhanews name: MTEB MasakhaNEWSClusteringS2S (fra) config: fra split: test revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 metrics: - type: v_measure value: 60.509887123660256 - type: v_measures value: [1.0, 0.022472587992562534, 0.4686320087689936, 0.811946141094871, 0.7224436183265853] - task: type: Classification dataset: type: mteb/amazon_massive_intent name: MTEB MassiveIntentClassification (fr) config: fr split: test revision: 4672e20407010da34463acc759c162ca9734bca6 metrics: - type: accuracy value: 64.14256893073302 - type: f1 value: 61.33068109342782 - type: f1_weighted value: 62.74292948992287 - task: type: Classification dataset: type: mteb/amazon_massive_scenario name: MTEB MassiveScenarioClassification (fr) config: fr split: test revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 metrics: - type: accuracy value: 70.68930733019502 - type: f1 value: 70.26641874846638 - type: f1_weighted value: 70.35250466465047 - task: type: Retrieval dataset: type: jinaai/mintakaqa name: MTEB MintakaRetrieval (fr) config: fr split: test revision: 
efa78cc2f74bbcd21eff2261f9e13aebe40b814e metrics: - type: map_at_1 value: 19.165 - type: map_at_10 value: 28.663 - type: map_at_100 value: 29.737000000000002 - type: map_at_1000 value: 29.826000000000004 - type: map_at_20 value: 29.266 - type: map_at_3 value: 26.024 - type: map_at_5 value: 27.486 - type: mrr_at_1 value: 19.164619164619165 - type: mrr_at_10 value: 28.66298116298116 - type: mrr_at_100 value: 29.737423308510476 - type: mrr_at_1000 value: 29.825744096186796 - type: mrr_at_20 value: 29.26593905045215 - type: mrr_at_3 value: 26.023751023751025 - type: mrr_at_5 value: 27.48566748566751 - type: nauc_map_at_1000_diff1 value: 23.682512151202967 - type: nauc_map_at_1000_max value: 25.78708364723919 - type: nauc_map_at_100_diff1 value: 23.647360144907324 - type: nauc_map_at_100_max value: 25.812420160707074 - type: nauc_map_at_10_diff1 value: 23.658224717435765 - type: nauc_map_at_10_max value: 25.845198626323217 - type: nauc_map_at_1_diff1 value: 30.56830621718086 - type: nauc_map_at_1_max value: 19.931526248650147 - type: nauc_map_at_20_diff1 value: 23.69662048930091 - type: nauc_map_at_20_max value: 25.936653022318403 - type: nauc_map_at_3_diff1 value: 24.663221072349817 - type: nauc_map_at_3_max value: 24.634011858800275 - type: nauc_map_at_5_diff1 value: 24.3650772668551 - type: nauc_map_at_5_max value: 25.75222318469224 - type: nauc_mrr_at_1000_diff1 value: 23.682512151202967 - type: nauc_mrr_at_1000_max value: 25.78708364723919 - type: nauc_mrr_at_100_diff1 value: 23.647360144907324 - type: nauc_mrr_at_100_max value: 25.812420160707074 - type: nauc_mrr_at_10_diff1 value: 23.658224717435765 - type: nauc_mrr_at_10_max value: 25.845198626323217 - type: nauc_mrr_at_1_diff1 value: 30.56830621718086 - type: nauc_mrr_at_1_max value: 19.931526248650147 - type: nauc_mrr_at_20_diff1 value: 23.69662048930091 - type: nauc_mrr_at_20_max value: 25.936653022318403 - type: nauc_mrr_at_3_diff1 value: 24.663221072349817 - type: nauc_mrr_at_3_max value: 24.634011858800275 
- type: nauc_mrr_at_5_diff1 value: 24.3650772668551 - type: nauc_mrr_at_5_max value: 25.75222318469224 - type: nauc_ndcg_at_1000_diff1 value: 21.68690756038845 - type: nauc_ndcg_at_1000_max value: 27.168575101114893 - type: nauc_ndcg_at_100_diff1 value: 20.484812648526646 - type: nauc_ndcg_at_100_max value: 27.79987215383081 - type: nauc_ndcg_at_10_diff1 value: 20.791330920997765 - type: nauc_ndcg_at_10_max value: 28.272774035036935 - type: nauc_ndcg_at_1_diff1 value: 30.56830621718086 - type: nauc_ndcg_at_1_max value: 19.931526248650147 - type: nauc_ndcg_at_20_diff1 value: 20.88342749790573 - type: nauc_ndcg_at_20_max value: 28.627184419546825 - type: nauc_ndcg_at_3_diff1 value: 22.987235018840494 - type: nauc_ndcg_at_3_max value: 26.054144215976482 - type: nauc_ndcg_at_5_diff1 value: 22.497863289090464 - type: nauc_ndcg_at_5_max value: 27.98879570850259 - type: nauc_precision_at_1000_diff1 value: -0.6707404502167996 - type: nauc_precision_at_1000_max value: 31.987217077673346 - type: nauc_precision_at_100_diff1 value: 5.079765403021014 - type: nauc_precision_at_100_max value: 34.857053312543194 - type: nauc_precision_at_10_diff1 value: 12.628771618059472 - type: nauc_precision_at_10_max value: 35.009564954169896 - type: nauc_precision_at_1_diff1 value: 30.56830621718086 - type: nauc_precision_at_1_max value: 19.931526248650147 - type: nauc_precision_at_20_diff1 value: 12.28251326261041 - type: nauc_precision_at_20_max value: 36.942629359432075 - type: nauc_precision_at_3_diff1 value: 18.663775283519335 - type: nauc_precision_at_3_max value: 29.741315837492472 - type: nauc_precision_at_5_diff1 value: 17.70442691217025 - type: nauc_precision_at_5_max value: 33.93438470540527 - type: nauc_recall_at_1000_diff1 value: -0.6707404502171719 - type: nauc_recall_at_1000_max value: 31.987217077672607 - type: nauc_recall_at_100_diff1 value: 5.079765403021056 - type: nauc_recall_at_100_max value: 34.85705331254323 - type: nauc_recall_at_10_diff1 value: 12.628771618059483 - 
type: nauc_recall_at_10_max value: 35.00956495416992 - type: nauc_recall_at_1_diff1 value: 30.56830621718086 - type: nauc_recall_at_1_max value: 19.931526248650147 - type: nauc_recall_at_20_diff1 value: 12.282513262610411 - type: nauc_recall_at_20_max value: 36.94262935943207 - type: nauc_recall_at_3_diff1 value: 18.663775283519346 - type: nauc_recall_at_3_max value: 29.741315837492465 - type: nauc_recall_at_5_diff1 value: 17.704426912170252 - type: nauc_recall_at_5_max value: 33.934384705405286 - type: ndcg_at_1 value: 19.165 - type: ndcg_at_10 value: 33.674 - type: ndcg_at_100 value: 39.297 - type: ndcg_at_1000 value: 41.896 - type: ndcg_at_20 value: 35.842 - type: ndcg_at_3 value: 28.238999999999997 - type: ndcg_at_5 value: 30.863000000000003 - type: precision_at_1 value: 19.165 - type: precision_at_10 value: 4.9590000000000005 - type: precision_at_100 value: 0.768 - type: precision_at_1000 value: 0.098 - type: precision_at_20 value: 2.905 - type: precision_at_3 value: 11.548 - type: precision_at_5 value: 8.198 - type: recall_at_1 value: 19.165 - type: recall_at_10 value: 49.59 - type: recall_at_100 value: 76.822 - type: recall_at_1000 value: 97.83 - type: recall_at_20 value: 58.108000000000004 - type: recall_at_3 value: 34.644000000000005 - type: recall_at_5 value: 40.991 - task: type: PairClassification dataset: type: GEM/opusparcus name: MTEB OpusparcusPC (fr) config: fr split: test revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a metrics: - type: cos_sim_accuracy value: 83.51498637602179 - type: cos_sim_ap value: 94.18614574224773 - type: cos_sim_f1 value: 88.3564925730714 - type: cos_sim_precision value: 85.37037037037037 - type: cos_sim_recall value: 91.55908639523337 - type: dot_accuracy value: 83.51498637602179 - type: dot_ap value: 94.18614574224773 - type: dot_f1 value: 88.3564925730714 - type: dot_precision value: 85.37037037037037 - type: dot_recall value: 91.55908639523337 - type: euclidean_accuracy value: 83.51498637602179 - type: euclidean_ap 
value: 94.18614574224773 - type: euclidean_f1 value: 88.3564925730714 - type: euclidean_precision value: 85.37037037037037 - type: euclidean_recall value: 91.55908639523337 - type: manhattan_accuracy value: 83.51498637602179 - type: manhattan_ap value: 94.16717671332795 - type: manhattan_f1 value: 88.35418671799807 - type: manhattan_precision value: 85.71428571428571 - type: manhattan_recall value: 91.16186693147964 - type: max_accuracy value: 83.51498637602179 - type: max_ap value: 94.18614574224773 - type: max_f1 value: 88.3564925730714 - task: type: PairClassification dataset: type: google-research-datasets/paws-x name: MTEB PawsX (fr) config: fr split: test revision: 8a04d940a42cd40658986fdd8e3da561533a3646 metrics: - type: cos_sim_accuracy value: 60.699999999999996 - type: cos_sim_ap value: 60.20276173325004 - type: cos_sim_f1 value: 62.716429395921516 - type: cos_sim_precision value: 48.05424528301887 - type: cos_sim_recall value: 90.2547065337763 - type: dot_accuracy value: 60.699999999999996 - type: dot_ap value: 60.27996470746299 - type: dot_f1 value: 62.716429395921516 - type: dot_precision value: 48.05424528301887 - type: dot_recall value: 90.2547065337763 - type: euclidean_accuracy value: 60.699999999999996 - type: euclidean_ap value: 60.20276173325004 - type: euclidean_f1 value: 62.716429395921516 - type: euclidean_precision value: 48.05424528301887 - type: euclidean_recall value: 90.2547065337763 - type: manhattan_accuracy value: 60.699999999999996 - type: manhattan_ap value: 60.18010040913353 - type: manhattan_f1 value: 62.71056661562021 - type: manhattan_precision value: 47.92276184903452 - type: manhattan_recall value: 90.69767441860465 - type: max_accuracy value: 60.699999999999996 - type: max_ap value: 60.27996470746299 - type: max_f1 value: 62.716429395921516 - task: type: STS dataset: type: Lajavaness/SICK-fr name: MTEB SICKFr config: default split: test revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a metrics: - type: cos_sim_pearson value: 
84.24496945719946 - type: cos_sim_spearman value: 78.10001513346513 - type: euclidean_pearson value: 81.43570951228163 - type: euclidean_spearman value: 78.0987784421045 - type: manhattan_pearson value: 81.31986646517238 - type: manhattan_spearman value: 78.09610194828534 - task: type: STS dataset: type: mteb/sts22-crosslingual-sts name: MTEB STS22 (fr) config: fr split: test revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 metrics: - type: cos_sim_pearson value: 83.07721141521425 - type: cos_sim_spearman value: 83.19199466052186 - type: euclidean_pearson value: 82.10672022294766 - type: euclidean_spearman value: 83.19199466052186 - type: manhattan_pearson value: 81.92531847793633 - type: manhattan_spearman value: 83.20694689089673 - task: type: STS dataset: type: mteb/stsb_multi_mt name: MTEB STSBenchmarkMultilingualSTS (fr) config: fr split: test revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c metrics: - type: cos_sim_pearson value: 83.957481748094 - type: cos_sim_spearman value: 84.40492503459248 - type: euclidean_pearson value: 83.8150014101056 - type: euclidean_spearman value: 84.40686653864509 - type: manhattan_pearson value: 83.6816837321264 - type: manhattan_spearman value: 84.2678486368702 - task: type: Summarization dataset: type: lyon-nlp/summarization-summeval-fr-p2p name: MTEB SummEvalFr config: default split: test revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 metrics: - type: cos_sim_pearson value: 32.06592630917136 - type: cos_sim_spearman value: 30.94878864229808 - type: dot_pearson value: 32.06591974515864 - type: dot_spearman value: 30.925383080565222 - task: type: Reranking dataset: type: lyon-nlp/mteb-fr-reranking-syntec-s2p name: MTEB SyntecReranking config: default split: test revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad metrics: - type: map value: 88.11666666666667 - type: mrr value: 88.11666666666667 - type: nAUC_map_diff1 value: 66.27779227667267 - type: nAUC_map_max value: 6.651414764738896 - type: nAUC_mrr_diff1 value: 
66.27779227667267 - type: nAUC_mrr_max value: 6.651414764738896 - task: type: Retrieval dataset: type: lyon-nlp/mteb-fr-retrieval-syntec-s2p name: MTEB SyntecRetrieval config: default split: test revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 metrics: - type: map_at_1 value: 69.0 - type: map_at_10 value: 80.65 - type: map_at_100 value: 80.838 - type: map_at_1000 value: 80.838 - type: map_at_20 value: 80.838 - type: map_at_3 value: 79.833 - type: map_at_5 value: 80.483 - type: mrr_at_1 value: 69.0 - type: mrr_at_10 value: 80.64999999999999 - type: mrr_at_100 value: 80.83799019607844 - type: mrr_at_1000 value: 80.83799019607844 - type: mrr_at_20 value: 80.83799019607844 - type: mrr_at_3 value: 79.83333333333334 - type: mrr_at_5 value: 80.48333333333333 - type: nauc_map_at_1000_diff1 value: 61.46904865740055 - type: nauc_map_at_1000_max value: 24.307826758747282 - type: nauc_map_at_100_diff1 value: 61.46904865740055 - type: nauc_map_at_100_max value: 24.307826758747282 - type: nauc_map_at_10_diff1 value: 61.094194035098035 - type: nauc_map_at_10_max value: 24.44687875369869 - type: nauc_map_at_1_diff1 value: 65.17628798701865 - type: nauc_map_at_1_max value: 25.79501560929155 - type: nauc_map_at_20_diff1 value: 61.46904865740055 - type: nauc_map_at_20_max value: 24.307826758747282 - type: nauc_map_at_3_diff1 value: 61.562719756100805 - type: nauc_map_at_3_max value: 25.87804164282553 - type: nauc_map_at_5_diff1 value: 61.471976470716264 - type: nauc_map_at_5_max value: 25.180513270581322 - type: nauc_mrr_at_1000_diff1 value: 61.46904865740055 - type: nauc_mrr_at_1000_max value: 24.307826758747282 - type: nauc_mrr_at_100_diff1 value: 61.46904865740055 - type: nauc_mrr_at_100_max value: 24.307826758747282 - type: nauc_mrr_at_10_diff1 value: 61.094194035098035 - type: nauc_mrr_at_10_max value: 24.44687875369869 - type: nauc_mrr_at_1_diff1 value: 65.17628798701865 - type: nauc_mrr_at_1_max value: 25.79501560929155 - type: nauc_mrr_at_20_diff1 value: 61.46904865740055 
- type: nauc_mrr_at_20_max value: 24.307826758747282 - type: nauc_mrr_at_3_diff1 value: 61.562719756100805 - type: nauc_mrr_at_3_max value: 25.87804164282553 - type: nauc_mrr_at_5_diff1 value: 61.471976470716264 - type: nauc_mrr_at_5_max value: 25.180513270581322 - type: nauc_ndcg_at_1000_diff1 value: 60.95477865546023 - type: nauc_ndcg_at_1000_max value: 24.427553593893535 - type: nauc_ndcg_at_100_diff1 value: 60.95477865546023 - type: nauc_ndcg_at_100_max value: 24.427553593893535 - type: nauc_ndcg_at_10_diff1 value: 59.101673931307396 - type: nauc_ndcg_at_10_max value: 25.01155211084955 - type: nauc_ndcg_at_1_diff1 value: 65.17628798701865 - type: nauc_ndcg_at_1_max value: 25.79501560929155 - type: nauc_ndcg_at_20_diff1 value: 60.95477865546023 - type: nauc_ndcg_at_20_max value: 24.427553593893535 - type: nauc_ndcg_at_3_diff1 value: 60.333057480044616 - type: nauc_ndcg_at_3_max value: 28.363238330232637 - type: nauc_ndcg_at_5_diff1 value: 60.15511994533307 - type: nauc_ndcg_at_5_max value: 26.94308058940176 - type: nauc_precision_at_1000_diff1 value: nan - type: nauc_precision_at_1000_max value: nan - type: nauc_precision_at_100_diff1 value: nan - type: nauc_precision_at_100_max value: nan - type: nauc_precision_at_10_diff1 value: 26.657329598506518 - type: nauc_precision_at_10_max value: 34.26704014939361 - type: nauc_precision_at_1_diff1 value: 65.17628798701865 - type: nauc_precision_at_1_max value: 25.79501560929155 - type: nauc_precision_at_20_diff1 value: 100.0 - type: nauc_precision_at_20_max value: 100.0 - type: nauc_precision_at_3_diff1 value: 51.834066960117276 - type: nauc_precision_at_3_max value: 48.25930372148875 - type: nauc_precision_at_5_diff1 value: 44.992997198879706 - type: nauc_precision_at_5_max value: 50.70028011204499 - type: nauc_recall_at_1000_diff1 value: nan - type: nauc_recall_at_1000_max value: nan - type: nauc_recall_at_100_diff1 value: nan - type: nauc_recall_at_100_max value: nan - type: nauc_recall_at_10_diff1 value: 
26.657329598505903 - type: nauc_recall_at_10_max value: 34.26704014939303 - type: nauc_recall_at_1_diff1 value: 65.17628798701865 - type: nauc_recall_at_1_max value: 25.79501560929155 - type: nauc_recall_at_20_diff1 value: nan - type: nauc_recall_at_20_max value: nan - type: nauc_recall_at_3_diff1 value: 51.834066960117376 - type: nauc_recall_at_3_max value: 48.25930372148865 - type: nauc_recall_at_5_diff1 value: 44.99299719887955 - type: nauc_recall_at_5_max value: 50.70028011204488 - type: ndcg_at_1 value: 69.0 - type: ndcg_at_10 value: 84.786 - type: ndcg_at_100 value: 85.521 - type: ndcg_at_1000 value: 85.521 - type: ndcg_at_20 value: 85.521 - type: ndcg_at_3 value: 83.226 - type: ndcg_at_5 value: 84.43 - type: precision_at_1 value: 69.0 - type: precision_at_10 value: 9.700000000000001 - type: precision_at_100 value: 1.0 - type: precision_at_1000 value: 0.1 - type: precision_at_20 value: 5.0 - type: precision_at_3 value: 31.0 - type: precision_at_5 value: 19.2 - type: recall_at_1 value: 69.0 - type: recall_at_10 value: 97.0 - type: recall_at_100 value: 100.0 - type: recall_at_1000 value: 100.0 - type: recall_at_20 value: 100.0 - type: recall_at_3 value: 93.0 - type: recall_at_5 value: 96.0 - task: type: Retrieval dataset: type: jinaai/xpqa name: MTEB XPQARetrieval (fr) config: fr split: test revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f metrics: - type: map_at_1 value: 40.797 - type: map_at_10 value: 62.71099999999999 - type: map_at_100 value: 64.261 - type: map_at_1000 value: 64.306 - type: map_at_20 value: 63.693 - type: map_at_3 value: 56.686 - type: map_at_5 value: 60.653999999999996 - type: mrr_at_1 value: 64.08544726301736 - type: mrr_at_10 value: 71.24790726259349 - type: mrr_at_100 value: 71.7835679704396 - type: mrr_at_1000 value: 71.79095567140973 - type: mrr_at_20 value: 71.5854708410262 - type: mrr_at_3 value: 69.55941255006672 - type: mrr_at_5 value: 70.60747663551396 - type: nauc_map_at_1000_diff1 value: 47.803181417639365 - type: 
nauc_map_at_1000_max value: 51.22073368230412 - type: nauc_map_at_100_diff1 value: 47.771573391555755 - type: nauc_map_at_100_max value: 51.20370234778812 - type: nauc_map_at_10_diff1 value: 47.340833389771625 - type: nauc_map_at_10_max value: 50.41256517180715 - type: nauc_map_at_1_diff1 value: 55.14983744702445 - type: nauc_map_at_1_max value: 31.104750896985728 - type: nauc_map_at_20_diff1 value: 47.64026863999484 - type: nauc_map_at_20_max value: 50.87670909266768 - type: nauc_map_at_3_diff1 value: 47.681906747352635 - type: nauc_map_at_3_max value: 43.47246277661219 - type: nauc_map_at_5_diff1 value: 46.874943002794815 - type: nauc_map_at_5_max value: 48.469495140739724 - type: nauc_mrr_at_1000_diff1 value: 57.34098736669957 - type: nauc_mrr_at_1000_max value: 60.179095583193444 - type: nauc_mrr_at_100_diff1 value: 57.339862158018796 - type: nauc_mrr_at_100_max value: 60.18082273539442 - type: nauc_mrr_at_10_diff1 value: 57.210874058908814 - type: nauc_mrr_at_10_max value: 60.043680803697086 - type: nauc_mrr_at_1_diff1 value: 59.69074056197331 - type: nauc_mrr_at_1_max value: 60.90082316300324 - type: nauc_mrr_at_20_diff1 value: 57.35434243512763 - type: nauc_mrr_at_20_max value: 60.18873377253912 - type: nauc_mrr_at_3_diff1 value: 57.26933631425754 - type: nauc_mrr_at_3_max value: 60.05458089795687 - type: nauc_mrr_at_5_diff1 value: 57.045411517214276 - type: nauc_mrr_at_5_max value: 59.981421712413685 - type: nauc_ndcg_at_1000_diff1 value: 50.232929738614814 - type: nauc_ndcg_at_1000_max value: 55.01594185277396 - type: nauc_ndcg_at_100_diff1 value: 49.876825728406786 - type: nauc_ndcg_at_100_max value: 54.87898182661215 - type: nauc_ndcg_at_10_diff1 value: 48.40787615482867 - type: nauc_ndcg_at_10_max value: 52.84877289626636 - type: nauc_ndcg_at_1_diff1 value: 59.69074056197331 - type: nauc_ndcg_at_1_max value: 60.90082316300324 - type: nauc_ndcg_at_20_diff1 value: 49.08453974591539 - type: nauc_ndcg_at_20_max value: 53.80319392912378 - type: 
nauc_ndcg_at_3_diff1 value: 48.21830414023458 - type: nauc_ndcg_at_3_max value: 51.321799626032714 - type: nauc_ndcg_at_5_diff1 value: 47.614495954542605 - type: nauc_ndcg_at_5_max value: 50.803800463597405 - type: nauc_precision_at_1000_diff1 value: -15.87250509394414 - type: nauc_precision_at_1000_max value: 16.09830137145176 - type: nauc_precision_at_100_diff1 value: -13.720930651556534 - type: nauc_precision_at_100_max value: 19.94363871765946 - type: nauc_precision_at_10_diff1 value: -3.9626074014054136 - type: nauc_precision_at_10_max value: 30.48732389685921 - type: nauc_precision_at_1_diff1 value: 59.69074056197331 - type: nauc_precision_at_1_max value: 60.90082316300324 - type: nauc_precision_at_20_diff1 value: -8.144148640034853 - type: nauc_precision_at_20_max value: 26.183545158653338 - type: nauc_precision_at_3_diff1 value: 7.1166818076254605 - type: nauc_precision_at_3_max value: 37.64665636029093 - type: nauc_precision_at_5_diff1 value: 0.3455996928663316 - type: nauc_precision_at_5_max value: 34.95245204298077 - type: nauc_recall_at_1000_diff1 value: 47.93171740380228 - type: nauc_recall_at_1000_max value: 89.21354057542635 - type: nauc_recall_at_100_diff1 value: 34.93973412699365 - type: nauc_recall_at_100_max value: 47.89216950421148 - type: nauc_recall_at_10_diff1 value: 38.58556368247737 - type: nauc_recall_at_10_max value: 45.13227163006313 - type: nauc_recall_at_1_diff1 value: 55.14983744702445 - type: nauc_recall_at_1_max value: 31.104750896985728 - type: nauc_recall_at_20_diff1 value: 38.53568097509877 - type: nauc_recall_at_20_max value: 46.37328875121808 - type: nauc_recall_at_3_diff1 value: 41.49659886305561 - type: nauc_recall_at_3_max value: 38.59476562231703 - type: nauc_recall_at_5_diff1 value: 38.489499442628016 - type: nauc_recall_at_5_max value: 43.06848825600403 - type: ndcg_at_1 value: 64.08500000000001 - type: ndcg_at_10 value: 68.818 - type: ndcg_at_100 value: 73.66 - type: ndcg_at_1000 value: 74.309 - type: ndcg_at_20 value: 
71.147 - type: ndcg_at_3 value: 64.183 - type: ndcg_at_5 value: 65.668 - type: precision_at_1 value: 64.08500000000001 - type: precision_at_10 value: 15.728 - type: precision_at_100 value: 1.9720000000000002 - type: precision_at_1000 value: 0.207 - type: precision_at_20 value: 8.705 - type: precision_at_3 value: 39.03 - type: precision_at_5 value: 27.717000000000002 - type: recall_at_1 value: 40.797 - type: recall_at_10 value: 77.432 - type: recall_at_100 value: 95.68100000000001 - type: recall_at_1000 value: 99.666 - type: recall_at_20 value: 84.773 - type: recall_at_3 value: 62.083 - type: recall_at_5 value: 69.786 license: apache-2.0 language: - fr - en --- ## Model Description: [**french-document-embedding**](https://huggingface.co/dangvantuan/french-document-embedding) is a bilingual French-English embedding model for documents, with a context length of up to 8192 tokens. It is a specialized text-embedding model trained specifically for French and English. It is built upon [gte-multilingual](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) and trained using SimilarityLoss, [Multiple Negatives Ranking Loss](https://arxiv.org/abs/1705.00652), [Matryoshka2dLoss](https://arxiv.org/html/2402.14776v1) and [GISTEmbedLoss](https://arxiv.org/abs/2402.16829) with a [guide model](https://huggingface.co/Lajavaness/bilingual-embedding-large). This model converts long texts or documents into 768-dimensional vectors, making it useful for vector databases serving semantic search or RAG (Retrieval-Augmented Generation). 
## Full Model Architecture ``` SentenceTransformer( (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: BilingualModel (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) (2): Normalize() ) ``` ## Usage: Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: ``` pip install -U sentence-transformers ``` Then you can use the model like this: ```python from sentence_transformers import SentenceTransformer sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] model = SentenceTransformer('dangvantuan/french-document-embedding', trust_remote_code=True) embeddings = model.encode(sentences) print(embeddings) ``` ## Evaluation MTEB results on French benchmark tasks are reported in the `model-index` metadata at the top of this model card. ## Citation ```bibtex @article{reimers2019sentence, title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, author={Reimers, Nils and Gurevych, Iryna}, journal={arXiv preprint arXiv:1908.10084}, year={2019} } @article{zhang2024mgte, title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval}, author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others}, journal={arXiv preprint arXiv:2407.19669}, year={2024} } @article{li2023towards, title={Towards general text embeddings with multi-stage contrastive learning}, author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan}, journal={arXiv preprint arXiv:2308.03281}, year={2023} } @article{li20242d, title={2d matryoshka sentence embeddings}, author={Li, Xianming and Li, Zongxi and Li, Jing and Xie, Haoran and Li, Qing}, journal={arXiv preprint 
arXiv:2402.14776}, year={2024} } @misc{henderson2017efficient, title={Efficient Natural Language Response Suggestion for Smart Reply}, author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, year={2017}, eprint={1705.00652}, archivePrefix={arXiv}, primaryClass={cs.CL} } @misc{solatorio2024gistembed, title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning}, author={Aivin V. Solatorio}, year={2024}, eprint={2402.16829}, archivePrefix={arXiv}, primaryClass={cs.LG} } ```