|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- french |
|
- english |
|
- sentence-embedding |
|
- mteb |
|
model-index: |
|
- name: 7eff199d41ff669fad99d83cad9249c393c3f14b |
|
results: |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringP2P |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 59.69196295449414 |
|
- type: v_measures |
|
value: [0.6355772777559684, 0.4980707615440343, 0.5851538838323186, 0.6567709175938427, 0.5712405288636999] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringS2S |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 45.607106996926426 |
|
- type: v_measures |
|
value: [0.45846869913649535, 0.42657120373128293, 0.45507356125930876, 0.4258913306353704, 0.4779122207000794] |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
name: MTEB AlloprofReranking |
|
config: default |
|
split: test |
|
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd |
|
metrics: |
|
- type: map |
|
value: 73.51836428087765 |
|
- type: mrr |
|
value: 74.8550285111166 |
|
- type: nAUC_map_diff1 |
|
value: 56.006169898728466 |
|
- type: nAUC_map_max |
|
value: 27.886037223407506 |
|
- type: nAUC_mrr_diff1 |
|
value: 56.68072778248672 |
|
- type: nAUC_mrr_max |
|
value: 29.362681962243276 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloprofRetrieval |
|
config: default |
|
split: test |
|
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd |
|
metrics: |
|
- type: map_at_1 |
|
value: 32.080999999999996 |
|
- type: map_at_10 |
|
value: 43.582 |
|
- type: map_at_100 |
|
value: 44.381 |
|
- type: map_at_1000 |
|
value: 44.426 |
|
- type: map_at_20 |
|
value: 44.061 |
|
- type: map_at_3 |
|
value: 40.602 |
|
- type: map_at_5 |
|
value: 42.381 |
|
- type: mrr_at_1 |
|
value: 32.08117443868739 |
|
- type: mrr_at_10 |
|
value: 43.5823429832498 |
|
- type: mrr_at_100 |
|
value: 44.38068560877513 |
|
- type: mrr_at_1000 |
|
value: 44.426194305504026 |
|
- type: mrr_at_20 |
|
value: 44.06128094655753 |
|
- type: mrr_at_3 |
|
value: 40.60161197466903 |
|
- type: mrr_at_5 |
|
value: 42.380541162924715 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 37.22997629352391 |
|
- type: nauc_map_at_1000_max |
|
value: 38.65090969900466 |
|
- type: nauc_map_at_100_diff1 |
|
value: 37.22644507166512 |
|
- type: nauc_map_at_100_max |
|
value: 38.67447923917633 |
|
- type: nauc_map_at_10_diff1 |
|
value: 37.02440573022942 |
|
- type: nauc_map_at_10_max |
|
value: 38.52972171430789 |
|
- type: nauc_map_at_1_diff1 |
|
value: 41.18101653444774 |
|
- type: nauc_map_at_1_max |
|
value: 34.87383192583458 |
|
- type: nauc_map_at_20_diff1 |
|
value: 37.14172285932024 |
|
- type: nauc_map_at_20_max |
|
value: 38.66753159239803 |
|
- type: nauc_map_at_3_diff1 |
|
value: 37.53556306862998 |
|
- type: nauc_map_at_3_max |
|
value: 37.86008195327724 |
|
- type: nauc_map_at_5_diff1 |
|
value: 37.14904081229067 |
|
- type: nauc_map_at_5_max |
|
value: 38.267819714061105 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 37.22997629352391 |
|
- type: nauc_mrr_at_1000_max |
|
value: 38.65090969900466 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 37.22644507166512 |
|
- type: nauc_mrr_at_100_max |
|
value: 38.67447923917633 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 37.02440573022942 |
|
- type: nauc_mrr_at_10_max |
|
value: 38.52972171430789 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 41.18101653444774 |
|
- type: nauc_mrr_at_1_max |
|
value: 34.87383192583458 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 37.14172285932024 |
|
- type: nauc_mrr_at_20_max |
|
value: 38.66753159239803 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 37.53556306862998 |
|
- type: nauc_mrr_at_3_max |
|
value: 37.86008195327724 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 37.14904081229067 |
|
- type: nauc_mrr_at_5_max |
|
value: 38.267819714061105 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 36.313082263552204 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 40.244406213773765 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 36.17060946689135 |
|
- type: nauc_ndcg_at_100_max |
|
value: 41.069278488584416 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 35.2775471480974 |
|
- type: nauc_ndcg_at_10_max |
|
value: 40.33902753007036 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 41.18101653444774 |
|
- type: nauc_ndcg_at_1_max |
|
value: 34.87383192583458 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 35.71067272175871 |
|
- type: nauc_ndcg_at_20_max |
|
value: 40.94374381572908 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 36.45082651868188 |
|
- type: nauc_ndcg_at_3_max |
|
value: 38.87195110158222 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 35.683568481780505 |
|
- type: nauc_ndcg_at_5_max |
|
value: 39.606933866599 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 15.489726515767439 |
|
- type: nauc_precision_at_1000_max |
|
value: 75.94259161180715 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 30.033605095284656 |
|
- type: nauc_precision_at_100_max |
|
value: 62.40786465750442 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 28.617170969915 |
|
- type: nauc_precision_at_10_max |
|
value: 47.35884745487521 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 41.18101653444774 |
|
- type: nauc_precision_at_1_max |
|
value: 34.87383192583458 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 29.730952749557144 |
|
- type: nauc_precision_at_20_max |
|
value: 52.09696741873719 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 33.30844921569695 |
|
- type: nauc_precision_at_3_max |
|
value: 41.84496633792437 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 31.000246292430838 |
|
- type: nauc_precision_at_5_max |
|
value: 43.88721507465343 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 15.48972651576705 |
|
- type: nauc_recall_at_1000_max |
|
value: 75.94259161180725 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 30.033605095284816 |
|
- type: nauc_recall_at_100_max |
|
value: 62.40786465750426 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 28.617170969914984 |
|
- type: nauc_recall_at_10_max |
|
value: 47.35884745487525 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 41.18101653444774 |
|
- type: nauc_recall_at_1_max |
|
value: 34.87383192583458 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 29.730952749557087 |
|
- type: nauc_recall_at_20_max |
|
value: 52.09696741873715 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 33.30844921569694 |
|
- type: nauc_recall_at_3_max |
|
value: 41.84496633792433 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 31.000246292430838 |
|
- type: nauc_recall_at_5_max |
|
value: 43.88721507465339 |
|
- type: ndcg_at_1 |
|
value: 32.080999999999996 |
|
- type: ndcg_at_10 |
|
value: 49.502 |
|
- type: ndcg_at_100 |
|
value: 53.52 |
|
- type: ndcg_at_1000 |
|
value: 54.842 |
|
- type: ndcg_at_20 |
|
value: 51.219 |
|
- type: ndcg_at_3 |
|
value: 43.381 |
|
- type: ndcg_at_5 |
|
value: 46.603 |
|
- type: precision_at_1 |
|
value: 32.080999999999996 |
|
- type: precision_at_10 |
|
value: 6.822 |
|
- type: precision_at_100 |
|
value: 0.873 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_20 |
|
value: 3.7479999999999998 |
|
- type: precision_at_3 |
|
value: 17.142 |
|
- type: precision_at_5 |
|
value: 11.857 |
|
- type: recall_at_1 |
|
value: 32.080999999999996 |
|
- type: recall_at_10 |
|
value: 68.221 |
|
- type: recall_at_100 |
|
value: 87.349 |
|
- type: recall_at_1000 |
|
value: 98.014 |
|
- type: recall_at_20 |
|
value: 74.957 |
|
- type: recall_at_3 |
|
value: 51.425 |
|
- type: recall_at_5 |
|
value: 59.282999999999994 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 39.892 |
|
- type: f1 |
|
value: 38.38126304364462 |
|
- type: f1_weighted |
|
value: 38.38126304364462 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: maastrichtlawtech/bsard |
|
name: MTEB BSARDRetrieval |
|
config: default |
|
split: test |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
metrics: |
|
- type: map_at_1 |
|
value: 10.811 |
|
- type: map_at_10 |
|
value: 16.414 |
|
- type: map_at_100 |
|
value: 17.647 |
|
- type: map_at_1000 |
|
value: 17.742 |
|
- type: map_at_20 |
|
value: 17.22 |
|
- type: map_at_3 |
|
value: 14.188999999999998 |
|
- type: map_at_5 |
|
value: 15.113 |
|
- type: mrr_at_1 |
|
value: 10.81081081081081 |
|
- type: mrr_at_10 |
|
value: 16.41427141427142 |
|
- type: mrr_at_100 |
|
value: 17.647339314041712 |
|
- type: mrr_at_1000 |
|
value: 17.74213263983212 |
|
- type: mrr_at_20 |
|
value: 17.219989884463573 |
|
- type: mrr_at_3 |
|
value: 14.18918918918919 |
|
- type: mrr_at_5 |
|
value: 15.112612612612612 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 13.07108195916555 |
|
- type: nauc_map_at_1000_max |
|
value: 14.000521014179807 |
|
- type: nauc_map_at_100_diff1 |
|
value: 13.087117094079332 |
|
- type: nauc_map_at_100_max |
|
value: 13.99712558752583 |
|
- type: nauc_map_at_10_diff1 |
|
value: 13.452029501381165 |
|
- type: nauc_map_at_10_max |
|
value: 13.3341655571542 |
|
- type: nauc_map_at_1_diff1 |
|
value: 14.990419981155167 |
|
- type: nauc_map_at_1_max |
|
value: 8.812519082504037 |
|
- type: nauc_map_at_20_diff1 |
|
value: 12.80321357992737 |
|
- type: nauc_map_at_20_max |
|
value: 14.020962859032371 |
|
- type: nauc_map_at_3_diff1 |
|
value: 14.84230805712973 |
|
- type: nauc_map_at_3_max |
|
value: 11.644032755353722 |
|
- type: nauc_map_at_5_diff1 |
|
value: 15.100168959732835 |
|
- type: nauc_map_at_5_max |
|
value: 13.634801099074355 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 13.07108195916555 |
|
- type: nauc_mrr_at_1000_max |
|
value: 14.000521014179807 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 13.087117094079332 |
|
- type: nauc_mrr_at_100_max |
|
value: 13.99712558752583 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 13.452029501381165 |
|
- type: nauc_mrr_at_10_max |
|
value: 13.3341655571542 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 14.990419981155167 |
|
- type: nauc_mrr_at_1_max |
|
value: 8.812519082504037 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 12.80321357992737 |
|
- type: nauc_mrr_at_20_max |
|
value: 14.020962859032371 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 14.84230805712973 |
|
- type: nauc_mrr_at_3_max |
|
value: 11.644032755353722 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 15.100168959732835 |
|
- type: nauc_mrr_at_5_max |
|
value: 13.634801099074355 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 11.335350893370972 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 16.09665875369169 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 11.499643600969176 |
|
- type: nauc_ndcg_at_100_max |
|
value: 15.967105414704186 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 12.093263549786606 |
|
- type: nauc_ndcg_at_10_max |
|
value: 14.605821897766461 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 14.990419981155167 |
|
- type: nauc_ndcg_at_1_max |
|
value: 8.812519082504037 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 10.197380043193812 |
|
- type: nauc_ndcg_at_20_max |
|
value: 16.332533239525365 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 14.835825175950765 |
|
- type: nauc_ndcg_at_3_max |
|
value: 11.898757954417214 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 15.278603386081823 |
|
- type: nauc_ndcg_at_5_max |
|
value: 15.007133861218167 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 2.7469897420865195 |
|
- type: nauc_precision_at_1000_max |
|
value: 26.874535278616346 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 7.600735526139776 |
|
- type: nauc_precision_at_100_max |
|
value: 20.7203382946415 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 8.938642089366768 |
|
- type: nauc_precision_at_10_max |
|
value: 17.320961743140874 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 14.990419981155167 |
|
- type: nauc_precision_at_1_max |
|
value: 8.812519082504037 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 3.733877816322278 |
|
- type: nauc_precision_at_20_max |
|
value: 21.581173305923002 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 14.828850401790316 |
|
- type: nauc_precision_at_3_max |
|
value: 12.369943286612463 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 15.728617939150672 |
|
- type: nauc_precision_at_5_max |
|
value: 18.103783411900697 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 2.746989742086615 |
|
- type: nauc_recall_at_1000_max |
|
value: 26.874535278616367 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 7.600735526139775 |
|
- type: nauc_recall_at_100_max |
|
value: 20.720338294641536 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 8.93864208936673 |
|
- type: nauc_recall_at_10_max |
|
value: 17.32096174314083 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 14.990419981155167 |
|
- type: nauc_recall_at_1_max |
|
value: 8.812519082504037 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 3.733877816322231 |
|
- type: nauc_recall_at_20_max |
|
value: 21.58117330592295 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 14.828850401790339 |
|
- type: nauc_recall_at_3_max |
|
value: 12.369943286612509 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 15.72861793915063 |
|
- type: nauc_recall_at_5_max |
|
value: 18.103783411900658 |
|
- type: ndcg_at_1 |
|
value: 10.811 |
|
- type: ndcg_at_10 |
|
value: 20.244 |
|
- type: ndcg_at_100 |
|
value: 26.526 |
|
- type: ndcg_at_1000 |
|
value: 29.217 |
|
- type: ndcg_at_20 |
|
value: 23.122 |
|
- type: ndcg_at_3 |
|
value: 15.396 |
|
- type: ndcg_at_5 |
|
value: 17.063 |
|
- type: precision_at_1 |
|
value: 10.811 |
|
- type: precision_at_10 |
|
value: 3.288 |
|
- type: precision_at_100 |
|
value: 0.631 |
|
- type: precision_at_1000 |
|
value: 0.08499999999999999 |
|
- type: precision_at_20 |
|
value: 2.207 |
|
- type: precision_at_3 |
|
value: 6.306000000000001 |
|
- type: precision_at_5 |
|
value: 4.595 |
|
- type: recall_at_1 |
|
value: 10.811 |
|
- type: recall_at_10 |
|
value: 32.883 |
|
- type: recall_at_100 |
|
value: 63.063 |
|
- type: recall_at_1000 |
|
value: 84.685 |
|
- type: recall_at_20 |
|
value: 44.144 |
|
- type: recall_at_3 |
|
value: 18.919 |
|
- type: recall_at_5 |
|
value: 22.973 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/clustering-hal-s2s |
|
name: MTEB HALClusteringS2S |
|
config: default |
|
split: test |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
metrics: |
|
- type: v_measure |
|
value: 25.209561281028435 |
|
- type: v_measures |
|
value: [0.28558356565178666, 0.2707322246129254, 0.2683693125038299, 0.2703937853835602, 0.22057190525667872] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringP2P |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 42.82528809996964 |
|
- type: v_measures |
|
value: [0.43465029372260205, 0.42821098223656917, 0.43537879149583325, 0.4289578694928627, 0.3794307754465835] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringS2S |
|
config: default |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 43.44172295073941 |
|
- type: v_measures |
|
value: [0.4294163918345751, 0.46229994906725164, 0.44188446196569603, 0.43839320352264155, 0.3866853445120933] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (fr) |
|
config: fr |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 88.33072345756342 |
|
- type: f1 |
|
value: 88.11780476022122 |
|
- type: f1_weighted |
|
value: 88.28188145087299 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 57.854682117131226 |
|
- type: f1 |
|
value: 41.121569078191996 |
|
- type: f1_weighted |
|
value: 60.04845437480532 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/masakhanews |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
config: fra |
|
split: test |
|
revision: 18193f187b92da67168c655c9973a165ed9593dd |
|
metrics: |
|
- type: accuracy |
|
value: 76.87203791469194 |
|
- type: f1 |
|
value: 72.94847557303437 |
|
- type: f1_weighted |
|
value: 76.9128173959562 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 61.32006896333715 |
|
- type: v_measures |
|
value: [1.0, 0.6446188396257355, 0.28995363026757603, 0.40898735994696084, 0.7224436183265853] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringS2S (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 60.509887123660256 |
|
- type: v_measures |
|
value: [1.0, 0.022472587992562534, 0.4686320087689936, 0.811946141094871, 0.7224436183265853] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
metrics: |
|
- type: accuracy |
|
value: 64.14256893073302 |
|
- type: f1 |
|
value: 61.33068109342782 |
|
- type: f1_weighted |
|
value: 62.74292948992287 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (fr) |
|
config: fr |
|
split: test |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
metrics: |
|
- type: accuracy |
|
value: 70.68930733019502 |
|
- type: f1 |
|
value: 70.26641874846638 |
|
- type: f1_weighted |
|
value: 70.35250466465047 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/mintakaqa |
|
name: MTEB MintakaRetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
metrics: |
|
- type: map_at_1 |
|
value: 19.165 |
|
- type: map_at_10 |
|
value: 28.663 |
|
- type: map_at_100 |
|
value: 29.737000000000002 |
|
- type: map_at_1000 |
|
value: 29.826000000000004 |
|
- type: map_at_20 |
|
value: 29.266 |
|
- type: map_at_3 |
|
value: 26.024 |
|
- type: map_at_5 |
|
value: 27.486 |
|
- type: mrr_at_1 |
|
value: 19.164619164619165 |
|
- type: mrr_at_10 |
|
value: 28.66298116298116 |
|
- type: mrr_at_100 |
|
value: 29.737423308510476 |
|
- type: mrr_at_1000 |
|
value: 29.825744096186796 |
|
- type: mrr_at_20 |
|
value: 29.26593905045215 |
|
- type: mrr_at_3 |
|
value: 26.023751023751025 |
|
- type: mrr_at_5 |
|
value: 27.48566748566751 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 23.682512151202967 |
|
- type: nauc_map_at_1000_max |
|
value: 25.78708364723919 |
|
- type: nauc_map_at_100_diff1 |
|
value: 23.647360144907324 |
|
- type: nauc_map_at_100_max |
|
value: 25.812420160707074 |
|
- type: nauc_map_at_10_diff1 |
|
value: 23.658224717435765 |
|
- type: nauc_map_at_10_max |
|
value: 25.845198626323217 |
|
- type: nauc_map_at_1_diff1 |
|
value: 30.56830621718086 |
|
- type: nauc_map_at_1_max |
|
value: 19.931526248650147 |
|
- type: nauc_map_at_20_diff1 |
|
value: 23.69662048930091 |
|
- type: nauc_map_at_20_max |
|
value: 25.936653022318403 |
|
- type: nauc_map_at_3_diff1 |
|
value: 24.663221072349817 |
|
- type: nauc_map_at_3_max |
|
value: 24.634011858800275 |
|
- type: nauc_map_at_5_diff1 |
|
value: 24.3650772668551 |
|
- type: nauc_map_at_5_max |
|
value: 25.75222318469224 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 23.682512151202967 |
|
- type: nauc_mrr_at_1000_max |
|
value: 25.78708364723919 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 23.647360144907324 |
|
- type: nauc_mrr_at_100_max |
|
value: 25.812420160707074 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 23.658224717435765 |
|
- type: nauc_mrr_at_10_max |
|
value: 25.845198626323217 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 30.56830621718086 |
|
- type: nauc_mrr_at_1_max |
|
value: 19.931526248650147 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 23.69662048930091 |
|
- type: nauc_mrr_at_20_max |
|
value: 25.936653022318403 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 24.663221072349817 |
|
- type: nauc_mrr_at_3_max |
|
value: 24.634011858800275 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 24.3650772668551 |
|
- type: nauc_mrr_at_5_max |
|
value: 25.75222318469224 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 21.68690756038845 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 27.168575101114893 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 20.484812648526646 |
|
- type: nauc_ndcg_at_100_max |
|
value: 27.79987215383081 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 20.791330920997765 |
|
- type: nauc_ndcg_at_10_max |
|
value: 28.272774035036935 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 30.56830621718086 |
|
- type: nauc_ndcg_at_1_max |
|
value: 19.931526248650147 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 20.88342749790573 |
|
- type: nauc_ndcg_at_20_max |
|
value: 28.627184419546825 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 22.987235018840494 |
|
- type: nauc_ndcg_at_3_max |
|
value: 26.054144215976482 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 22.497863289090464 |
|
- type: nauc_ndcg_at_5_max |
|
value: 27.98879570850259 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -0.6707404502167996 |
|
- type: nauc_precision_at_1000_max |
|
value: 31.987217077673346 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 5.079765403021014 |
|
- type: nauc_precision_at_100_max |
|
value: 34.857053312543194 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 12.628771618059472 |
|
- type: nauc_precision_at_10_max |
|
value: 35.009564954169896 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 30.56830621718086 |
|
- type: nauc_precision_at_1_max |
|
value: 19.931526248650147 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 12.28251326261041 |
|
- type: nauc_precision_at_20_max |
|
value: 36.942629359432075 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 18.663775283519335 |
|
- type: nauc_precision_at_3_max |
|
value: 29.741315837492472 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 17.70442691217025 |
|
- type: nauc_precision_at_5_max |
|
value: 33.93438470540527 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -0.6707404502171719 |
|
- type: nauc_recall_at_1000_max |
|
value: 31.987217077672607 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 5.079765403021056 |
|
- type: nauc_recall_at_100_max |
|
value: 34.85705331254323 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 12.628771618059483 |
|
- type: nauc_recall_at_10_max |
|
value: 35.00956495416992 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 30.56830621718086 |
|
- type: nauc_recall_at_1_max |
|
value: 19.931526248650147 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 12.282513262610411 |
|
- type: nauc_recall_at_20_max |
|
value: 36.94262935943207 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 18.663775283519346 |
|
- type: nauc_recall_at_3_max |
|
value: 29.741315837492465 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 17.704426912170252 |
|
- type: nauc_recall_at_5_max |
|
value: 33.934384705405286 |
|
- type: ndcg_at_1 |
|
value: 19.165 |
|
- type: ndcg_at_10 |
|
value: 33.674 |
|
- type: ndcg_at_100 |
|
value: 39.297 |
|
- type: ndcg_at_1000 |
|
value: 41.896 |
|
- type: ndcg_at_20 |
|
value: 35.842 |
|
- type: ndcg_at_3 |
|
value: 28.238999999999997 |
|
- type: ndcg_at_5 |
|
value: 30.863000000000003 |
|
- type: precision_at_1 |
|
value: 19.165 |
|
- type: precision_at_10 |
|
value: 4.9590000000000005 |
|
- type: precision_at_100 |
|
value: 0.768 |
|
- type: precision_at_1000 |
|
value: 0.098 |
|
- type: precision_at_20 |
|
value: 2.905 |
|
- type: precision_at_3 |
|
value: 11.548 |
|
- type: precision_at_5 |
|
value: 8.198 |
|
- type: recall_at_1 |
|
value: 19.165 |
|
- type: recall_at_10 |
|
value: 49.59 |
|
- type: recall_at_100 |
|
value: 76.822 |
|
- type: recall_at_1000 |
|
value: 97.83 |
|
- type: recall_at_20 |
|
value: 58.108000000000004 |
|
- type: recall_at_3 |
|
value: 34.644000000000005 |
|
- type: recall_at_5 |
|
value: 40.991 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: GEM/opusparcus |
|
name: MTEB OpusparcusPC (fr) |
|
config: fr |
|
split: test |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 83.51498637602179 |
|
- type: cos_sim_ap |
|
value: 94.18614574224773 |
|
- type: cos_sim_f1 |
|
value: 88.3564925730714 |
|
- type: cos_sim_precision |
|
value: 85.37037037037037 |
|
- type: cos_sim_recall |
|
value: 91.55908639523337 |
|
- type: dot_accuracy |
|
value: 83.51498637602179 |
|
- type: dot_ap |
|
value: 94.18614574224773 |
|
- type: dot_f1 |
|
value: 88.3564925730714 |
|
- type: dot_precision |
|
value: 85.37037037037037 |
|
- type: dot_recall |
|
value: 91.55908639523337 |
|
- type: euclidean_accuracy |
|
value: 83.51498637602179 |
|
- type: euclidean_ap |
|
value: 94.18614574224773 |
|
- type: euclidean_f1 |
|
value: 88.3564925730714 |
|
- type: euclidean_precision |
|
value: 85.37037037037037 |
|
- type: euclidean_recall |
|
value: 91.55908639523337 |
|
- type: manhattan_accuracy |
|
value: 83.51498637602179 |
|
- type: manhattan_ap |
|
value: 94.16717671332795 |
|
- type: manhattan_f1 |
|
value: 88.35418671799807 |
|
- type: manhattan_precision |
|
value: 85.71428571428571 |
|
- type: manhattan_recall |
|
value: 91.16186693147964 |
|
- type: max_accuracy |
|
value: 83.51498637602179 |
|
- type: max_ap |
|
value: 94.18614574224773 |
|
- type: max_f1 |
|
value: 88.3564925730714 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: google-research-datasets/paws-x |
|
name: MTEB PawsX (fr) |
|
config: fr |
|
split: test |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 60.699999999999996 |
|
- type: cos_sim_ap |
|
value: 60.20276173325004 |
|
- type: cos_sim_f1 |
|
value: 62.716429395921516 |
|
- type: cos_sim_precision |
|
value: 48.05424528301887 |
|
- type: cos_sim_recall |
|
value: 90.2547065337763 |
|
- type: dot_accuracy |
|
value: 60.699999999999996 |
|
- type: dot_ap |
|
value: 60.27996470746299 |
|
- type: dot_f1 |
|
value: 62.716429395921516 |
|
- type: dot_precision |
|
value: 48.05424528301887 |
|
- type: dot_recall |
|
value: 90.2547065337763 |
|
- type: euclidean_accuracy |
|
value: 60.699999999999996 |
|
- type: euclidean_ap |
|
value: 60.20276173325004 |
|
- type: euclidean_f1 |
|
value: 62.716429395921516 |
|
- type: euclidean_precision |
|
value: 48.05424528301887 |
|
- type: euclidean_recall |
|
value: 90.2547065337763 |
|
- type: manhattan_accuracy |
|
value: 60.699999999999996 |
|
- type: manhattan_ap |
|
value: 60.18010040913353 |
|
- type: manhattan_f1 |
|
value: 62.71056661562021 |
|
- type: manhattan_precision |
|
value: 47.92276184903452 |
|
- type: manhattan_recall |
|
value: 90.69767441860465 |
|
- type: max_accuracy |
|
value: 60.699999999999996 |
|
- type: max_ap |
|
value: 60.27996470746299 |
|
- type: max_f1 |
|
value: 62.716429395921516 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: Lajavaness/SICK-fr |
|
name: MTEB SICKFr |
|
config: default |
|
split: test |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.24496945719946 |
|
- type: cos_sim_spearman |
|
value: 78.10001513346513 |
|
- type: euclidean_pearson |
|
value: 81.43570951228163 |
|
- type: euclidean_spearman |
|
value: 78.0987784421045 |
|
- type: manhattan_pearson |
|
value: 81.31986646517238 |
|
- type: manhattan_spearman |
|
value: 78.09610194828534 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (fr) |
|
config: fr |
|
split: test |
|
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 83.07721141521425 |
|
- type: cos_sim_spearman |
|
value: 83.19199466052186 |
|
- type: euclidean_pearson |
|
value: 82.10672022294766 |
|
- type: euclidean_spearman |
|
value: 83.19199466052186 |
|
- type: manhattan_pearson |
|
value: 81.92531847793633 |
|
- type: manhattan_spearman |
|
value: 83.20694689089673 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/stsb_multi_mt |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
config: fr |
|
split: test |
|
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 83.957481748094 |
|
- type: cos_sim_spearman |
|
value: 84.40492503459248 |
|
- type: euclidean_pearson |
|
value: 83.8150014101056 |
|
- type: euclidean_spearman |
|
value: 84.40686653864509 |
|
- type: manhattan_pearson |
|
value: 83.6816837321264 |
|
- type: manhattan_spearman |
|
value: 84.2678486368702 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
name: MTEB SummEvalFr |
|
config: default |
|
split: test |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 32.06592630917136 |
|
- type: cos_sim_spearman |
|
value: 30.94878864229808 |
|
- type: dot_pearson |
|
value: 32.06591974515864 |
|
- type: dot_spearman |
|
value: 30.925383080565222 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
name: MTEB SyntecReranking |
|
config: default |
|
split: test |
|
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad |
|
metrics: |
|
- type: map |
|
value: 88.11666666666667 |
|
- type: mrr |
|
value: 88.11666666666667 |
|
- type: nAUC_map_diff1 |
|
value: 66.27779227667267 |
|
- type: nAUC_map_max |
|
value: 6.651414764738896 |
|
- type: nAUC_mrr_diff1 |
|
value: 66.27779227667267 |
|
- type: nAUC_mrr_max |
|
value: 6.651414764738896 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
name: MTEB SyntecRetrieval |
|
config: default |
|
split: test |
|
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 |
|
metrics: |
|
- type: map_at_1 |
|
value: 69.0 |
|
- type: map_at_10 |
|
value: 80.65 |
|
- type: map_at_100 |
|
value: 80.838 |
|
- type: map_at_1000 |
|
value: 80.838 |
|
- type: map_at_20 |
|
value: 80.838 |
|
- type: map_at_3 |
|
value: 79.833 |
|
- type: map_at_5 |
|
value: 80.483 |
|
- type: mrr_at_1 |
|
value: 69.0 |
|
- type: mrr_at_10 |
|
value: 80.64999999999999 |
|
- type: mrr_at_100 |
|
value: 80.83799019607844 |
|
- type: mrr_at_1000 |
|
value: 80.83799019607844 |
|
- type: mrr_at_20 |
|
value: 80.83799019607844 |
|
- type: mrr_at_3 |
|
value: 79.83333333333334 |
|
- type: mrr_at_5 |
|
value: 80.48333333333333 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_map_at_1000_max |
|
value: 24.307826758747282 |
|
- type: nauc_map_at_100_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_map_at_100_max |
|
value: 24.307826758747282 |
|
- type: nauc_map_at_10_diff1 |
|
value: 61.094194035098035 |
|
- type: nauc_map_at_10_max |
|
value: 24.44687875369869 |
|
- type: nauc_map_at_1_diff1 |
|
value: 65.17628798701865 |
|
- type: nauc_map_at_1_max |
|
value: 25.79501560929155 |
|
- type: nauc_map_at_20_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_map_at_20_max |
|
value: 24.307826758747282 |
|
- type: nauc_map_at_3_diff1 |
|
value: 61.562719756100805 |
|
- type: nauc_map_at_3_max |
|
value: 25.87804164282553 |
|
- type: nauc_map_at_5_diff1 |
|
value: 61.471976470716264 |
|
- type: nauc_map_at_5_max |
|
value: 25.180513270581322 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_mrr_at_1000_max |
|
value: 24.307826758747282 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_mrr_at_100_max |
|
value: 24.307826758747282 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 61.094194035098035 |
|
- type: nauc_mrr_at_10_max |
|
value: 24.44687875369869 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 65.17628798701865 |
|
- type: nauc_mrr_at_1_max |
|
value: 25.79501560929155 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 61.46904865740055 |
|
- type: nauc_mrr_at_20_max |
|
value: 24.307826758747282 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 61.562719756100805 |
|
- type: nauc_mrr_at_3_max |
|
value: 25.87804164282553 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 61.471976470716264 |
|
- type: nauc_mrr_at_5_max |
|
value: 25.180513270581322 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 60.95477865546023 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 24.427553593893535 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 60.95477865546023 |
|
- type: nauc_ndcg_at_100_max |
|
value: 24.427553593893535 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 59.101673931307396 |
|
- type: nauc_ndcg_at_10_max |
|
value: 25.01155211084955 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 65.17628798701865 |
|
- type: nauc_ndcg_at_1_max |
|
value: 25.79501560929155 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 60.95477865546023 |
|
- type: nauc_ndcg_at_20_max |
|
value: 24.427553593893535 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 60.333057480044616 |
|
- type: nauc_ndcg_at_3_max |
|
value: 28.363238330232637 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 60.15511994533307 |
|
- type: nauc_ndcg_at_5_max |
|
value: 26.94308058940176 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: nan |
|
- type: nauc_precision_at_1000_max |
|
value: nan |
|
- type: nauc_precision_at_100_diff1 |
|
value: nan |
|
- type: nauc_precision_at_100_max |
|
value: nan |
|
- type: nauc_precision_at_10_diff1 |
|
value: 26.657329598506518 |
|
- type: nauc_precision_at_10_max |
|
value: 34.26704014939361 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 65.17628798701865 |
|
- type: nauc_precision_at_1_max |
|
value: 25.79501560929155 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 100.0 |
|
- type: nauc_precision_at_20_max |
|
value: 100.0 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 51.834066960117276 |
|
- type: nauc_precision_at_3_max |
|
value: 48.25930372148875 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 44.992997198879706 |
|
- type: nauc_precision_at_5_max |
|
value: 50.70028011204499 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: nan |
|
- type: nauc_recall_at_1000_max |
|
value: nan |
|
- type: nauc_recall_at_100_diff1 |
|
value: nan |
|
- type: nauc_recall_at_100_max |
|
value: nan |
|
- type: nauc_recall_at_10_diff1 |
|
value: 26.657329598505903 |
|
- type: nauc_recall_at_10_max |
|
value: 34.26704014939303 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 65.17628798701865 |
|
- type: nauc_recall_at_1_max |
|
value: 25.79501560929155 |
|
- type: nauc_recall_at_20_diff1 |
|
value: nan |
|
- type: nauc_recall_at_20_max |
|
value: nan |
|
- type: nauc_recall_at_3_diff1 |
|
value: 51.834066960117376 |
|
- type: nauc_recall_at_3_max |
|
value: 48.25930372148865 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 44.99299719887955 |
|
- type: nauc_recall_at_5_max |
|
value: 50.70028011204488 |
|
- type: ndcg_at_1 |
|
value: 69.0 |
|
- type: ndcg_at_10 |
|
value: 84.786 |
|
- type: ndcg_at_100 |
|
value: 85.521 |
|
- type: ndcg_at_1000 |
|
value: 85.521 |
|
- type: ndcg_at_20 |
|
value: 85.521 |
|
- type: ndcg_at_3 |
|
value: 83.226 |
|
- type: ndcg_at_5 |
|
value: 84.43 |
|
- type: precision_at_1 |
|
value: 69.0 |
|
- type: precision_at_10 |
|
value: 9.700000000000001 |
|
- type: precision_at_100 |
|
value: 1.0 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_20 |
|
value: 5.0 |
|
- type: precision_at_3 |
|
value: 31.0 |
|
- type: precision_at_5 |
|
value: 19.2 |
|
- type: recall_at_1 |
|
value: 69.0 |
|
- type: recall_at_10 |
|
value: 97.0 |
|
- type: recall_at_100 |
|
value: 100.0 |
|
- type: recall_at_1000 |
|
value: 100.0 |
|
- type: recall_at_20 |
|
value: 100.0 |
|
- type: recall_at_3 |
|
value: 93.0 |
|
- type: recall_at_5 |
|
value: 96.0 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/xpqa |
|
name: MTEB XPQARetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
metrics: |
|
- type: map_at_1 |
|
value: 40.797 |
|
- type: map_at_10 |
|
value: 62.71099999999999 |
|
- type: map_at_100 |
|
value: 64.261 |
|
- type: map_at_1000 |
|
value: 64.306 |
|
- type: map_at_20 |
|
value: 63.693 |
|
- type: map_at_3 |
|
value: 56.686 |
|
- type: map_at_5 |
|
value: 60.653999999999996 |
|
- type: mrr_at_1 |
|
value: 64.08544726301736 |
|
- type: mrr_at_10 |
|
value: 71.24790726259349 |
|
- type: mrr_at_100 |
|
value: 71.7835679704396 |
|
- type: mrr_at_1000 |
|
value: 71.79095567140973 |
|
- type: mrr_at_20 |
|
value: 71.5854708410262 |
|
- type: mrr_at_3 |
|
value: 69.55941255006672 |
|
- type: mrr_at_5 |
|
value: 70.60747663551396 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 47.803181417639365 |
|
- type: nauc_map_at_1000_max |
|
value: 51.22073368230412 |
|
- type: nauc_map_at_100_diff1 |
|
value: 47.771573391555755 |
|
- type: nauc_map_at_100_max |
|
value: 51.20370234778812 |
|
- type: nauc_map_at_10_diff1 |
|
value: 47.340833389771625 |
|
- type: nauc_map_at_10_max |
|
value: 50.41256517180715 |
|
- type: nauc_map_at_1_diff1 |
|
value: 55.14983744702445 |
|
- type: nauc_map_at_1_max |
|
value: 31.104750896985728 |
|
- type: nauc_map_at_20_diff1 |
|
value: 47.64026863999484 |
|
- type: nauc_map_at_20_max |
|
value: 50.87670909266768 |
|
- type: nauc_map_at_3_diff1 |
|
value: 47.681906747352635 |
|
- type: nauc_map_at_3_max |
|
value: 43.47246277661219 |
|
- type: nauc_map_at_5_diff1 |
|
value: 46.874943002794815 |
|
- type: nauc_map_at_5_max |
|
value: 48.469495140739724 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 57.34098736669957 |
|
- type: nauc_mrr_at_1000_max |
|
value: 60.179095583193444 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 57.339862158018796 |
|
- type: nauc_mrr_at_100_max |
|
value: 60.18082273539442 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 57.210874058908814 |
|
- type: nauc_mrr_at_10_max |
|
value: 60.043680803697086 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 59.69074056197331 |
|
- type: nauc_mrr_at_1_max |
|
value: 60.90082316300324 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 57.35434243512763 |
|
- type: nauc_mrr_at_20_max |
|
value: 60.18873377253912 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 57.26933631425754 |
|
- type: nauc_mrr_at_3_max |
|
value: 60.05458089795687 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 57.045411517214276 |
|
- type: nauc_mrr_at_5_max |
|
value: 59.981421712413685 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 50.232929738614814 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 55.01594185277396 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 49.876825728406786 |
|
- type: nauc_ndcg_at_100_max |
|
value: 54.87898182661215 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 48.40787615482867 |
|
- type: nauc_ndcg_at_10_max |
|
value: 52.84877289626636 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 59.69074056197331 |
|
- type: nauc_ndcg_at_1_max |
|
value: 60.90082316300324 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 49.08453974591539 |
|
- type: nauc_ndcg_at_20_max |
|
value: 53.80319392912378 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 48.21830414023458 |
|
- type: nauc_ndcg_at_3_max |
|
value: 51.321799626032714 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 47.614495954542605 |
|
- type: nauc_ndcg_at_5_max |
|
value: 50.803800463597405 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -15.87250509394414 |
|
- type: nauc_precision_at_1000_max |
|
value: 16.09830137145176 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -13.720930651556534 |
|
- type: nauc_precision_at_100_max |
|
value: 19.94363871765946 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -3.9626074014054136 |
|
- type: nauc_precision_at_10_max |
|
value: 30.48732389685921 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 59.69074056197331 |
|
- type: nauc_precision_at_1_max |
|
value: 60.90082316300324 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -8.144148640034853 |
|
- type: nauc_precision_at_20_max |
|
value: 26.183545158653338 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 7.1166818076254605 |
|
- type: nauc_precision_at_3_max |
|
value: 37.64665636029093 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 0.3455996928663316 |
|
- type: nauc_precision_at_5_max |
|
value: 34.95245204298077 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 47.93171740380228 |
|
- type: nauc_recall_at_1000_max |
|
value: 89.21354057542635 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 34.93973412699365 |
|
- type: nauc_recall_at_100_max |
|
value: 47.89216950421148 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 38.58556368247737 |
|
- type: nauc_recall_at_10_max |
|
value: 45.13227163006313 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 55.14983744702445 |
|
- type: nauc_recall_at_1_max |
|
value: 31.104750896985728 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 38.53568097509877 |
|
- type: nauc_recall_at_20_max |
|
value: 46.37328875121808 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 41.49659886305561 |
|
- type: nauc_recall_at_3_max |
|
value: 38.59476562231703 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 38.489499442628016 |
|
- type: nauc_recall_at_5_max |
|
value: 43.06848825600403 |
|
- type: ndcg_at_1 |
|
value: 64.08500000000001 |
|
- type: ndcg_at_10 |
|
value: 68.818 |
|
- type: ndcg_at_100 |
|
value: 73.66 |
|
- type: ndcg_at_1000 |
|
value: 74.309 |
|
- type: ndcg_at_20 |
|
value: 71.147 |
|
- type: ndcg_at_3 |
|
value: 64.183 |
|
- type: ndcg_at_5 |
|
value: 65.668 |
|
- type: precision_at_1 |
|
value: 64.08500000000001 |
|
- type: precision_at_10 |
|
value: 15.728 |
|
- type: precision_at_100 |
|
value: 1.9720000000000002 |
|
- type: precision_at_1000 |
|
value: 0.207 |
|
- type: precision_at_20 |
|
value: 8.705 |
|
- type: precision_at_3 |
|
value: 39.03 |
|
- type: precision_at_5 |
|
value: 27.717000000000002 |
|
- type: recall_at_1 |
|
value: 40.797 |
|
- type: recall_at_10 |
|
value: 77.432 |
|
- type: recall_at_100 |
|
value: 95.68100000000001 |
|
- type: recall_at_1000 |
|
value: 99.666 |
|
- type: recall_at_20 |
|
value: 84.773 |
|
- type: recall_at_3 |
|
value: 62.083 |
|
- type: recall_at_5 |
|
value: 69.786 |
|
license: apache-2.0 |
|
language: |
|
- fr |
|
- en |
|
--- |
|
## Model Description: |
|
[**french-document-embedding**](https://huggingface.co/dangvantuan/french-document-embedding) is a bilingual French-English document embedding model with a context length of up to 8192 tokens. It is a specialized text-embedding model trained specifically for French and English. It is built upon [gte-multilingual](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) and trained using SimilarityLoss, [Multiple Negatives Ranking Loss](https://arxiv.org/abs/1705.00652), [Matryoshka2dLoss](https://arxiv.org/html/2402.14776v1) and [GISTEmbedLoss](https://arxiv.org/abs/2402.16829) with a [guide model](https://huggingface.co/Lajavaness/bilingual-embedding-large). The model embeds long texts or documents into 768-dimensional vectors, making it useful for vector databases serving semantic search or RAG (Retrieval-Augmented Generation). |
|
|
|
## Full Model Architecture |
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: BilingualModel |
|
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
|
|
|
|
|
## Usage: |
|
|
|
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: |
|
|
|
``` |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] |
|
|
|
|
|
|
|
model = SentenceTransformer('dangvantuan/french-document-embedding', trust_remote_code=True) |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
|
|
``` |
|
|
|
|
|
## Evaluation |
|
|
|
|
|
|
|
|
|
|
|
|
|
## Citation |
|
|
|
|
|
@article{reimers2019sentence, |
|
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, |
|
author={Reimers, Nils and Gurevych, Iryna}, |
|
journal={arXiv preprint arXiv:1908.10084}, |
|
year={2019} |
|
} |
|
|
|
@article{zhang2024mgte, |
|
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval}, |
|
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others}, |
|
journal={arXiv preprint arXiv:2407.19669}, |
|
year={2024} |
|
} |
|
|
|
@article{li2023towards, |
|
title={Towards general text embeddings with multi-stage contrastive learning}, |
|
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan}, |
|
journal={arXiv preprint arXiv:2308.03281}, |
|
year={2023} |
|
} |
|
|
|
@article{li20242d, |
|
title={2d matryoshka sentence embeddings}, |
|
author={Li, Xianming and Li, Zongxi and Li, Jing and Xie, Haoran and Li, Qing}, |
|
journal={arXiv preprint arXiv:2402.14776}, |
|
year={2024} |
|
} |
|
|
|
@misc{henderson2017efficient, |
|
title={Efficient Natural Language Response Suggestion for Smart Reply}, |
|
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil}, |
|
year={2017}, |
|
eprint={1705.00652}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
|
|
@misc{solatorio2024gistembed, |
|
title={GISTEmbed: Guided In-sample Selection of Training Negatives for Text Embedding Fine-tuning}, |
|
author={Aivin V. Solatorio}, |
|
year={2024}, |
|
eprint={2402.16829}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.LG} |
|
} |