diff --git "a/EXTERNAL_MODEL_RESULTS.json" "b/EXTERNAL_MODEL_RESULTS.json" --- "a/EXTERNAL_MODEL_RESULTS.json" +++ "b/EXTERNAL_MODEL_RESULTS.json" @@ -960,190 +960,6 @@ ] } }, - "all-MiniLM-L6-v2": { - "BitextMining": { - "f1": [ - { - "Model": "all-MiniLM-L6-v2", - "BornholmBitextMining": 29.68 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "all-MiniLM-L6-v2", - "AmazonCounterfactualClassification (en)": 64.15, - "AmazonPolarityClassification": 62.58, - "AmazonReviewsClassification (en)": 31.79, - "AngryTweetsClassification": 42.49, - "Banking77Classification": 79.75, - "DKHateClassification": 55.05, - "DanishPoliticalCommentsClassification": 26.96, - "EmotionClassification": 38.43, - "ImdbClassification": 60.66, - "LccSentimentClassification": 38.47, - "MTOPDomainClassification (en)": 91.56, - "MTOPIntentClassification (en)": 62.18, - "MasakhaNEWSClassification (fra)": 74.05, - "MassiveIntentClassification (en)": 67.4, - "MassiveIntentClassification (da)": 40.99, - "MassiveIntentClassification (nb)": 39.34, - "MassiveIntentClassification (sv)": 38.1, - "MassiveScenarioClassification (en)": 75.76, - "MassiveScenarioClassification (da)": 47.01, - "MassiveScenarioClassification (nb)": 44.67, - "MassiveScenarioClassification (sv)": 42.93, - "NoRecClassification": 40.02, - "NordicLangClassification": 54.71, - "NorwegianParliament": 54.8, - "ScalaDaClassification": 50.03, - "ScalaNbClassification": 50.17, - "ToxicConversationsClassification": 66.99, - "TweetSentimentExtractionClassification": 55.41 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "all-MiniLM-L6-v2", - "AlloProfClusteringP2P": 51.83, - "AlloProfClusteringS2S": 32.07, - "ArxivClusteringP2P": 46.55, - "ArxivClusteringS2S": 37.86, - "BiorxivClusteringP2P": 38.48, - "BiorxivClusteringS2S": 33.17, - "HALClusteringS2S": 18.84, - "MLSUMClusteringP2P": 36.74, - "MLSUMClusteringS2S": 28.12, - "MasakhaNEWSClusteringP2P (fra)": 34.92, - "MasakhaNEWSClusteringS2S (fra)": 40.58, - "MedrxivClusteringP2P": 34.41, - "MedrxivClusteringS2S": 32.29, - "RedditClustering": 50.67, - "RedditClusteringP2P": 54.15, - "StackExchangeClustering": 53.36, - "StackExchangeClusteringP2P": 38.0, - "TwentyNewsgroupsClustering": 46.86 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "all-MiniLM-L6-v2", - "OpusparcusPC (fr)": 86.53, - "PawsX (fr)": 55.4, - "SprintDuplicateQuestions": 94.55, - "TwitterSemEval2015": 67.86, - "TwitterURLCorpus": 84.7 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "all-MiniLM-L6-v2", - "AlloprofReranking": 31.69, - "AskUbuntuDupQuestions": 63.48, - "MindSmallReranking": 30.8, - "SciDocsRR": 87.12, - "StackOverflowDupQuestions": 50.76, - "SyntecReranking": 59.57 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "all-MiniLM-L6-v2", - "AlloprofRetrieval": 28.41, - "ArguAna": 50.17, - "BSARDRetrieval": 0.0, - "CQADupstackRetrieval": 41.32, - "ClimateFEVER": 20.27, - "DBPedia": 32.33, - "FEVER": 51.93, - "FiQA2018": 36.87, - "HotpotQA": 46.51, - "MSMARCO": 36.54, - "MintakaRetrieval (fr)": 9.19, - "NFCorpus": 31.59, - "NQ": 43.87, - "QuoraRetrieval": 87.56, - "SCIDOCS": 21.64, - "SciFact": 64.51, - "SyntecRetrieval": 60.15, - "TRECCOVID": 47.25, - "Touche2020": 16.9, - "XPQARetrieval (fr)": 51.79 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "all-MiniLM-L6-v2", - "BIOSSES": 81.64, - "SICK-R": 77.58, - "SICKFr": 62.48, - "STS12": 72.37, - "STS13": 80.6, - "STS14": 75.59, - "STS15": 85.39, - "STS16": 78.99, - "STS17 (ar-ar)": 50.89, - "STS17 (en-ar)": -4.28, - 
"STS17 (en-de)": 35.82, - "STS17 (en-en)": 87.59, - "STS17 (en-tr)": 4.5, - "STS17 (es-en)": 16.31, - "STS17 (es-es)": 76.12, - "STS17 (fr-en)": 37.09, - "STS17 (it-en)": 24.45, - "STS17 (ko-ko)": 43.39, - "STS17 (nl-en)": 29.0, - "STS22 (ar)": 22.64, - "STS22 (de)": 31.04, - "STS22 (de-en)": 44.04, - "STS22 (de-fr)": 30.07, - "STS22 (de-pl)": 4.93, - "STS22 (en)": 67.21, - "STS22 (es)": 54.78, - "STS22 (es-en)": 53.42, - "STS22 (es-it)": 44.27, - "STS22 (fr)": 77.0, - "STS22 (fr-pl)": 50.71, - "STS22 (it)": 60.4, - "STS22 (pl)": 26.77, - "STS22 (pl-en)": 32.8, - "STS22 (ru)": 14.72, - "STS22 (tr)": 33.69, - "STS22 (zh)": 44.93, - "STS22 (zh-en)": 41.64, - "STSBenchmark": 82.03, - "STSBenchmarkMultilingualSTS (fr)": 64.93 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "all-MiniLM-L6-v2", - "SummEval": 30.81, - "SummEvalFr": 28.28 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "all-MiniLM-L6-v2" - } - ] - } - }, "bge-small-zh-v1.5": { "BitextMining": { "f1": [ @@ -5400,184 +5216,62 @@ ] } }, - "text-embedding-3-small": { + "DanskBERT": { "BitextMining": { "f1": [ { - "Model": "text-embedding-3-small" + "Model": "DanskBERT", + "BornholmBitextMining": 6.34 } ] }, "Classification": { "accuracy": [ { - "Model": "text-embedding-3-small", - "AmazonCounterfactualClassification (en)": 76.42, - "AmazonPolarityClassification": 90.84, - "AmazonReviewsClassification (en)": 45.73, - "Banking77Classification": 83.01, - "EmotionClassification": 50.63, - "ImdbClassification": 83.66, - "MTOPDomainClassification (en)": 93.91, - "MTOPIntentClassification (en)": 70.98, - "MassiveIntentClassification (en)": 72.86, - "MassiveScenarioClassification (en)": 76.84, - "ToxicConversationsClassification": 71.91, - "TweetSentimentExtractionClassification": 61.72 + "Model": "DanskBERT", + "AngryTweetsClassification": 54.28, + "DKHateClassification": 59.3, + "DanishPoliticalCommentsClassification": 39.81, + "LccSentimentClassification": 58.0, + "MassiveIntentClassification (da)": 54.68, + "MassiveIntentClassification (nb)": 45.38, + "MassiveIntentClassification (sv)": 40.82, + "MassiveScenarioClassification (da)": 59.56, + "MassiveScenarioClassification (nb)": 47.55, + "MassiveScenarioClassification (sv)": 40.14, + "NoRecClassification": 46.06, + "NordicLangClassification": 74.25, + "NorwegianParliament": 56.79, + "ScalaDaClassification": 66.59, + "ScalaNbClassification": 59.99 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-3-small", - "ArxivClusteringP2P": 46.57, - "ArxivClusteringS2S": 39.35, - "BiorxivClusteringP2P": 37.77, - "BiorxivClusteringS2S": 34.68, - "MedrxivClusteringP2P": 32.77, - "MedrxivClusteringS2S": 31.85, - "RedditClustering": 64.09, - "RedditClusteringP2P": 65.12, - "StackExchangeClustering": 72.05, - "StackExchangeClusteringP2P": 34.04, - "TwentyNewsgroupsClustering": 54.81 + "Model": "DanskBERT" } ] }, "PairClassification": { "ap": [ { - "Model": "text-embedding-3-small", - "OpusparcusPC (fr)": 94.45, - "SprintDuplicateQuestions": 94.58, - "TwitterSemEval2015": 73.33, - "TwitterURLCorpus": 87.21 + "Model": "DanskBERT" } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-3-small", - "AskUbuntuDupQuestions": 62.18, - "MindSmallReranking": 29.93, - "SciDocsRR": 83.25, - "StackOverflowDupQuestions": 51.53 + "Model": "DanskBERT" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "text-embedding-3-small", - "ArguAna": 55.49, - "CQADupstackRetrieval": 42.58, - "ClimateFEVER": 26.86, - "DBPedia": 39.97, - "FEVER": 79.42, - "FiQA2018": 44.91, 
- "HotpotQA": 63.63, - "MSMARCO": 37.02, - "NFCorpus": 38.33, - "NQ": 52.86, - "QuoraRetrieval": 88.83, - "SCIDOCS": 20.8, - "SciFact": 73.37, - "TRECCOVID": 77.9, - "Touche2020": 24.28 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "text-embedding-3-small", - "BIOSSES": 88.72, - "SICK-R": 76.73, - "STS12": 73.09, - "STS13": 84.92, - "STS14": 79.81, - "STS15": 88.01, - "STS16": 84.41, - "STS17 (en-en)": 90.94, - "STS22 (en)": 64.96, - "STSBenchmark": 84.24 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "text-embedding-3-small", - "SummEval": 31.12 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "text-embedding-3-small" - } - ] - } - }, - "DanskBERT": { - "BitextMining": { - "f1": [ - { - "Model": "DanskBERT", - "BornholmBitextMining": 6.34 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "DanskBERT", - "AngryTweetsClassification": 54.28, - "DKHateClassification": 59.3, - "DanishPoliticalCommentsClassification": 39.81, - "LccSentimentClassification": 58.0, - "MassiveIntentClassification (da)": 54.68, - "MassiveIntentClassification (nb)": 45.38, - "MassiveIntentClassification (sv)": 40.82, - "MassiveScenarioClassification (da)": 59.56, - "MassiveScenarioClassification (nb)": 47.55, - "MassiveScenarioClassification (sv)": 40.14, - "NoRecClassification": 46.06, - "NordicLangClassification": 74.25, - "NorwegianParliament": 56.79, - "ScalaDaClassification": 66.59, - "ScalaNbClassification": 59.99 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "DanskBERT" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "DanskBERT" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "DanskBERT" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "DanskBERT" + "Model": "DanskBERT" } ] }, @@ -9598,280 +9292,98 @@ ] } }, - "text-embedding-ada-002": { + "multilingual-e5-large": { "BitextMining": { "f1": [ { - "Model": "text-embedding-ada-002" + "Model": "multilingual-e5-large", + "BornholmBitextMining": 44.16 } ] }, "Classification": { "accuracy": [ { - "Model": "text-embedding-ada-002", - "AmazonCounterfactualClassification (en)": 75.94, - "AmazonPolarityClassification": 86.72, - "AmazonReviewsClassification (zh)": 38.3, - "AmazonReviewsClassification (en)": 44.78, - "AmazonReviewsClassification (fr)": 43.76, - "Banking77Classification": 80.66, - "EmotionClassification": 48.74, - "IFlyTek": 44.62, - "ImdbClassification": 77.98, - "JDReview": 74.6, - "MTOPDomainClassification (en)": 92.13, - "MTOPDomainClassification (fr)": 89.38, - "MTOPIntentClassification (en)": 64.68, - "MTOPIntentClassification (fr)": 64.45, - "MasakhaNEWSClassification (fra)": 81.52, - "MassiveIntentClassification (zh-CN)": 64.81, - "MassiveIntentClassification (en)": 70.15, - "MassiveIntentClassification (fr)": 65.42, - "MassiveScenarioClassification (zh-CN)": 71.4, - "MassiveScenarioClassification (en)": 75.33, - "MassiveScenarioClassification (fr)": 71.11, - "MultilingualSentiment": 67.99, - "OnlineShopping": 88.94, - "TNews": 45.77, - "ToxicConversationsClassification": 72.29, - "TweetSentimentExtractionClassification": 61.81, - "Waimai": 82.37 + "Model": "multilingual-e5-large", + "AllegroReviews": 41.14, + "AmazonReviewsClassification (fr)": 41.91, + "AngryTweetsClassification": 54.95, + "CBD": 69.9, + "DKHateClassification": 66.02, + "DanishPoliticalCommentsClassification": 38.27, + "IFlyTek": 45.47, + "JDReview": 80.99, + "LccSentimentClassification": 59.6, + "MTOPDomainClassification (fr)": 86.41, + "MTOPIntentClassification 
(fr)": 59.43, + "MasakhaNEWSClassification (fra)": 79.38, + "MassiveIntentClassification (da)": 60.16, + "MassiveIntentClassification (nb)": 59.83, + "MassiveIntentClassification (sv)": 61.78, + "MassiveIntentClassification (pl)": 65.07, + "MassiveScenarioClassification (da)": 67.46, + "MassiveScenarioClassification (nb)": 66.18, + "MassiveScenarioClassification (sv)": 69.15, + "MassiveScenarioClassification (pl)": 69.82, + "MultilingualSentiment": 68.58, + "NoRecClassification": 62.76, + "NordicLangClassification": 82.29, + "NorwegianParliament": 60.36, + "OnlineShopping": 90.81, + "PAC": 70.37, + "PolEmo2.0-IN": 77.06, + "PolEmo2.0-OUT": 53.38, + "ScalaDaClassification": 50.77, + "ScalaNbClassification": 50.44, + "TNews": 48.38, + "Waimai": 85.02 } ] }, "Clustering": { "v_measure": [ { - "Model": "text-embedding-ada-002", - "AlloProfClusteringP2P": 64.83, - "AlloProfClusteringS2S": 53.52, - "ArxivClusteringP2P": 45.01, - "ArxivClusteringS2S": 36.85, - "BiorxivClusteringP2P": 36.66, - "BiorxivClusteringS2S": 34.21, - "CLSClusteringP2P": 38.26, - "CLSClusteringS2S": 35.91, - "HALClusteringS2S": 26.18, - "MLSUMClusteringP2P": 44.59, - "MLSUMClusteringS2S": 41.67, - "MasakhaNEWSClusteringP2P (fra)": 68.35, - "MasakhaNEWSClusteringS2S (fra)": 48.58, - "MedrxivClusteringP2P": 32.6, - "MedrxivClusteringS2S": 30.8, - "RedditClustering": 61.42, - "RedditClusteringP2P": 64.13, - "StackExchangeClustering": 72.22, - "StackExchangeClusteringP2P": 38.49, - "ThuNewsClusteringP2P": 58.71, - "ThuNewsClusteringS2S": 49.86, - "TwentyNewsgroupsClustering": 52.56 + "Model": "multilingual-e5-large", + "8TagsClustering": 33.88, + "AlloProfClusteringP2P": 62.99, + "AlloProfClusteringS2S": 32.26, + "CLSClusteringP2P": 40.68, + "CLSClusteringS2S": 38.59, + "HALClusteringS2S": 22.44, + "MLSUMClusteringP2P": 44.04, + "MLSUMClusteringS2S": 37.65, + "MasakhaNEWSClusteringP2P (fra)": 40.94, + "MasakhaNEWSClusteringS2S (fra)": 30.56, + "ThuNewsClusteringP2P": 58.05, + "ThuNewsClusteringS2S": 55.59 } ] }, "PairClassification": { "ap": [ { - "Model": "text-embedding-ada-002", - "Cmnli": 76.03, - "Ocnli": 63.08, - "OpusparcusPC (fr)": 94.12, - "PawsX (fr)": 60.16, - "SprintDuplicateQuestions": 92.17, - "TwitterSemEval2015": 75.28, - "TwitterURLCorpus": 87.22 + "Model": "multilingual-e5-large", + "CDSC-E": 74.47, + "Cmnli": 78.18, + "Ocnli": 61.6, + "OpusparcusPC (fr)": 93.89, + "PPC": 92.18, + "PSC": 99.39, + "PawsX (fr)": 58.5, + "SICK-E-PL": 75.96 } ] }, "Reranking": { "map": [ { - "Model": "text-embedding-ada-002", - "AskUbuntuDupQuestions": 62.05, - "CMedQAv1": 63.08, - "CMedQAv2": 64.02, - "MMarcoReranking": 23.39, - "MindSmallReranking": 31.45, - "SciDocsRR": 81.22, - "StackOverflowDupQuestions": 50.54, - "SyntecReranking": 89.87, - "T2Reranking": 66.65 - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "text-embedding-ada-002", - "AlloprofRetrieval": 51.64, - "ArguAna": 57.44, - "BSARDRetrieval": 0.61, - "CQADupstackRetrieval": 41.69, - "ClimateFEVER": 21.64, - "CmedqaRetrieval": 22.36, - "CovidRetrieval": 57.21, - "DBPedia": 39.39, - "DuRetrieval": 71.17, - "EcomRetrieval": 44.49, - "FEVER": 74.99, - "FiQA2018": 44.41, - "HotpotQA": 60.9, - "MMarcoRetrieval": 69.86, - "MSMARCO": 40.91, - "MedicalRetrieval": 37.92, - "MintakaRetrieval (fr)": 29.94, - "NFCorpus": 36.97, - "NQ": 51.58, - "QuoraRetrieval": 87.6, - "SCIDOCS": 18.36, - "SciFact": 72.75, - "SyntecRetrieval": 85.97, - "T2Retrieval": 69.14, - "TRECCOVID": 68.47, - "Touche2020": 21.61, - "VideoRetrieval": 43.85, - "XPQARetrieval (fr)": 73.0 - } 
- ] - }, - "STS": { - "spearman": [ - { - "Model": "text-embedding-ada-002", - "AFQMC": 23.88, - "ATEC": 29.25, - "BIOSSES": 86.35, - "BQ": 45.33, - "LCQMC": 68.41, - "PAWSX": 16.55, - "QBQTC": 30.27, - "SICK-R": 80.6, - "SICKFr": 76.28, - "STS12": 69.8, - "STS13": 83.27, - "STS14": 76.09, - "STS15": 86.12, - "STS16": 85.96, - "STS17 (en-en)": 90.25, - "STS22 (zh)": 62.53, - "STS22 (en)": 68.12, - "STS22 (tr)": 64.5, - "STS22 (fr)": 81.09, - "STSB": 70.61, - "STSBenchmark": 83.17, - "STSBenchmarkMultilingualSTS (fr)": 77.55 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "text-embedding-ada-002", - "SummEval": 30.8, - "SummEvalFr": 30.5 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "text-embedding-ada-002" - } - ] - } - }, - "multilingual-e5-large": { - "BitextMining": { - "f1": [ - { - "Model": "multilingual-e5-large", - "BornholmBitextMining": 44.16 - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "multilingual-e5-large", - "AllegroReviews": 41.14, - "AmazonReviewsClassification (fr)": 41.91, - "AngryTweetsClassification": 54.95, - "CBD": 69.9, - "DKHateClassification": 66.02, - "DanishPoliticalCommentsClassification": 38.27, - "IFlyTek": 45.47, - "JDReview": 80.99, - "LccSentimentClassification": 59.6, - "MTOPDomainClassification (fr)": 86.41, - "MTOPIntentClassification (fr)": 59.43, - "MasakhaNEWSClassification (fra)": 79.38, - "MassiveIntentClassification (da)": 60.16, - "MassiveIntentClassification (nb)": 59.83, - "MassiveIntentClassification (sv)": 61.78, - "MassiveIntentClassification (pl)": 65.07, - "MassiveScenarioClassification (da)": 67.46, - "MassiveScenarioClassification (nb)": 66.18, - "MassiveScenarioClassification (sv)": 69.15, - "MassiveScenarioClassification (pl)": 69.82, - "MultilingualSentiment": 68.58, - "NoRecClassification": 62.76, - "NordicLangClassification": 82.29, - "NorwegianParliament": 60.36, - "OnlineShopping": 90.81, - "PAC": 70.37, - "PolEmo2.0-IN": 77.06, - "PolEmo2.0-OUT": 53.38, - "ScalaDaClassification": 50.77, - "ScalaNbClassification": 50.44, - "TNews": 48.38, - "Waimai": 85.02 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "multilingual-e5-large", - "8TagsClustering": 33.88, - "AlloProfClusteringP2P": 62.99, - "AlloProfClusteringS2S": 32.26, - "CLSClusteringP2P": 40.68, - "CLSClusteringS2S": 38.59, - "HALClusteringS2S": 22.44, - "MLSUMClusteringP2P": 44.04, - "MLSUMClusteringS2S": 37.65, - "MasakhaNEWSClusteringP2P (fra)": 40.94, - "MasakhaNEWSClusteringS2S (fra)": 30.56, - "ThuNewsClusteringP2P": 58.05, - "ThuNewsClusteringS2S": 55.59 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "multilingual-e5-large", - "CDSC-E": 74.47, - "Cmnli": 78.18, - "Ocnli": 61.6, - "OpusparcusPC (fr)": 93.89, - "PPC": 92.18, - "PSC": 99.39, - "PawsX (fr)": 58.5, - "SICK-E-PL": 75.96 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "multilingual-e5-large", - "AlloprofReranking": 57.37, - "CMedQAv1": 68.25, - "CMedQAv2": 68.56, - "MMarcoReranking": 21.34, - "SyntecReranking": 86.9, - "T2Reranking": 65.83 + "Model": "multilingual-e5-large", + "AlloprofReranking": 57.37, + "CMedQAv1": 68.25, + "CMedQAv2": 68.56, + "MMarcoReranking": 21.34, + "SyntecReranking": 86.9, + "T2Reranking": 65.83 } ] }, @@ -10421,297 +9933,11 @@ ] } }, - "all-MiniLM-L12-v2": { + "sentence-camembert-base": { "BitextMining": { "f1": [ { - "Model": "all-MiniLM-L12-v2" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "all-MiniLM-L12-v2", - "AmazonCounterfactualClassification (de)": 57.1, - 
"AmazonCounterfactualClassification (en)": 65.28, - "AmazonCounterfactualClassification (en-ext)": 67.24, - "AmazonCounterfactualClassification (ja)": 59.91, - "AmazonPolarityClassification": 62.98, - "AmazonReviewsClassification (de)": 25.91, - "AmazonReviewsClassification (en)": 30.79, - "AmazonReviewsClassification (es)": 27.63, - "AmazonReviewsClassification (fr)": 27.54, - "AmazonReviewsClassification (ja)": 23.57, - "AmazonReviewsClassification (zh)": 22.99, - "Banking77Classification": 80.4, - "EmotionClassification": 41.17, - "ImdbClassification": 59.76, - "MTOPDomainClassification (de)": 72.04, - "MTOPDomainClassification (en)": 91.9, - "MTOPDomainClassification (es)": 72.99, - "MTOPDomainClassification (fr)": 75.59, - "MTOPDomainClassification (hi)": 40.36, - "MTOPDomainClassification (th)": 17.1, - "MTOPIntentClassification (de)": 43.41, - "MTOPIntentClassification (en)": 62.84, - "MTOPIntentClassification (es)": 41.88, - "MTOPIntentClassification (fr)": 38.94, - "MTOPIntentClassification (hi)": 17.75, - "MTOPIntentClassification (th)": 5.63, - "MasakhaNEWSClassification (fra)": 72.2, - "MassiveIntentClassification (af)": 38.94, - "MassiveIntentClassification (am)": 2.45, - "MassiveIntentClassification (ar)": 20.94, - "MassiveIntentClassification (az)": 34.25, - "MassiveIntentClassification (bn)": 13.67, - "MassiveIntentClassification (cy)": 35.71, - "MassiveIntentClassification (da)": 44.43, - "MassiveIntentClassification (de)": 44.17, - "MassiveIntentClassification (el)": 28.7, - "MassiveIntentClassification (en)": 67.15, - "MassiveIntentClassification (es)": 40.91, - "MassiveIntentClassification (fa)": 23.52, - "MassiveIntentClassification (fi)": 39.27, - "MassiveIntentClassification (fr)": 44.82, - "MassiveIntentClassification (he)": 23.65, - "MassiveIntentClassification (hi)": 17.98, - "MassiveIntentClassification (hu)": 38.0, - "MassiveIntentClassification (hy)": 8.69, - "MassiveIntentClassification (id)": 39.66, - "MassiveIntentClassification (is)": 35.14, - "MassiveIntentClassification (it)": 43.17, - "MassiveIntentClassification (ja)": 30.94, - "MassiveIntentClassification (jv)": 36.69, - "MassiveIntentClassification (ka)": 9.17, - "MassiveIntentClassification (km)": 4.99, - "MassiveIntentClassification (kn)": 3.08, - "MassiveIntentClassification (ko)": 19.97, - "MassiveIntentClassification (lv)": 38.61, - "MassiveIntentClassification (ml)": 2.85, - "MassiveIntentClassification (mn)": 23.25, - "MassiveIntentClassification (ms)": 36.21, - "MassiveIntentClassification (my)": 4.38, - "MassiveIntentClassification (nb)": 41.91, - "MassiveIntentClassification (nl)": 41.85, - "MassiveIntentClassification (pl)": 37.63, - "MassiveIntentClassification (pt)": 45.12, - "MassiveIntentClassification (ro)": 41.71, - "MassiveIntentClassification (ru)": 26.33, - "MassiveIntentClassification (sl)": 38.52, - "MassiveIntentClassification (sq)": 41.62, - "MassiveIntentClassification (sv)": 40.42, - "MassiveIntentClassification (sw)": 35.28, - "MassiveIntentClassification (ta)": 13.1, - "MassiveIntentClassification (te)": 2.56, - "MassiveIntentClassification (th)": 10.54, - "MassiveIntentClassification (tl)": 38.56, - "MassiveIntentClassification (tr)": 35.9, - "MassiveIntentClassification (ur)": 16.18, - "MassiveIntentClassification (vi)": 37.38, - "MassiveIntentClassification (zh-CN)": 23.74, - "MassiveIntentClassification (zh-TW)": 22.39, - "MassiveScenarioClassification (af)": 45.71, - "MassiveScenarioClassification (am)": 7.41, - "MassiveScenarioClassification (ar)": 27.62, - 
"MassiveScenarioClassification (az)": 39.58, - "MassiveScenarioClassification (bn)": 18.98, - "MassiveScenarioClassification (cy)": 41.4, - "MassiveScenarioClassification (da)": 49.47, - "MassiveScenarioClassification (de)": 52.07, - "MassiveScenarioClassification (el)": 35.51, - "MassiveScenarioClassification (en)": 74.58, - "MassiveScenarioClassification (es)": 50.74, - "MassiveScenarioClassification (fa)": 29.0, - "MassiveScenarioClassification (fi)": 45.8, - "MassiveScenarioClassification (fr)": 53.76, - "MassiveScenarioClassification (he)": 25.68, - "MassiveScenarioClassification (hi)": 23.02, - "MassiveScenarioClassification (hu)": 44.09, - "MassiveScenarioClassification (hy)": 14.83, - "MassiveScenarioClassification (id)": 44.35, - "MassiveScenarioClassification (is)": 43.08, - "MassiveScenarioClassification (it)": 51.71, - "MassiveScenarioClassification (ja)": 36.75, - "MassiveScenarioClassification (jv)": 44.57, - "MassiveScenarioClassification (ka)": 14.84, - "MassiveScenarioClassification (km)": 9.75, - "MassiveScenarioClassification (kn)": 8.32, - "MassiveScenarioClassification (ko)": 25.72, - "MassiveScenarioClassification (lv)": 42.75, - "MassiveScenarioClassification (ml)": 7.25, - "MassiveScenarioClassification (mn)": 29.03, - "MassiveScenarioClassification (ms)": 44.65, - "MassiveScenarioClassification (my)": 10.07, - "MassiveScenarioClassification (nb)": 47.36, - "MassiveScenarioClassification (nl)": 49.15, - "MassiveScenarioClassification (pl)": 44.72, - "MassiveScenarioClassification (pt)": 53.0, - "MassiveScenarioClassification (ro)": 49.97, - "MassiveScenarioClassification (ru)": 28.75, - "MassiveScenarioClassification (sl)": 42.26, - "MassiveScenarioClassification (sq)": 49.14, - "MassiveScenarioClassification (sv)": 46.83, - "MassiveScenarioClassification (sw)": 43.18, - "MassiveScenarioClassification (ta)": 19.38, - "MassiveScenarioClassification (te)": 7.74, - "MassiveScenarioClassification (th)": 18.32, - "MassiveScenarioClassification (tl)": 48.31, - "MassiveScenarioClassification (tr)": 41.79, - "MassiveScenarioClassification (ur)": 24.46, - "MassiveScenarioClassification (vi)": 40.94, - "MassiveScenarioClassification (zh-CN)": 33.18, - "MassiveScenarioClassification (zh-TW)": 31.16, - "ToxicConversationsClassification": 67.47, - "TweetSentimentExtractionClassification": 54.25 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "all-MiniLM-L12-v2", - "AlloProfClusteringP2P": 46.03, - "AlloProfClusteringS2S": 31.83, - "ArxivClusteringP2P": 46.07, - "ArxivClusteringS2S": 37.5, - "BiorxivClusteringP2P": 36.99, - "BiorxivClusteringS2S": 33.21, - "HALClusteringS2S": 19.58, - "MLSUMClusteringP2P": 34.35, - "MLSUMClusteringS2S": 29.3, - "MasakhaNEWSClusteringP2P (fra)": 42.72, - "MasakhaNEWSClusteringS2S (fra)": 32.47, - "MedrxivClusteringP2P": 34.25, - "MedrxivClusteringS2S": 32.24, - "RedditClustering": 51.18, - "RedditClusteringP2P": 54.8, - "StackExchangeClustering": 53.05, - "StackExchangeClusteringP2P": 33.13, - "TwentyNewsgroupsClustering": 47.47 - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "all-MiniLM-L12-v2", - "OpusparcusPC (fr)": 87.35, - "PawsX (fr)": 55.53, - "SprintDuplicateQuestions": 92.45, - "TwitterSemEval2015": 70.02, - "TwitterURLCorpus": 84.77 - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "all-MiniLM-L12-v2", - "AlloprofReranking": 45.73, - "AskUbuntuDupQuestions": 64.06, - "MindSmallReranking": 31.02, - "SciDocsRR": 87.2, - "StackOverflowDupQuestions": 51.47, - "SyntecReranking": 68.33 - } - ] - }, - 
"Retrieval": { - "ndcg_at_10": [ - { - "Model": "all-MiniLM-L12-v2", - "AlloprofRetrieval": 33.2, - "ArguAna": 47.13, - "BSARDRetrieval": 0.0, - "CQADupstackRetrieval": 42.53, - "ClimateFEVER": 21.57, - "DBPedia": 33.35, - "FEVER": 55.9, - "FiQA2018": 37.27, - "HotpotQA": 44.59, - "MSMARCO": 39.03, - "MintakaRetrieval (fr)": 16.08, - "NFCorpus": 32.25, - "NQ": 46.47, - "QuoraRetrieval": 87.75, - "SCIDOCS": 21.82, - "SciFact": 62.64, - "SyntecRetrieval": 60.8, - "TRECCOVID": 50.82, - "Touche2020": 17.22, - "XPQARetrieval (fr)": 55.9 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "all-MiniLM-L12-v2", - "BIOSSES": 83.57, - "SICK-R": 79.32, - "SICKFr": 63.16, - "STS12": 73.08, - "STS13": 82.13, - "STS14": 76.73, - "STS15": 85.58, - "STS16": 80.23, - "STS17 (ar-ar)": 58.71, - "STS17 (en-ar)": 0.54, - "STS17 (en-de)": 27.54, - "STS17 (en-en)": 88.63, - "STS17 (en-tr)": 0.43, - "STS17 (es-en)": 22.01, - "STS17 (es-es)": 78.37, - "STS17 (fr-en)": 30.7, - "STS17 (it-en)": 24.28, - "STS17 (ko-ko)": 43.37, - "STS17 (nl-en)": 24.51, - "STS22 (ar)": 17.54, - "STS22 (de)": 22.53, - "STS22 (de-en)": 42.86, - "STS22 (de-fr)": 43.52, - "STS22 (de-pl)": 1.63, - "STS22 (en)": 65.67, - "STS22 (es)": 43.98, - "STS22 (es-en)": 53.99, - "STS22 (es-it)": 40.71, - "STS22 (fr)": 69.51, - "STS22 (fr-pl)": 16.9, - "STS22 (it)": 47.48, - "STS22 (pl)": 19.22, - "STS22 (pl-en)": 42.67, - "STS22 (ru)": 11.19, - "STS22 (tr)": 21.6, - "STS22 (zh)": 33.15, - "STS22 (zh-en)": 44.39, - "STSBenchmark": 83.09, - "STSBenchmarkMultilingualSTS (fr)": 66.68 - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "all-MiniLM-L12-v2", - "SummEval": 27.9, - "SummEvalFr": 26.63 - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "all-MiniLM-L12-v2" - } - ] - } - }, - "sentence-camembert-base": { - "BitextMining": { - "f1": [ - { - "Model": "sentence-camembert-base" + "Model": "sentence-camembert-base" } ] }, @@ -15468,77 +14694,6 @@ ] } }, - "bge-m3": { - "BitextMining": { - "f1": [ - { - "Model": "bge-m3" - } - ] - }, - "Classification": { - "accuracy": [ - { - "Model": "bge-m3" - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "bge-m3" - } - ] - }, - "PairClassification": { - "ap": [ - { - "Model": "bge-m3" - } - ] - }, - "Reranking": { - "map": [ - { - "Model": "bge-m3" - } - ] - }, - "Retrieval": { - "ndcg_at_10": [ - { - "Model": "bge-m3", - "LEMBNarrativeQARetrieval": 45.76, - "LEMBNeedleRetrieval": 40.25, - "LEMBPasskeyRetrieval": 46.0, - "LEMBQMSumRetrieval": 35.54, - "LEMBSummScreenFDRetrieval": 94.09, - "LEMBWikimQARetrieval": 77.73 - } - ] - }, - "STS": { - "spearman": [ - { - "Model": "bge-m3" - } - ] - }, - "Summarization": { - "spearman": [ - { - "Model": "bge-m3" - } - ] - }, - "InstructionRetrieval": { - "p-MRR": [ - { - "Model": "bge-m3" - } - ] - } - }, "jina-embeddings-v2-base-en": { "BitextMining": { "f1": [ @@ -16314,837 +15469,571 @@ ] } }, - "all-mpnet-base-v2": { + "gte-Qwen1.5-7B-instruct": { "BitextMining": { "f1": [ { - "Model": "all-mpnet-base-v2", - "BornholmBitextMining (dan-Latn)": 27.44, - "Tatoeba (pol-Latn_eng-Latn)": 4.09, - "Tatoeba (ita-Latn_eng-Latn)": 11.1, - "Tatoeba (cat-Latn_eng-Latn)": 9.44, - "Tatoeba (aze-Latn_eng-Latn)": 1.49, - "Tatoeba (eus-Latn_eng-Latn)": 3.94, - "Tatoeba (epo-Latn_eng-Latn)": 7.15, - "Tatoeba (lit-Latn_eng-Latn)": 1.02, - "Tatoeba (ast-Latn_eng-Latn)": 9.78, - "Tatoeba (bul-Cyrl_eng-Latn)": 0.35, - "Tatoeba (ceb-Latn_eng-Latn)": 4.41, - "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, - "Tatoeba (tzl-Latn_eng-Latn)": 3.55, - "Tatoeba 
(zsm-Latn_eng-Latn)": 4.75, - "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17, - "Tatoeba (pam-Latn_eng-Latn)": 4.32, - "Tatoeba (amh-Ethi_eng-Latn)": 0.0, - "Tatoeba (slv-Latn_eng-Latn)": 3.73, - "Tatoeba (lvs-Latn_eng-Latn)": 2.98, - "Tatoeba (sqi-Latn_eng-Latn)": 3.45, - "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, - "Tatoeba (vie-Latn_eng-Latn)": 4.96, - "Tatoeba (pes-Arab_eng-Latn)": 0.2, - "Tatoeba (por-Latn_eng-Latn)": 10.48, - "Tatoeba (dtp-Latn_eng-Latn)": 3.54, - "Tatoeba (yid-Hebr_eng-Latn)": 0.08, - "Tatoeba (isl-Latn_eng-Latn)": 3.86, - "Tatoeba (cha-Latn_eng-Latn)": 12.2, - "Tatoeba (ron-Latn_eng-Latn)": 7.34, - "Tatoeba (hye-Armn_eng-Latn)": 0.14, - "Tatoeba (mar-Deva_eng-Latn)": 0.11, - "Tatoeba (hin-Deva_eng-Latn)": 0.02, - "Tatoeba (kor-Hang_eng-Latn)": 0.32, - "Tatoeba (srp-Cyrl_eng-Latn)": 1.89, - "Tatoeba (csb-Latn_eng-Latn)": 4.19, - "Tatoeba (jpn-Jpan_eng-Latn)": 1.71, - "Tatoeba (ber-Tfng_eng-Latn)": 4.56, - "Tatoeba (wuu-Hans_eng-Latn)": 0.91, - "Tatoeba (jav-Latn_eng-Latn)": 3.17, - "Tatoeba (nob-Latn_eng-Latn)": 4.37, - "Tatoeba (bre-Latn_eng-Latn)": 3.65, - "Tatoeba (kzj-Latn_eng-Latn)": 3.62, - "Tatoeba (urd-Arab_eng-Latn)": 0.0, - "Tatoeba (ces-Latn_eng-Latn)": 3.56, - "Tatoeba (cbk-Latn_eng-Latn)": 9.33, - "Tatoeba (gla-Latn_eng-Latn)": 2.04, - "Tatoeba (war-Latn_eng-Latn)": 5.14, - "Tatoeba (swh-Latn_eng-Latn)": 6.01, - "Tatoeba (swg-Latn_eng-Latn)": 7.86, - "Tatoeba (glg-Latn_eng-Latn)": 12.0, - "Tatoeba (fao-Latn_eng-Latn)": 7.08, - "Tatoeba (gsw-Latn_eng-Latn)": 10.67, - "Tatoeba (rus-Cyrl_eng-Latn)": 0.14, - "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52, - "Tatoeba (gle-Latn_eng-Latn)": 2.19, - "Tatoeba (slk-Latn_eng-Latn)": 3.4, - "Tatoeba (nno-Latn_eng-Latn)": 5.75, - "Tatoeba (cor-Latn_eng-Latn)": 2.42, - "Tatoeba (nov-Latn_eng-Latn)": 16.61, - "Tatoeba (swe-Latn_eng-Latn)": 6.55, - "Tatoeba (max-Deva_eng-Latn)": 6.46, - "Tatoeba (oci-Latn_eng-Latn)": 8.57, - "Tatoeba (lfn-Latn_eng-Latn)": 6.1, - "Tatoeba (fra-Latn_eng-Latn)": 16.9, - "Tatoeba (ben-Beng_eng-Latn)": 0.0, - "Tatoeba (bel-Cyrl_eng-Latn)": 0.65, - "Tatoeba (lat-Latn_eng-Latn)": 5.78, - "Tatoeba (cmn-Hans_eng-Latn)": 2.22, - "Tatoeba (kat-Geor_eng-Latn)": 0.43, - "Tatoeba (bos-Latn_eng-Latn)": 4.6, - "Tatoeba (xho-Latn_eng-Latn)": 3.3, - "Tatoeba (tha-Thai_eng-Latn)": 0.0, - "Tatoeba (cym-Latn_eng-Latn)": 4.88, - "Tatoeba (deu-Latn_eng-Latn)": 11.46, - "Tatoeba (awa-Deva_eng-Latn)": 0.44, - "Tatoeba (ido-Latn_eng-Latn)": 9.84, - "Tatoeba (tat-Cyrl_eng-Latn)": 0.24, - "Tatoeba (kab-Latn_eng-Latn)": 1.31, - "Tatoeba (uzb-Latn_eng-Latn)": 1.98, - "Tatoeba (heb-Hebr_eng-Latn)": 0.28, - "Tatoeba (ara-Arab_eng-Latn)": 0.1, - "Tatoeba (fry-Latn_eng-Latn)": 12.43, - "Tatoeba (afr-Latn_eng-Latn)": 6.08, - "Tatoeba (kur-Latn_eng-Latn)": 3.65, - "Tatoeba (pms-Latn_eng-Latn)": 7.63, - "Tatoeba (ell-Grek_eng-Latn)": 0.0, - "Tatoeba (spa-Latn_eng-Latn)": 10.12, - "Tatoeba (dsb-Latn_eng-Latn)": 2.96, - "Tatoeba (uig-Arab_eng-Latn)": 0.33, - "Tatoeba (nld-Latn_eng-Latn)": 9.29, - "Tatoeba (tel-Telu_eng-Latn)": 0.73, - "Tatoeba (hrv-Latn_eng-Latn)": 3.77, - "Tatoeba (nds-Latn_eng-Latn)": 10.96, - "Tatoeba (hun-Latn_eng-Latn)": 3.23, - "Tatoeba (est-Latn_eng-Latn)": 2.35, - "Tatoeba (mal-Mlym_eng-Latn)": 0.15, - "Tatoeba (khm-Khmr_eng-Latn)": 0.28, - "Tatoeba (hsb-Latn_eng-Latn)": 3.12, - "Tatoeba (tgl-Latn_eng-Latn)": 4.06, - "Tatoeba (ang-Latn_eng-Latn)": 9.77, - "Tatoeba (tur-Latn_eng-Latn)": 3.16, - "Tatoeba (tuk-Latn_eng-Latn)": 2.23, - "Tatoeba (ile-Latn_eng-Latn)": 17.84, - "Tatoeba (mon-Cyrl_eng-Latn)": 0.81, - "Tatoeba 
(yue-Hant_eng-Latn)": 1.16, - "Tatoeba (ina-Latn_eng-Latn)": 22.55, - "Tatoeba (tam-Taml_eng-Latn)": 0.73, - "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5, - "Tatoeba (dan-Latn_eng-Latn)": 10.01, - "Tatoeba (arq-Arab_eng-Latn)": 0.33, - "Tatoeba (arz-Arab_eng-Latn)": 0.0, - "Tatoeba (fin-Latn_eng-Latn)": 3.82, - "Tatoeba (ind-Latn_eng-Latn)": 4.88 + "Model": "gte-Qwen1.5-7B-instruct" } ] }, "Classification": { "accuracy": [ { - "Model": "all-mpnet-base-v2", - "AllegroReviews (pol-Latn)": 22.99, - "AmazonCounterfactualClassification (en-ext)": 67.5, - "AmazonCounterfactualClassification (en)": 65.03, - "AmazonCounterfactualClassification (deu-Latn)": 55.66, - "AmazonCounterfactualClassification (jpn-Jpan)": 60.69, - "AmazonPolarityClassification (default)": 67.14, - "AmazonReviewsClassification (en)": 31.44, - "AmazonReviewsClassification (deu-Latn)": 26.05, - "AmazonReviewsClassification (spa-Latn)": 27.73, - "AmazonReviewsClassification (fra-Latn)": 28.49, - "AmazonReviewsClassification (jpn-Jpan)": 23.65, - "AmazonReviewsClassification (cmn-Hans)": 23.62, - "AngryTweetsClassification (dan-Latn)": 44.13, - "Banking77Classification (default)": 81.7, - "CBD (pol-Latn)": 50.25, - "DanishPoliticalCommentsClassification (dan-Latn)": 28.31, - "EmotionClassification (default)": 42.22, - "IFlyTek (cmn-Hans)": 17.18, - "ImdbClassification (default)": 71.17, - "JDReview (cmn-Hans)": 60.19, - "LccSentimentClassification (dan-Latn)": 39.27, - "MTOPDomainClassification (en)": 91.89, - "MTOPDomainClassification (deu-Latn)": 71.86, - "MTOPDomainClassification (spa-Latn)": 71.3, - "MTOPDomainClassification (fra-Latn)": 74.88, - "MTOPDomainClassification (hin-Deva)": 39.93, - "MTOPDomainClassification (tha-Thai)": 17.54, - "MTOPIntentClassification (en)": 68.27, - "MTOPIntentClassification (deu-Latn)": 44.36, - "MTOPIntentClassification (spa-Latn)": 39.48, - "MTOPIntentClassification (fra-Latn)": 37.57, - "MTOPIntentClassification (hin-Deva)": 18.63, - "MTOPIntentClassification (tha-Thai)": 5.42, - "MasakhaNEWSClassification (amh-Ethi)": 36.49, - "MasakhaNEWSClassification (eng)": 79.75, - "MasakhaNEWSClassification (fra-Latn)": 77.77, - "MasakhaNEWSClassification (hau-Latn)": 59.22, - "MasakhaNEWSClassification (ibo-Latn)": 61.64, - "MasakhaNEWSClassification (lin-Latn)": 74.0, - "MasakhaNEWSClassification (lug-Latn)": 58.43, - "MasakhaNEWSClassification (orm-Ethi)": 48.15, - "MasakhaNEWSClassification (pcm-Latn)": 92.2, - "MasakhaNEWSClassification (run-Latn)": 64.72, - "MasakhaNEWSClassification (sna-Latn)": 73.69, - "MasakhaNEWSClassification (som-Latn)": 49.97, - "MasakhaNEWSClassification (swa-Latn)": 55.15, - "MasakhaNEWSClassification (tir-Ethi)": 27.46, - "MasakhaNEWSClassification (xho-Latn)": 60.98, - "MasakhaNEWSClassification (yor-Latn)": 63.33, - "MassiveIntentClassification (en)": 69.76, - "MassiveIntentClassification (jav-Latn)": 31.75, - "MassiveIntentClassification (fra-Latn)": 44.27, - "MassiveIntentClassification (msa-Latn)": 30.53, - "MassiveIntentClassification (hun-Latn)": 34.38, - "MassiveIntentClassification (pol-Latn)": 34.26, - "MassiveIntentClassification (nld-Latn)": 38.49, - "MassiveIntentClassification (tha-Thai)": 8.51, - "MassiveIntentClassification (tur-Latn)": 32.02, - "MassiveIntentClassification (tam-Taml)": 9.25, - "MassiveIntentClassification (hye-Armn)": 10.11, - "MassiveIntentClassification (khm-Khmr)": 4.74, - "MassiveIntentClassification (lav-Latn)": 35.08, - "MassiveIntentClassification (deu-Latn)": 44.54, - "MassiveIntentClassification (spa-Latn)": 39.75, - 
"MassiveIntentClassification (ben-Beng)": 12.35, - "MassiveIntentClassification (por-Latn)": 42.83, - "MassiveIntentClassification (ara-Arab)": 20.42, - "MassiveIntentClassification (cym-Latn)": 30.82, - "MassiveIntentClassification (dan-Latn)": 42.36, - "MassiveIntentClassification (mya-Mymr)": 4.6, - "MassiveIntentClassification (heb-Hebr)": 23.6, - "MassiveIntentClassification (kan-Knda)": 3.76, - "MassiveIntentClassification (swa-Latn)": 31.82, - "MassiveIntentClassification (fas-Arab)": 22.45, - "MassiveIntentClassification (hin-Deva)": 17.68, - "MassiveIntentClassification (kat-Geor)": 7.66, - "MassiveIntentClassification (mal-Mlym)": 2.64, - "MassiveIntentClassification (fin-Latn)": 34.58, - "MassiveIntentClassification (slv-Latn)": 34.49, - "MassiveIntentClassification (afr-Latn)": 36.49, - "MassiveIntentClassification (urd-Arab)": 12.86, - "MassiveIntentClassification (ron-Latn)": 38.07, - "MassiveIntentClassification (sqi-Latn)": 37.26, - "MassiveIntentClassification (cmo-Hant)": 22.43, - "MassiveIntentClassification (ita-Latn)": 40.29, - "MassiveIntentClassification (ind-Latn)": 36.31, - "MassiveIntentClassification (nob-Latn)": 39.3, - "MassiveIntentClassification (jpn-Jpan)": 33.13, - "MassiveIntentClassification (aze-Latn)": 28.92, - "MassiveIntentClassification (mon-Cyrl)": 19.65, - "MassiveIntentClassification (ell-Grek)": 24.52, - "MassiveIntentClassification (rus-Cyrl)": 23.98, - "MassiveIntentClassification (kor-Kore)": 13.35, - "MassiveIntentClassification (cmo-Hans)": 24.36, - "MassiveIntentClassification (isl-Latn)": 31.46, - "MassiveIntentClassification (swe-Latn)": 39.02, - "MassiveIntentClassification (tel-Telu)": 2.26, - "MassiveIntentClassification (vie-Latn)": 31.47, - "MassiveIntentClassification (tgl-Latn)": 36.33, - "MassiveIntentClassification (amh-Ethi)": 2.39, - "MassiveScenarioClassification (en)": 75.67, - "MassiveScenarioClassification (tur-Latn)": 39.11, - "MassiveScenarioClassification (kat-Geor)": 13.45, - "MassiveScenarioClassification (jpn-Jpan)": 40.57, - "MassiveScenarioClassification (spa-Latn)": 50.92, - "MassiveScenarioClassification (fas-Arab)": 27.8, - "MassiveScenarioClassification (hun-Latn)": 41.01, - "MassiveScenarioClassification (jav-Latn)": 40.0, - "MassiveScenarioClassification (por-Latn)": 52.06, - "MassiveScenarioClassification (sqi-Latn)": 44.67, - "MassiveScenarioClassification (lav-Latn)": 39.28, - "MassiveScenarioClassification (deu-Latn)": 54.09, - "MassiveScenarioClassification (nld-Latn)": 47.79, - "MassiveScenarioClassification (mon-Cyrl)": 25.58, - "MassiveScenarioClassification (swa-Latn)": 40.34, - "MassiveScenarioClassification (ben-Beng)": 17.49, - "MassiveScenarioClassification (cym-Latn)": 34.82, - "MassiveScenarioClassification (swe-Latn)": 44.53, - "MassiveScenarioClassification (rus-Cyrl)": 28.71, - "MassiveScenarioClassification (fra-Latn)": 54.26, - "MassiveScenarioClassification (dan-Latn)": 49.45, - "MassiveScenarioClassification (mya-Mymr)": 10.8, - "MassiveScenarioClassification (ron-Latn)": 47.86, - "MassiveScenarioClassification (cmo-Hans)": 35.33, - "MassiveScenarioClassification (hin-Deva)": 23.13, - "MassiveScenarioClassification (cmo-Hant)": 31.7, - "MassiveScenarioClassification (afr-Latn)": 43.63, - "MassiveScenarioClassification (aze-Latn)": 36.42, - "MassiveScenarioClassification (msa-Latn)": 37.28, - "MassiveScenarioClassification (ell-Grek)": 33.85, - "MassiveScenarioClassification (isl-Latn)": 39.36, - "MassiveScenarioClassification (fin-Latn)": 38.41, - "MassiveScenarioClassification 
(ind-Latn)": 43.05, - "MassiveScenarioClassification (pol-Latn)": 42.66, - "MassiveScenarioClassification (tam-Taml)": 14.55, - "MassiveScenarioClassification (ita-Latn)": 51.37, - "MassiveScenarioClassification (urd-Arab)": 20.0, - "MassiveScenarioClassification (kan-Knda)": 8.34, - "MassiveScenarioClassification (tel-Telu)": 7.81, - "MassiveScenarioClassification (mal-Mlym)": 7.69, - "MassiveScenarioClassification (ara-Arab)": 27.8, - "MassiveScenarioClassification (kor-Kore)": 17.28, - "MassiveScenarioClassification (vie-Latn)": 35.9, - "MassiveScenarioClassification (amh-Ethi)": 7.43, - "MassiveScenarioClassification (heb-Hebr)": 25.49, - "MassiveScenarioClassification (hye-Armn)": 16.86, - "MassiveScenarioClassification (khm-Khmr)": 9.63, - "MassiveScenarioClassification (slv-Latn)": 39.88, - "MassiveScenarioClassification (tgl-Latn)": 47.04, - "MassiveScenarioClassification (nob-Latn)": 45.75, - "MassiveScenarioClassification (tha-Thai)": 17.01, - "MultilingualSentiment (cmn-Hans)": 41.2, - "NoRecClassification (nob-Latn)": 38.34, - "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15, - "OnlineShopping (cmn-Hans)": 56.94, - "PAC (pol-Latn)": 62.1, - "PolEmo2.0-IN (pol-Latn)": 41.63, - "PolEmo2.0-OUT (pol-Latn)": 25.0, - "TNews (cmn-Hans)": 21.05, - "ToxicConversationsClassification (default)": 61.05, - "TweetSentimentExtractionClassification (default)": 55.05, - "Waimai (cmn-Hans)": 63.31 - } - ] - }, - "Clustering": { - "v_measure": [ - { - "Model": "all-mpnet-base-v2", - "ArxivClusteringP2P": 48.38, - "ArxivClusteringS2S": 39.72, - "BiorxivClusteringP2P": 39.62, - "BiorxivClusteringS2S": 35.02, - "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49, - "MasakhaNEWSClusteringP2P (eng)": 67.24, - "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99, - "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0, - "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68, - "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01, - "MasakhaNEWSClusteringP2P (run-Latn)": 57.6, - "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99, - "MasakhaNEWSClusteringP2P (som-Latn)": 31.16, - "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85, - "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24, - "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48, - "MasakhaNEWSClusteringS2S (eng)": 35.69, - "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05, - "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63, - "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72, - "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7, - "MasakhaNEWSClusteringS2S (run-Latn)": 52.27, - "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64, - "MasakhaNEWSClusteringS2S (som-Latn)": 30.94, - "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01, - "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16, - "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04, - "MedrxivClusteringP2P": 35.58, - "MedrxivClusteringS2S": 32.87, - "RedditClustering": 54.82, - "RedditClusteringP2P": 56.77, - "StackExchangeClustering": 53.8, - "StackExchangeClusteringP2P": 34.28, - "TwentyNewsgroupsClustering": 49.74 + "Model": "gte-Qwen1.5-7B-instruct", + "AmazonCounterfactualClassification (en)": 83.16, + "AmazonPolarityClassification": 
96.7, + "AmazonReviewsClassification (en)": 62.17, + "AmazonReviewsClassification (zh)": 52.95, + "Banking77Classification": 81.68, + "EmotionClassification": 54.53, + "IFlyTek": 53.77, + "ImdbClassification": 95.58, + "JDReview": 88.2, + "MTOPDomainClassification (en)": 95.75, + "MTOPIntentClassification (en)": 84.26, + "MassiveIntentClassification (zh-CN)": 76.25, + "MassiveIntentClassification (en)": 78.47, + "MassiveScenarioClassification (en)": 78.19, + "MassiveScenarioClassification (zh-CN)": 77.26, + "MultilingualSentiment": 77.42, + "OnlineShopping": 94.48, + "TNews": 51.24, + "ToxicConversationsClassification": 78.75, + "TweetSentimentExtractionClassification": 66.0, + "Waimai": 88.63 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "gte-Qwen1.5-7B-instruct", + "ArxivClusteringP2P": 56.4, + "ArxivClusteringS2S": 51.45, + "BiorxivClusteringP2P": 49.01, + "BiorxivClusteringS2S": 45.06, + "CLSClusteringP2P": 47.21, + "CLSClusteringS2S": 45.79, + "MedrxivClusteringP2P": 44.37, + "MedrxivClusteringS2S": 42.0, + "RedditClustering": 73.37, + "RedditClusteringP2P": 72.51, + "StackExchangeClustering": 79.07, + "StackExchangeClusteringP2P": 49.57, + "ThuNewsClusteringP2P": 87.43, + "ThuNewsClusteringS2S": 87.9, + "TwentyNewsgroupsClustering": 51.31 } ] }, "PairClassification": { "ap": [ { - "Model": "all-mpnet-base-v2", - "CDSC-E (pol-Latn)": 45.37, - "OpusparcusPC (deu-Latn)": 89.78, - "OpusparcusPC (en)": 97.75, - "OpusparcusPC (fin-Latn)": 85.82, - "OpusparcusPC (fra-Latn)": 86.61, - "OpusparcusPC (rus-Cyrl)": 79.85, - "OpusparcusPC (swe-Latn)": 81.81, - "PSC (pol-Latn)": 83.28, - "PawsXPairClassification (deu-Latn)": 52.17, - "PawsXPairClassification (en)": 61.99, - "PawsXPairClassification (spa-Latn)": 55.06, - "PawsXPairClassification (fra-Latn)": 56.42, - "PawsXPairClassification (jpn-Hira)": 47.43, - "PawsXPairClassification (kor-Hang)": 49.75, - "PawsXPairClassification (cmn-Hans)": 52.47, - "SICK-E-PL (pol-Latn)": 46.51, - "SprintDuplicateQuestions (default)": 90.15, - "TwitterSemEval2015 (default)": 73.85, - "TwitterURLCorpus (default)": 85.11 + "Model": "gte-Qwen1.5-7B-instruct", + "Cmnli": 91.81, + "Ocnli": 85.22, + "SprintDuplicateQuestions": 95.99, + "TwitterSemEval2015": 79.36, + "TwitterURLCorpus": 86.79 } ] }, "Reranking": { "map": [ { - "Model": "all-mpnet-base-v2", - "AlloprofReranking (fra-Latn)": 69.63, - "AskUbuntuDupQuestions (default)": 65.85, - "MMarcoReranking (cmn-Hans)": 4.65, - "MindSmallReranking (default)": 30.97, - "SciDocsRR (default)": 88.65, - "StackOverflowDupQuestions (default)": 51.98, - "SyntecReranking (fra-Latn)": 66.12, - "T2Reranking (cmn-Hans)": 58.3 + "Model": "gte-Qwen1.5-7B-instruct", + "AskUbuntuDupQuestions": 66.0, + "CMedQAv1": 86.37, + "CMedQAv2": 87.41, + "MindSmallReranking": 32.71, + "SciDocsRR": 87.89, + "StackOverflowDupQuestions": 53.93, + "T2Reranking": 68.11 } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "all-mpnet-base-v2", - "AILACasedocs (default)": 22.51, - "AILAStatutes (default)": 21.27, - "AlloprofRetrieval (fra-Latn)": 34.27, - "ArguAna": 46.52, - "ArguAna (default)": 46.52, - "ArguAna-PL (pol-Latn)": 14.72, - "BSARDRetrieval (fra-Latn)": 6.98, - "BrightRetrieval (robotics)": 8.36, - "BrightRetrieval (psychology)": 22.63, - "BrightRetrieval (leetcode)": 26.4, - "BrightRetrieval (biology)": 15.52, - "BrightRetrieval (theoremqa_questions)": 18.49, - "BrightRetrieval (economics)": 16.64, - "BrightRetrieval (stackoverflow)": 9.48, - "BrightRetrieval (pony)": 6.95, - "BrightRetrieval (earth_science)": 20.11, - 
"BrightRetrieval (theoremqa_theorems)": 12.38, - "BrightRetrieval (sustainable_living)": 15.34, - "BrightRetrieval (aops)": 5.32, - "CQADupstackRetrieval": 44.96, - "ClimateFEVER": 21.97, - "CmedqaRetrieval (cmn-Hans)": 2.0, - "CovidRetrieval (cmn-Hans)": 3.7, - "DBPedia": 32.09, - "DuRetrieval (cmn-Hans)": 4.92, - "EcomRetrieval (cmn-Hans)": 3.94, - "FEVER": 50.86, - "FiQA-PL (pol-Latn)": 3.6, - "FiQA2018": 49.96, - "FiQA2018 (default)": 49.96, - "GerDaLIRSmall (deu-Latn)": 3.78, - "HotpotQA": 39.29, - "LEMBNarrativeQARetrieval (default)": 19.34, - "LEMBNeedleRetrieval": 16.0, - "LEMBPasskeyRetrieval": 24.5, - "LEMBQMSumRetrieval (default)": 21.54, - "LEMBSummScreenFDRetrieval (default)": 60.43, - "LEMBWikimQARetrieval (default)": 44.92, - "LeCaRDv2 (zho-Hans)": 18.09, - "LegalBenchConsumerContractsQA (default)": 75.25, - "LegalBenchCorporateLobbying (default)": 89.04, - "LegalQuAD (deu-Latn)": 10.67, - "LegalSummarization (default)": 58.55, - "MMarcoRetrieval (cmn-Hans)": 7.13, - "MSMARCO": 39.75, - "MedicalRetrieval (cmn-Hans)": 1.71, - "MintakaRetrieval (ara-Arab)": 1.97, - "MintakaRetrieval (deu-Latn)": 17.21, - "MintakaRetrieval (spa-Latn)": 10.11, - "MintakaRetrieval (fra-Latn)": 12.93, - "MintakaRetrieval (hin-Deva)": 2.05, - "MintakaRetrieval (ita-Latn)": 5.63, - "MintakaRetrieval (jpn-Hira)": 6.72, - "MintakaRetrieval (por-Latn)": 8.05, - "NFCorpus": 33.29, - "NFCorpus (default)": 33.29, - "NFCorpus-PL (pol-Latn)": 8.77, - "NQ": 50.45, - "QuoraRetrieval": 87.46, - "SCIDOCS": 23.76, - "SCIDOCS (default)": 23.76, - "SCIDOCS-PL (pol-Latn)": 4.02, - "SciFact": 65.57, - "SciFact (default)": 65.57, - "SciFact-PL (pol-Latn)": 13.31, - "SyntecRetrieval (fra-Latn)": 57.39, - "T2Retrieval (cmn-Hans)": 2.98, - "TRECCOVID": 51.33, - "TRECCOVID (default)": 51.33, - "TRECCOVID-PL (pol-Latn)": 12.12, - "Touche2020": 19.93, - "Touche2020 (default)": 19.93, - "VideoRetrieval (cmn-Hans)": 8.48, - "XPQARetrieval (ara-Arab_ara-Arab)": 9.42, - "XPQARetrieval (eng-Latn_ara-Arab)": 2.39, - "XPQARetrieval (ara-Arab_eng-Latn)": 8.98, - "XPQARetrieval (deu-Latn_deu-Latn)": 55.82, - "XPQARetrieval (eng-Latn_deu-Latn)": 11.74, - "XPQARetrieval (deu-Latn_eng-Latn)": 30.44, - "XPQARetrieval (spa-Latn_spa-Latn)": 40.01, - "XPQARetrieval (eng-Latn_spa-Latn)": 6.12, - "XPQARetrieval (spa-Latn_eng-Latn)": 29.44, - "XPQARetrieval (fra-Latn_fra-Latn)": 51.94, - "XPQARetrieval (eng-Latn_fra-Latn)": 11.48, - "XPQARetrieval (fra-Latn_eng-Latn)": 32.52, - "XPQARetrieval (hin-Deva_hin-Deva)": 37.48, - "XPQARetrieval (eng-Latn_hin-Deva)": 5.11, - "XPQARetrieval (hin-Deva_eng-Latn)": 7.37, - "XPQARetrieval (ita-Latn_ita-Latn)": 54.2, - "XPQARetrieval (eng-Latn_ita-Latn)": 6.08, - "XPQARetrieval (ita-Latn_eng-Latn)": 30.32, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.45, - "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79, - "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77, - "XPQARetrieval (kor-Hang_kor-Hang)": 10.4, - "XPQARetrieval (eng-Latn_kor-Hang)": 7.09, - "XPQARetrieval (kor-Hang_eng-Latn)": 6.95, - "XPQARetrieval (pol-Latn_pol-Latn)": 23.67, - "XPQARetrieval (eng-Latn_pol-Latn)": 8.83, - "XPQARetrieval (pol-Latn_eng-Latn)": 15.94, - "XPQARetrieval (por-Latn_por-Latn)": 33.56, - "XPQARetrieval (eng-Latn_por-Latn)": 3.76, - "XPQARetrieval (por-Latn_eng-Latn)": 23.45, - "XPQARetrieval (tam-Taml_tam-Taml)": 5.53, - "XPQARetrieval (eng-Latn_tam-Taml)": 3.3, - "XPQARetrieval (tam-Taml_eng-Latn)": 4.0, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.84, - "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2, - "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84 
+ "Model": "gte-Qwen1.5-7B-instruct", + "ArguAna": 62.65, + "BrightRetrieval (stackoverflow)": 19.85, + "BrightRetrieval (earth_science)": 36.22, + "BrightRetrieval (leetcode)": 25.46, + "BrightRetrieval (theoremqa_questions)": 26.97, + "BrightRetrieval (economics)": 17.72, + "BrightRetrieval (robotics)": 13.47, + "BrightRetrieval (pony)": 9.79, + "BrightRetrieval (aops)": 14.36, + "BrightRetrieval (psychology)": 24.61, + "BrightRetrieval (theoremqa_theorems)": 26.66, + "BrightRetrieval (biology)": 30.92, + "BrightRetrieval (sustainable_living)": 14.93, + "CQADupstackRetrieval": 40.64, + "ClimateFEVER": 44.0, + "CmedqaRetrieval": 43.47, + "CovidRetrieval": 80.87, + "DBPedia": 48.04, + "DuRetrieval": 86.01, + "EcomRetrieval": 66.46, + "FEVER": 93.35, + "FiQA2018": 55.31, + "HotpotQA": 72.25, + "MMarcoRetrieval": 73.83, + "MSMARCO": 41.68, + "MedicalRetrieval": 61.33, + "NFCorpus": 38.25, + "NQ": 61.79, + "QuoraRetrieval": 89.61, + "SCIDOCS": 27.69, + "SciFact": 75.31, + "T2Retrieval": 83.58, + "TRECCOVID": 72.72, + "Touche2020": 20.3, + "VideoRetrieval": 69.41 } ] }, "STS": { "spearman": [ { - "Model": "all-mpnet-base-v2", - "AFQMC (cmn-Hans)": 8.01, - "ATEC (cmn-Hans)": 14.03, - "BIOSSES (default)": 80.43, - "BQ (cmn-Hans)": 21.39, - "CDSC-R (pol-Latn)": 77.04, - "LCQMC (cmn-Hans)": 22.84, - "PAWSX (cmn-Hans)": 6.44, - "SICK-R (default)": 80.59, - "SICK-R-PL (pol-Latn)": 50.2, - "SICKFr (fra-Latn)": 67.05, - "STS12 (default)": 72.63, - "STS13 (default)": 83.48, - "STS14 (default)": 78.0, - "STS15 (default)": 85.66, - "STS16 (default)": 80.03, - "STS17 (en-en)": 90.6, - "STS17 (eng-Latn_ara-Arab)": 6.76, - "STS17 (fra-Latn_eng-Latn)": 41.64, - "STS17 (eng-Latn_tur-Latn)": -4.58, - "STS17 (eng-Latn_deu-Latn)": 35.5, - "STS17 (spa-Latn_eng-Latn)": 25.28, - "STS17 (ita-Latn_eng-Latn)": 31.8, - "STS17 (spa-Latn)": 78.4, - "STS17 (kor-Hang)": 39.11, - "STS17 (ara-Arab)": 55.42, - "STS17 (nld-Latn_eng-Latn)": 32.89, - "STS22 (en)": 68.39, - "STS22 (spa-Latn_eng-Latn)": 55.09, - "STS22 (deu-Latn_pol-Latn)": 23.53, - "STS22 (cmn-Hans_eng-Latn)": 40.47, - "STS22 (pol-Latn)": 24.21, - "STS22 (tur-Latn)": 29.35, - "STS22 (spa-Latn_ita-Latn)": 41.61, - "STS22 (fra-Latn_pol-Latn)": 73.25, - "STS22 (rus-Cyrl)": 15.83, - "STS22 (deu-Latn)": 27.0, - "STS22 (spa-Latn)": 55.98, - "STS22 (pol-Latn_eng-Latn)": 51.07, - "STS22 (fra-Latn)": 77.1, - "STS22 (deu-Latn_eng-Latn)": 49.73, - "STS22 (ara-Arab)": 38.96, - "STS22 (deu-Latn_fra-Latn)": 31.39, - "STS22 (ita-Latn)": 58.02, - "STS22 (cmn-Hans)": 42.24, - "STSB (cmn-Hans)": 37.7, - "STSBenchmark (default)": 83.42, - "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54, - "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15, - "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72, - "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78, - "STSBenchmarkMultilingualSTS (en)": 83.42, - "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43, - "STSBenchmarkMultilingualSTS (por-Latn)": 62.12, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43, - "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36 + "Model": "gte-Qwen1.5-7B-instruct", + "AFQMC": 58.47, + "ATEC": 55.46, + "BIOSSES": 81.12, + "BQ": 77.59, + "LCQMC": 76.29, + "PAWSX": 50.22, + "QBQTC": 31.82, + "SICK-R": 79.15, + "STS12": 76.52, + "STS13": 88.63, + "STS14": 83.32, + "STS15": 87.5, + "STS16": 86.39, + "STS17 (en-en)": 87.79, + "STS22 (en)": 66.4, + "STS22 (zh)": 67.36, + "STSB": 81.37, + "STSBenchmark": 87.35 } ] }, "Summarization": { "spearman": [ { - "Model": "all-mpnet-base-v2", - 
"SummEval (default)": 27.49, - "SummEvalFr (fra-Latn)": 28.11 + "Model": "gte-Qwen1.5-7B-instruct", + "SummEval": 31.46 } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "all-mpnet-base-v2" + "Model": "gte-Qwen1.5-7B-instruct" } ] } }, - "Cohere-embed-english-v3.0": { + "bm25": { "BitextMining": { "f1": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "Classification": { "accuracy": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "Clustering": { "v_measure": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "PairClassification": { "ap": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "Reranking": { "map": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "Cohere-embed-english-v3.0", - "AILACasedocs": 31.54, - "AILAStatutes": 27.15, - "BrightRetrieval (psychology)": 21.82, - "BrightRetrieval (economics)": 20.18, - "BrightRetrieval (robotics)": 16.21, - "BrightRetrieval (biology)": 18.98, - "BrightRetrieval (stackoverflow)": 16.47, - "BrightRetrieval (theoremqa_theorems)": 6.04, - "BrightRetrieval (pony)": 1.77, - "BrightRetrieval (sustainable_living)": 17.69, - "BrightRetrieval (aops)": 6.46, - "BrightRetrieval (theoremqa_questions)": 15.07, - "BrightRetrieval (leetcode)": 26.78, - "BrightRetrieval (earth_science)": 27.45, - "GerDaLIRSmall": 6.05, - "LeCaRDv2": 21.02, - "LegalBenchConsumerContractsQA": 77.12, - "LegalBenchCorporateLobbying": 93.68, - "LegalQuAD": 26.08, - "LegalSummarization": 61.7 + "Model": "bm25", + "BrightRetrieval (robotics)": 13.53, + "BrightRetrieval (pony)": 7.93, + "BrightRetrieval (leetcode)": 24.37, + "BrightRetrieval (earth_science)": 27.06, + "BrightRetrieval (stackoverflow)": 16.55, + "BrightRetrieval (economics)": 14.87, + "BrightRetrieval (theoremqa_questions)": 9.78, + "BrightRetrieval (theoremqa_theorems)": 4.25, + "BrightRetrieval (psychology)": 12.51, + "BrightRetrieval (sustainable_living)": 15.22, + "BrightRetrieval (biology)": 19.19, + "BrightRetrieval (aops)": 6.2 } ] }, "STS": { "spearman": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "Summarization": { "spearman": [ { - "Model": "Cohere-embed-english-v3.0" + "Model": "bm25" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "Cohere-embed-english-v3.0", - "Core17InstructionRetrieval": 2.8, - "News21InstructionRetrieval": 0.2, - "Robust04InstructionRetrieval": -3.63 + "Model": "bm25", + "Core17InstructionRetrieval": -1.06, + "News21InstructionRetrieval": -2.15, + "Robust04InstructionRetrieval": -3.06 } ] } }, - "bge-large-en-v1.5": { + "instructor-large": { "BitextMining": { "f1": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "Classification": { "accuracy": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "Clustering": { "v_measure": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "PairClassification": { "ap": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "Reranking": { "map": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "bge-large-en-v1.5", - "AILACasedocs": 25.15, - "AILAStatutes": 20.74, - "BrightRetrieval (stackoverflow)": 9.51, - "BrightRetrieval (earth_science)": 24.15, - "BrightRetrieval (aops)": 6.08, - "BrightRetrieval (sustainable_living)": 13.27, - "BrightRetrieval (psychology)": 17.44, - "BrightRetrieval (robotics)": 12.21, - "BrightRetrieval 
(theoremqa_theorems)": 5.51, - "BrightRetrieval (pony)": 5.64, - "BrightRetrieval (biology)": 11.96, - "BrightRetrieval (theoremqa_questions)": 12.56, - "BrightRetrieval (leetcode)": 26.68, - "BrightRetrieval (economics)": 16.59, - "GerDaLIRSmall": 3.96, - "LeCaRDv2": 22.68, - "LegalBenchConsumerContractsQA": 73.52, - "LegalBenchCorporateLobbying": 91.51, - "LegalQuAD": 16.22, - "LegalSummarization": 59.99 + "Model": "instructor-large", + "BrightRetrieval (pony)": 1.32, + "BrightRetrieval (sustainable_living)": 13.16, + "BrightRetrieval (aops)": 7.94, + "BrightRetrieval (biology)": 15.61, + "BrightRetrieval (stackoverflow)": 11.21, + "BrightRetrieval (theoremqa_theorems)": 9.29, + "BrightRetrieval (psychology)": 21.94, + "BrightRetrieval (economics)": 15.99, + "BrightRetrieval (robotics)": 11.45, + "BrightRetrieval (leetcode)": 20.0, + "BrightRetrieval (earth_science)": 21.52, + "BrightRetrieval (theoremqa_questions)": 20.07 } ] }, "STS": { "spearman": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "Summarization": { "spearman": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "bge-large-en-v1.5" + "Model": "instructor-large" } ] } }, - "GritLM-7B": { + "dragon-plus-instruct": { "BitextMining": { "f1": [ { - "Model": "GritLM-7B", - "BornholmBitextMining (dan-Latn)": 45.13, - "Tatoeba (csb-Latn_eng-Latn)": 50.13, - "Tatoeba (ceb-Latn_eng-Latn)": 33.5, - "Tatoeba (cmn-Hans_eng-Latn)": 94.08, - "Tatoeba (uzb-Latn_eng-Latn)": 41.69, - "Tatoeba (kur-Latn_eng-Latn)": 27.94, - "Tatoeba (ita-Latn_eng-Latn)": 91.2, - "Tatoeba (lvs-Latn_eng-Latn)": 53.54, - "Tatoeba (yid-Hebr_eng-Latn)": 17.13, - "Tatoeba (gle-Latn_eng-Latn)": 48.14, - "Tatoeba (ast-Latn_eng-Latn)": 79.11, - "Tatoeba (ang-Latn_eng-Latn)": 76.84, - "Tatoeba (jav-Latn_eng-Latn)": 26.6, - "Tatoeba (ina-Latn_eng-Latn)": 91.24, - "Tatoeba (nob-Latn_eng-Latn)": 93.53, - "Tatoeba (swe-Latn_eng-Latn)": 90.43, - "Tatoeba (lfn-Latn_eng-Latn)": 62.23, - "Tatoeba (fin-Latn_eng-Latn)": 85.76, - "Tatoeba (fry-Latn_eng-Latn)": 61.16, - "Tatoeba (gsw-Latn_eng-Latn)": 53.28, - "Tatoeba (rus-Cyrl_eng-Latn)": 91.82, - "Tatoeba (tat-Cyrl_eng-Latn)": 24.46, - "Tatoeba (mal-Mlym_eng-Latn)": 33.79, - "Tatoeba (hrv-Latn_eng-Latn)": 91.04, - "Tatoeba (ind-Latn_eng-Latn)": 90.05, - "Tatoeba (tam-Taml_eng-Latn)": 46.27, - "Tatoeba (kaz-Cyrl_eng-Latn)": 36.27, - "Tatoeba (uig-Arab_eng-Latn)": 22.6, - "Tatoeba (slv-Latn_eng-Latn)": 82.71, - "Tatoeba (pms-Latn_eng-Latn)": 50.41, - "Tatoeba (lit-Latn_eng-Latn)": 56.36, - "Tatoeba (cha-Latn_eng-Latn)": 34.69, - "Tatoeba (est-Latn_eng-Latn)": 46.73, - "Tatoeba (mhr-Cyrl_eng-Latn)": 10.8, - "Tatoeba (dan-Latn_eng-Latn)": 92.01, - "Tatoeba (pol-Latn_eng-Latn)": 95.6, - "Tatoeba (nov-Latn_eng-Latn)": 64.85, - "Tatoeba (swh-Latn_eng-Latn)": 46.09, - "Tatoeba (tha-Thai_eng-Latn)": 81.25, - "Tatoeba (arz-Arab_eng-Latn)": 52.97, - "Tatoeba (epo-Latn_eng-Latn)": 76.87, - "Tatoeba (deu-Latn_eng-Latn)": 98.02, - "Tatoeba (hye-Armn_eng-Latn)": 35.94, - "Tatoeba (afr-Latn_eng-Latn)": 79.17, - "Tatoeba (gla-Latn_eng-Latn)": 40.8, - "Tatoeba (isl-Latn_eng-Latn)": 74.94, - "Tatoeba (awa-Deva_eng-Latn)": 44.31, - "Tatoeba (ido-Latn_eng-Latn)": 65.69, - "Tatoeba (kor-Hang_eng-Latn)": 87.43, - "Tatoeba (amh-Ethi_eng-Latn)": 6.18, - "Tatoeba (eus-Latn_eng-Latn)": 31.88, - "Tatoeba (mkd-Cyrl_eng-Latn)": 73.82, - "Tatoeba (tur-Latn_eng-Latn)": 86.62, - "Tatoeba (pes-Arab_eng-Latn)": 78.98, - "Tatoeba (heb-Hebr_eng-Latn)": 61.75, - "Tatoeba 
(aze-Latn_eng-Latn)": 64.11, - "Tatoeba (hun-Latn_eng-Latn)": 88.54, - "Tatoeba (bul-Cyrl_eng-Latn)": 90.37, - "Tatoeba (kab-Latn_eng-Latn)": 2.9, - "Tatoeba (cat-Latn_eng-Latn)": 90.66, - "Tatoeba (dsb-Latn_eng-Latn)": 51.72, - "Tatoeba (kat-Geor_eng-Latn)": 38.42, - "Tatoeba (urd-Arab_eng-Latn)": 68.02, - "Tatoeba (wuu-Hans_eng-Latn)": 80.28, - "Tatoeba (oci-Latn_eng-Latn)": 58.12, - "Tatoeba (arq-Arab_eng-Latn)": 30.52, - "Tatoeba (ron-Latn_eng-Latn)": 90.29, - "Tatoeba (bos-Latn_eng-Latn)": 87.33, - "Tatoeba (nds-Latn_eng-Latn)": 64.54, - "Tatoeba (tgl-Latn_eng-Latn)": 83.24, - "Tatoeba (glg-Latn_eng-Latn)": 86.69, - "Tatoeba (ben-Beng_eng-Latn)": 61.32, - "Tatoeba (khm-Khmr_eng-Latn)": 16.4, - "Tatoeba (ukr-Cyrl_eng-Latn)": 90.19, - "Tatoeba (max-Deva_eng-Latn)": 51.87, - "Tatoeba (lat-Latn_eng-Latn)": 80.43, + "Model": "dragon-plus-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "dragon-plus-instruct", + "ARCChallenge": 8.24, + "AlphaNLI": 25.18, + "HellaSwag": 24.06, + "PIQA": 26.35, + "Quail": 4.2, + "RARbCode": 12.84, + "RARbMath": 36.15, + "SIQA": 1.75, + "SpartQA": 10.82, + "TempReasonL1": 1.54, + "TempReasonL2Fact": 16.11, + "TempReasonL2Pure": 0.57, + "TempReasonL3Fact": 14.81, + "TempReasonL3Pure": 7.46, + "WinoGrande": 60.84 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "dragon-plus-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "dragon-plus-instruct" + } + ] + } + }, + "dragon-plus": { + "BitextMining": { + "f1": [ + { + "Model": "dragon-plus" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "dragon-plus" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "dragon-plus" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "dragon-plus" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "dragon-plus" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "dragon-plus", + "ARCChallenge": 8.91, + "AlphaNLI": 32.1, + "HellaSwag": 27.69, + "PIQA": 28.01, + "Quail": 4.09, + "RARbCode": 17.58, + "RARbMath": 45.09, + "SIQA": 2.0, + "SpartQA": 10.34, + "TempReasonL1": 1.82, + "TempReasonL2Fact": 17.45, + "TempReasonL2Pure": 0.55, + "TempReasonL3Fact": 15.71, + "TempReasonL3Pure": 7.97, + "WinoGrande": 67.18 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "dragon-plus" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "dragon-plus" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "dragon-plus" + } + ] + } + }, + "GritLM-7B": { + "BitextMining": { + "f1": [ + { + "Model": "GritLM-7B", + "BornholmBitextMining (dan-Latn)": 45.13, + "Tatoeba (csb-Latn_eng-Latn)": 50.13, + "Tatoeba (ceb-Latn_eng-Latn)": 33.5, + "Tatoeba (cmn-Hans_eng-Latn)": 94.08, + "Tatoeba (uzb-Latn_eng-Latn)": 41.69, + "Tatoeba (kur-Latn_eng-Latn)": 27.94, + "Tatoeba (ita-Latn_eng-Latn)": 91.2, + "Tatoeba (lvs-Latn_eng-Latn)": 53.54, + "Tatoeba (yid-Hebr_eng-Latn)": 17.13, + "Tatoeba (gle-Latn_eng-Latn)": 48.14, + "Tatoeba (ast-Latn_eng-Latn)": 79.11, + "Tatoeba (ang-Latn_eng-Latn)": 76.84, + "Tatoeba (jav-Latn_eng-Latn)": 26.6, + "Tatoeba 
(ina-Latn_eng-Latn)": 91.24, + "Tatoeba (nob-Latn_eng-Latn)": 93.53, + "Tatoeba (swe-Latn_eng-Latn)": 90.43, + "Tatoeba (lfn-Latn_eng-Latn)": 62.23, + "Tatoeba (fin-Latn_eng-Latn)": 85.76, + "Tatoeba (fry-Latn_eng-Latn)": 61.16, + "Tatoeba (gsw-Latn_eng-Latn)": 53.28, + "Tatoeba (rus-Cyrl_eng-Latn)": 91.82, + "Tatoeba (tat-Cyrl_eng-Latn)": 24.46, + "Tatoeba (mal-Mlym_eng-Latn)": 33.79, + "Tatoeba (hrv-Latn_eng-Latn)": 91.04, + "Tatoeba (ind-Latn_eng-Latn)": 90.05, + "Tatoeba (tam-Taml_eng-Latn)": 46.27, + "Tatoeba (kaz-Cyrl_eng-Latn)": 36.27, + "Tatoeba (uig-Arab_eng-Latn)": 22.6, + "Tatoeba (slv-Latn_eng-Latn)": 82.71, + "Tatoeba (pms-Latn_eng-Latn)": 50.41, + "Tatoeba (lit-Latn_eng-Latn)": 56.36, + "Tatoeba (cha-Latn_eng-Latn)": 34.69, + "Tatoeba (est-Latn_eng-Latn)": 46.73, + "Tatoeba (mhr-Cyrl_eng-Latn)": 10.8, + "Tatoeba (dan-Latn_eng-Latn)": 92.01, + "Tatoeba (pol-Latn_eng-Latn)": 95.6, + "Tatoeba (nov-Latn_eng-Latn)": 64.85, + "Tatoeba (swh-Latn_eng-Latn)": 46.09, + "Tatoeba (tha-Thai_eng-Latn)": 81.25, + "Tatoeba (arz-Arab_eng-Latn)": 52.97, + "Tatoeba (epo-Latn_eng-Latn)": 76.87, + "Tatoeba (deu-Latn_eng-Latn)": 98.02, + "Tatoeba (hye-Armn_eng-Latn)": 35.94, + "Tatoeba (afr-Latn_eng-Latn)": 79.17, + "Tatoeba (gla-Latn_eng-Latn)": 40.8, + "Tatoeba (isl-Latn_eng-Latn)": 74.94, + "Tatoeba (awa-Deva_eng-Latn)": 44.31, + "Tatoeba (ido-Latn_eng-Latn)": 65.69, + "Tatoeba (kor-Hang_eng-Latn)": 87.43, + "Tatoeba (amh-Ethi_eng-Latn)": 6.18, + "Tatoeba (eus-Latn_eng-Latn)": 31.88, + "Tatoeba (mkd-Cyrl_eng-Latn)": 73.82, + "Tatoeba (tur-Latn_eng-Latn)": 86.62, + "Tatoeba (pes-Arab_eng-Latn)": 78.98, + "Tatoeba (heb-Hebr_eng-Latn)": 61.75, + "Tatoeba (aze-Latn_eng-Latn)": 64.11, + "Tatoeba (hun-Latn_eng-Latn)": 88.54, + "Tatoeba (bul-Cyrl_eng-Latn)": 90.37, + "Tatoeba (kab-Latn_eng-Latn)": 2.9, + "Tatoeba (cat-Latn_eng-Latn)": 90.66, + "Tatoeba (dsb-Latn_eng-Latn)": 51.72, + "Tatoeba (kat-Geor_eng-Latn)": 38.42, + "Tatoeba (urd-Arab_eng-Latn)": 68.02, + "Tatoeba (wuu-Hans_eng-Latn)": 80.28, + "Tatoeba (oci-Latn_eng-Latn)": 58.12, + "Tatoeba (arq-Arab_eng-Latn)": 30.52, + "Tatoeba (ron-Latn_eng-Latn)": 90.29, + "Tatoeba (bos-Latn_eng-Latn)": 87.33, + "Tatoeba (nds-Latn_eng-Latn)": 64.54, + "Tatoeba (tgl-Latn_eng-Latn)": 83.24, + "Tatoeba (glg-Latn_eng-Latn)": 86.69, + "Tatoeba (ben-Beng_eng-Latn)": 61.32, + "Tatoeba (khm-Khmr_eng-Latn)": 16.4, + "Tatoeba (ukr-Cyrl_eng-Latn)": 90.19, + "Tatoeba (max-Deva_eng-Latn)": 51.87, + "Tatoeba (lat-Latn_eng-Latn)": 80.43, "Tatoeba (xho-Latn_eng-Latn)": 28.43, "Tatoeba (spa-Latn_eng-Latn)": 96.75, "Tatoeba (tzl-Latn_eng-Latn)": 42.85, @@ -17194,7 +16083,7 @@ "AmazonCounterfactualClassification (en)": 71.1, "AmazonCounterfactualClassification (deu-Latn)": 67.63, "AmazonCounterfactualClassification (jpn-Jpan)": 73.3, - "AmazonPolarityClassification (default)": 86.69, + "AmazonPolarityClassification": 86.69, "AmazonReviewsClassification (en)": 45.51, "AmazonReviewsClassification (deu-Latn)": 43.77, "AmazonReviewsClassification (spa-Latn)": 43.0, @@ -17202,12 +16091,12 @@ "AmazonReviewsClassification (jpn-Jpan)": 41.49, "AmazonReviewsClassification (cmn-Hans)": 35.34, "AngryTweetsClassification (dan-Latn)": 54.68, - "Banking77Classification (default)": 79.36, + "Banking77Classification": 79.36, "CBD (pol-Latn)": 70.98, "DanishPoliticalCommentsClassification (dan-Latn)": 37.69, - "EmotionClassification (default)": 48.79, + "EmotionClassification": 48.79, "IFlyTek (cmn-Hans)": 48.49, - "ImdbClassification (default)": 82.25, + "ImdbClassification": 82.25, 
"JDReview (cmn-Hans)": 84.02, "LccSentimentClassification (dan-Latn)": 57.2, "MTOPDomainClassification (en)": 92.67, @@ -17348,8 +16237,8 @@ "PolEmo2.0-IN (pol-Latn)": 66.07, "PolEmo2.0-OUT (pol-Latn)": 32.94, "TNews (cmn-Hans)": 49.94, - "ToxicConversationsClassification (default)": 63.9, - "TweetSentimentExtractionClassification (default)": 57.14, + "ToxicConversationsClassification": 63.9, + "TweetSentimentExtractionClassification": 57.14, "Waimai (cmn-Hans)": 84.92 } ] @@ -17357,992 +16246,4650 @@ "Clustering": { "v_measure": [ { - "Model": "GritLM-7B", - "MasakhaNEWSClusteringP2P (amh-Ethi)": 45.2, - "MasakhaNEWSClusteringP2P (eng)": 70.5, - "MasakhaNEWSClusteringP2P (fra-Latn)": 73.54, - "MasakhaNEWSClusteringP2P (hau-Latn)": 51.33, - "MasakhaNEWSClusteringP2P (ibo-Latn)": 66.75, - "MasakhaNEWSClusteringP2P (lin-Latn)": 59.57, - "MasakhaNEWSClusteringP2P (lug-Latn)": 58.93, - "MasakhaNEWSClusteringP2P (orm-Ethi)": 54.38, - "MasakhaNEWSClusteringP2P (pcm-Latn)": 92.67, - "MasakhaNEWSClusteringP2P (run-Latn)": 59.51, - "MasakhaNEWSClusteringP2P (sna-Latn)": 68.86, - "MasakhaNEWSClusteringP2P (som-Latn)": 41.42, - "MasakhaNEWSClusteringP2P (swa-Latn)": 33.61, - "MasakhaNEWSClusteringP2P (tir-Ethi)": 51.68, - "MasakhaNEWSClusteringP2P (xho-Latn)": 46.65, - "MasakhaNEWSClusteringP2P (yor-Latn)": 52.39, - "MasakhaNEWSClusteringS2S (amh-Ethi)": 43.39, - "MasakhaNEWSClusteringS2S (eng)": 65.85, - "MasakhaNEWSClusteringS2S (fra-Latn)": 68.87, - "MasakhaNEWSClusteringS2S (hau-Latn)": 33.02, - "MasakhaNEWSClusteringS2S (ibo-Latn)": 64.55, - "MasakhaNEWSClusteringS2S (lin-Latn)": 72.01, - "MasakhaNEWSClusteringS2S (lug-Latn)": 47.42, - "MasakhaNEWSClusteringS2S (orm-Ethi)": 32.59, - "MasakhaNEWSClusteringS2S (pcm-Latn)": 97.82, - "MasakhaNEWSClusteringS2S (run-Latn)": 59.41, - "MasakhaNEWSClusteringS2S (sna-Latn)": 71.58, - "MasakhaNEWSClusteringS2S (som-Latn)": 40.91, - "MasakhaNEWSClusteringS2S (swa-Latn)": 33.54, - "MasakhaNEWSClusteringS2S (tir-Ethi)": 45.32, - "MasakhaNEWSClusteringS2S (xho-Latn)": 28.94, - "MasakhaNEWSClusteringS2S (yor-Latn)": 63.26 + "Model": "GritLM-7B", + "MasakhaNEWSClusteringP2P (amh-Ethi)": 45.2, + "MasakhaNEWSClusteringP2P (eng)": 70.5, + "MasakhaNEWSClusteringP2P (fra-Latn)": 73.54, + "MasakhaNEWSClusteringP2P (hau-Latn)": 51.33, + "MasakhaNEWSClusteringP2P (ibo-Latn)": 66.75, + "MasakhaNEWSClusteringP2P (lin-Latn)": 59.57, + "MasakhaNEWSClusteringP2P (lug-Latn)": 58.93, + "MasakhaNEWSClusteringP2P (orm-Ethi)": 54.38, + "MasakhaNEWSClusteringP2P (pcm-Latn)": 92.67, + "MasakhaNEWSClusteringP2P (run-Latn)": 59.51, + "MasakhaNEWSClusteringP2P (sna-Latn)": 68.86, + "MasakhaNEWSClusteringP2P (som-Latn)": 41.42, + "MasakhaNEWSClusteringP2P (swa-Latn)": 33.61, + "MasakhaNEWSClusteringP2P (tir-Ethi)": 51.68, + "MasakhaNEWSClusteringP2P (xho-Latn)": 46.65, + "MasakhaNEWSClusteringP2P (yor-Latn)": 52.39, + "MasakhaNEWSClusteringS2S (amh-Ethi)": 43.39, + "MasakhaNEWSClusteringS2S (eng)": 65.85, + "MasakhaNEWSClusteringS2S (fra-Latn)": 68.87, + "MasakhaNEWSClusteringS2S (hau-Latn)": 33.02, + "MasakhaNEWSClusteringS2S (ibo-Latn)": 64.55, + "MasakhaNEWSClusteringS2S (lin-Latn)": 72.01, + "MasakhaNEWSClusteringS2S (lug-Latn)": 47.42, + "MasakhaNEWSClusteringS2S (orm-Ethi)": 32.59, + "MasakhaNEWSClusteringS2S (pcm-Latn)": 97.82, + "MasakhaNEWSClusteringS2S (run-Latn)": 59.41, + "MasakhaNEWSClusteringS2S (sna-Latn)": 71.58, + "MasakhaNEWSClusteringS2S (som-Latn)": 40.91, + "MasakhaNEWSClusteringS2S (swa-Latn)": 33.54, + "MasakhaNEWSClusteringS2S (tir-Ethi)": 45.32, + 
"MasakhaNEWSClusteringS2S (xho-Latn)": 28.94, + "MasakhaNEWSClusteringS2S (yor-Latn)": 63.26 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "GritLM-7B", + "CDSC-E (pol-Latn)": 72.65, + "OpusparcusPC (deu-Latn)": 96.65, + "OpusparcusPC (en)": 98.57, + "OpusparcusPC (fin-Latn)": 90.41, + "OpusparcusPC (fra-Latn)": 93.41, + "OpusparcusPC (rus-Cyrl)": 88.63, + "OpusparcusPC (swe-Latn)": 94.04, + "PSC (pol-Latn)": 99.43, + "PawsXPairClassification (deu-Latn)": 58.5, + "PawsXPairClassification (en)": 63.78, + "PawsXPairClassification (spa-Latn)": 59.15, + "PawsXPairClassification (fra-Latn)": 61.89, + "PawsXPairClassification (jpn-Hira)": 51.46, + "PawsXPairClassification (kor-Hang)": 52.15, + "PawsXPairClassification (cmn-Hans)": 57.66, + "SICK-E-PL (pol-Latn)": 75.98, + "SprintDuplicateQuestions": 93.06, + "TwitterSemEval2015": 71.24, + "TwitterURLCorpus": 84.54 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "GritLM-7B", + "AlloprofReranking (fra-Latn)": 77.95, + "AskUbuntuDupQuestions": 61.11, + "MMarcoReranking (cmn-Hans)": 21.7, + "MindSmallReranking": 31.53, + "SciDocsRR": 84.78, + "StackOverflowDupQuestions": 50.95, + "SyntecReranking (fra-Latn)": 83.32, + "T2Reranking (cmn-Hans)": 65.63 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "GritLM-7B", + "AILACasedocs": 35.31, + "AILAStatutes": 41.8, + "ARCChallenge": 26.68, + "AlloprofRetrieval (fra-Latn)": 55.42, + "AlphaNLI": 34.0, + "ArguAna": 63.17, + "ArguAna-PL (pol-Latn)": 48.89, + "BSARDRetrieval (fra-Latn)": 26.63, + "BrightRetrieval (pony)": 21.98, + "BrightRetrieval (robotics)": 17.31, + "BrightRetrieval (economics)": 19.0, + "BrightRetrieval (theoremqa_questions)": 23.34, + "BrightRetrieval (leetcode)": 29.85, + "BrightRetrieval (earth_science)": 32.77, + "BrightRetrieval (stackoverflow)": 11.62, + "BrightRetrieval (sustainable_living)": 18.04, + "BrightRetrieval (biology)": 25.04, + "BrightRetrieval (psychology)": 19.92, + "BrightRetrieval (theoremqa_theorems)": 17.41, + "BrightRetrieval (aops)": 8.91, + "CmedqaRetrieval (cmn-Hans)": 35.58, + "CovidRetrieval (cmn-Hans)": 73.47, + "DuRetrieval (cmn-Hans)": 88.18, + "EcomRetrieval (cmn-Hans)": 54.33, + "FiQA-PL (pol-Latn)": 38.04, + "FiQA2018": 59.91, + "GerDaLIRSmall (deu-Latn)": 20.61, + "HellaSwag": 39.45, + "LEMBNarrativeQARetrieval": 41.46, + "LEMBNeedleRetrieval": 33.25, + "LEMBPasskeyRetrieval": 38.25, + "LEMBQMSumRetrieval": 30.32, + "LEMBSummScreenFDRetrieval": 78.49, + "LEMBWikimQARetrieval": 60.8, + "LeCaRDv2 (zho-Hans)": 64.05, + "LegalBenchConsumerContractsQA": 82.1, + "LegalBenchCorporateLobbying": 95.0, + "LegalQuAD (deu-Latn)": 44.18, + "LegalSummarization": 70.64, + "MMarcoRetrieval (cmn-Hans)": 76.54, + "MedicalRetrieval (cmn-Hans)": 55.81, + "MintakaRetrieval (ara-Arab)": 25.88, + "MintakaRetrieval (deu-Latn)": 55.66, + "MintakaRetrieval (spa-Latn)": 53.36, + "MintakaRetrieval (fra-Latn)": 51.68, + "MintakaRetrieval (hin-Deva)": 26.06, + "MintakaRetrieval (ita-Latn)": 54.91, + "MintakaRetrieval (jpn-Hira)": 34.1, + "MintakaRetrieval (por-Latn)": 54.91, + "NFCorpus": 40.86, + "NFCorpus-PL (pol-Latn)": 32.88, + "PIQA": 44.35, + "Quail": 11.69, + "RARbCode": 84.0, + "RARbMath": 82.35, + "SCIDOCS": 24.4, + "SCIDOCS-PL (pol-Latn)": 18.39, + "SIQA": 7.23, + "SciFact": 79.13, + "SciFact-PL (pol-Latn)": 73.22, + "SpartQA": 9.29, + "SyntecRetrieval (fra-Latn)": 89.48, + "T2Retrieval (cmn-Hans)": 82.96, + "TRECCOVID": 74.36, + "TRECCOVID-PL (pol-Latn)": 58.01, + "TempReasonL1": 7.15, + "TempReasonL2Fact": 58.38, + 
"TempReasonL2Pure": 11.22, + "TempReasonL3Fact": 44.29, + "TempReasonL3Pure": 14.15, + "Touche2020": 27.81, + "VideoRetrieval (cmn-Hans)": 53.85, + "WinoGrande": 53.74, + "XPQARetrieval (ara-Arab_ara-Arab)": 45.21, + "XPQARetrieval (eng-Latn_ara-Arab)": 27.32, + "XPQARetrieval (ara-Arab_eng-Latn)": 39.43, + "XPQARetrieval (deu-Latn_deu-Latn)": 76.58, + "XPQARetrieval (eng-Latn_deu-Latn)": 55.44, + "XPQARetrieval (deu-Latn_eng-Latn)": 72.56, + "XPQARetrieval (spa-Latn_spa-Latn)": 64.55, + "XPQARetrieval (eng-Latn_spa-Latn)": 45.49, + "XPQARetrieval (spa-Latn_eng-Latn)": 61.03, + "XPQARetrieval (fra-Latn_fra-Latn)": 70.85, + "XPQARetrieval (eng-Latn_fra-Latn)": 48.14, + "XPQARetrieval (fra-Latn_eng-Latn)": 66.96, + "XPQARetrieval (hin-Deva_hin-Deva)": 74.75, + "XPQARetrieval (eng-Latn_hin-Deva)": 25.61, + "XPQARetrieval (hin-Deva_eng-Latn)": 63.9, + "XPQARetrieval (ita-Latn_ita-Latn)": 76.53, + "XPQARetrieval (eng-Latn_ita-Latn)": 46.88, + "XPQARetrieval (ita-Latn_eng-Latn)": 71.03, + "XPQARetrieval (jpn-Hira_jpn-Hira)": 72.27, + "XPQARetrieval (eng-Latn_jpn-Hira)": 41.94, + "XPQARetrieval (jpn-Hira_eng-Latn)": 69.42, + "XPQARetrieval (kor-Hang_kor-Hang)": 40.64, + "XPQARetrieval (eng-Latn_kor-Hang)": 32.68, + "XPQARetrieval (kor-Hang_eng-Latn)": 36.0, + "XPQARetrieval (pol-Latn_pol-Latn)": 50.74, + "XPQARetrieval (eng-Latn_pol-Latn)": 33.14, + "XPQARetrieval (pol-Latn_eng-Latn)": 48.06, + "XPQARetrieval (por-Latn_por-Latn)": 49.86, + "XPQARetrieval (eng-Latn_por-Latn)": 33.01, + "XPQARetrieval (por-Latn_eng-Latn)": 48.45, + "XPQARetrieval (tam-Taml_tam-Taml)": 41.78, + "XPQARetrieval (eng-Latn_tam-Taml)": 10.95, + "XPQARetrieval (tam-Taml_eng-Latn)": 21.28, + "XPQARetrieval (cmn-Hans_cmn-Hans)": 65.29, + "XPQARetrieval (eng-Latn_cmn-Hans)": 35.86, + "XPQARetrieval (cmn-Hans_eng-Latn)": 58.12 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "GritLM-7B", + "AFQMC (cmn-Hans)": 32.65, + "ATEC (cmn-Hans)": 37.34, + "BIOSSES": 85.01, + "BQ (cmn-Hans)": 38.03, + "CDSC-R (pol-Latn)": 92.23, + "LCQMC (cmn-Hans)": 71.38, + "PAWSX (cmn-Hans)": 16.4, + "SICK-R": 81.47, + "SICK-R-PL (pol-Latn)": 72.78, + "SICKFr (fra-Latn)": 76.91, + "STS12": 65.84, + "STS13": 78.37, + "STS14": 77.52, + "STS15": 85.43, + "STS16": 79.94, + "STS17 (ita-Latn_eng-Latn)": 88.42, + "STS17 (fra-Latn_eng-Latn)": 87.9, + "STS17 (kor-Hang)": 78.74, + "STS17 (en-en)": 90.12, + "STS17 (nld-Latn_eng-Latn)": 88.29, + "STS17 (ara-Arab)": 79.28, + "STS17 (eng-Latn_deu-Latn)": 88.92, + "STS17 (spa-Latn)": 87.12, + "STS17 (eng-Latn_tur-Latn)": 77.47, + "STS17 (spa-Latn_eng-Latn)": 87.47, + "STS17 (eng-Latn_ara-Arab)": 74.45, + "STS22 (spa-Latn_eng-Latn)": 80.76, + "STS22 (ara-Arab)": 55.45, + "STS22 (pol-Latn_eng-Latn)": 77.77, + "STS22 (deu-Latn_pol-Latn)": 55.09, + "STS22 (en)": 68.59, + "STS22 (rus-Cyrl)": 68.46, + "STS22 (deu-Latn_eng-Latn)": 62.33, + "STS22 (cmn-Hans)": 72.29, + "STS22 (pol-Latn)": 48.07, + "STS22 (fra-Latn)": 83.09, + "STS22 (cmn-Hans_eng-Latn)": 72.73, + "STS22 (deu-Latn_fra-Latn)": 62.14, + "STS22 (spa-Latn_ita-Latn)": 77.63, + "STS22 (fra-Latn_pol-Latn)": 84.52, + "STS22 (ita-Latn)": 77.58, + "STS22 (spa-Latn)": 72.24, + "STS22 (deu-Latn)": 59.34, + "STS22 (tur-Latn)": 70.83, + "STSB (cmn-Hans)": 74.11, + "STSBenchmark": 83.1, + "STSBenchmarkMultilingualSTS (spa-Latn)": 79.51, + "STSBenchmarkMultilingualSTS (ita-Latn)": 76.24, + "STSBenchmarkMultilingualSTS (por-Latn)": 76.61, + "STSBenchmarkMultilingualSTS (fra-Latn)": 77.48, + "STSBenchmarkMultilingualSTS (deu-Latn)": 77.57, + 
"STSBenchmarkMultilingualSTS (en)": 83.12, + "STSBenchmarkMultilingualSTS (nld-Latn)": 74.83, + "STSBenchmarkMultilingualSTS (pol-Latn)": 74.67, + "STSBenchmarkMultilingualSTS (cmn-Hans)": 75.27, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 76.19 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "GritLM-7B", + "SummEval": 30.26, + "SummEvalFr (fra-Latn)": 29.97 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "GritLM-7B", + "Core17InstructionRetrieval": 2.62, + "News21InstructionRetrieval": -1.01, + "Robust04InstructionRetrieval": -1.68 + } + ] + } + }, + "voyage-large-2-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "voyage-large-2-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "voyage-large-2-instruct", + "AmazonCounterfactualClassification (en)": 77.6, + "AmazonPolarityClassification": 96.58, + "AmazonReviewsClassification (en)": 50.77, + "Banking77Classification": 86.96, + "EmotionClassification": 59.81, + "ImdbClassification": 96.13, + "MTOPDomainClassification (en)": 98.86, + "MTOPIntentClassification (en)": 86.97, + "MassiveIntentClassification (en)": 81.08, + "MassiveScenarioClassification (en)": 87.95, + "ToxicConversationsClassification": 83.58, + "TweetSentimentExtractionClassification": 71.55 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "voyage-large-2-instruct", + "ArxivClusteringP2P": 51.81, + "ArxivClusteringS2S": 44.73, + "BiorxivClusteringP2P": 46.07, + "BiorxivClusteringS2S": 40.64, + "MedrxivClusteringP2P": 42.94, + "MedrxivClusteringS2S": 41.44, + "RedditClustering": 68.5, + "RedditClusteringP2P": 64.86, + "StackExchangeClustering": 74.16, + "StackExchangeClusteringP2P": 45.1, + "TwentyNewsgroupsClustering": 66.62 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "voyage-large-2-instruct", + "SprintDuplicateQuestions": 94.5, + "TwitterSemEval2015": 86.32, + "TwitterURLCorpus": 86.9 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "voyage-large-2-instruct", + "AskUbuntuDupQuestions": 64.92, + "MindSmallReranking": 30.97, + "SciDocsRR": 89.34, + "StackOverflowDupQuestions": 55.11 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "voyage-large-2-instruct", + "ArguAna": 64.06, + "BrightRetrieval (theoremqa_questions)": 26.06, + "BrightRetrieval (earth_science)": 25.09, + "BrightRetrieval (leetcode)": 30.6, + "BrightRetrieval (economics)": 19.85, + "BrightRetrieval (robotics)": 11.21, + "BrightRetrieval (psychology)": 24.79, + "BrightRetrieval (aops)": 7.45, + "BrightRetrieval (sustainable_living)": 15.58, + "BrightRetrieval (pony)": 1.48, + "BrightRetrieval (theoremqa_theorems)": 10.13, + "BrightRetrieval (biology)": 23.55, + "BrightRetrieval (stackoverflow)": 15.03, + "CQADupstackRetrieval": 46.6, + "ClimateFEVER": 32.65, + "DBPedia": 46.03, + "FEVER": 91.47, + "FiQA2018": 59.76, + "HotpotQA": 70.86, + "MSMARCO": 40.6, + "NFCorpus": 40.32, + "NQ": 65.92, + "QuoraRetrieval": 87.4, + "SCIDOCS": 24.32, + "SciFact": 79.99, + "TRECCOVID": 85.07, + "Touche2020": 39.16 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "voyage-large-2-instruct", + "BIOSSES": 89.24, + "SICK-R": 83.16, + "STS12": 73.34, + "STS13": 88.49, + "STS14": 86.49, + "STS15": 91.13, + "STS16": 85.68, + "STS17 (en-en)": 90.06, + "STS22 (en)": 66.32, + "STSBenchmark": 89.22 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "voyage-large-2-instruct", + "SummEval": 30.84 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "voyage-large-2-instruct" + } + 
] + } + }, + "bge-m3": { + "BitextMining": { + "f1": [ + { + "Model": "bge-m3" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-m3" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-m3" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-m3" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-m3" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-m3", + "ARCChallenge": 9.02, + "AlphaNLI": 24.73, + "HellaSwag": 25.67, + "LEMBNarrativeQARetrieval": 45.76, + "LEMBNeedleRetrieval": 40.25, + "LEMBPasskeyRetrieval": 46.0, + "LEMBQMSumRetrieval": 35.54, + "LEMBSummScreenFDRetrieval": 94.09, + "LEMBWikimQARetrieval": 77.73, + "PIQA": 22.93, + "Quail": 7.51, + "RARbCode": 38.8, + "RARbMath": 69.19, + "SIQA": 4.89, + "SpartQA": 7.49, + "TempReasonL1": 0.99, + "TempReasonL2Fact": 33.23, + "TempReasonL2Pure": 0.68, + "TempReasonL3Fact": 30.05, + "TempReasonL3Pure": 5.28, + "WinoGrande": 41.72 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-m3" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-m3" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-m3" + } + ] + } + }, + "GritLM-7B-noinstruct": { + "BitextMining": { + "f1": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "GritLM-7B-noinstruct", + "ARCChallenge": 16.57, + "AlphaNLI": 29.56, + "HellaSwag": 36.03, + "PIQA": 35.8, + "Quail": 8.68, + "RARbCode": 83.14, + "RARbMath": 83.01, + "SIQA": 5.73, + "SpartQA": 1.56, + "TempReasonL1": 2.57, + "TempReasonL2Fact": 48.25, + "TempReasonL2Pure": 8.98, + "TempReasonL3Fact": 34.11, + "TempReasonL3Pure": 12.44, + "WinoGrande": 52.12 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "GritLM-7B-noinstruct" + } + ] + } + }, + "bge-small-en-v1.5": { + "BitextMining": { + "f1": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-small-en-v1.5", + "ARCChallenge": 8.95, + "AlphaNLI": 11.64, + "HellaSwag": 25.44, + "PIQA": 23.92, + "Quail": 1.75, + "RARbCode": 42.36, + "RARbMath": 44.98, + "SIQA": 0.77, + "SpartQA": 3.55, + "TempReasonL1": 1.41, + "TempReasonL2Fact": 17.56, + "TempReasonL2Pure": 1.05, + "TempReasonL3Fact": 13.88, + "TempReasonL3Pure": 4.76, + "WinoGrande": 10.28 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-small-en-v1.5" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-small-en-v1.5" + } + ] + } + }, + "contriever-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "contriever-instruct" + } + 
] + }, + "Classification": { + "accuracy": [ + { + "Model": "contriever-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "contriever-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "contriever-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "contriever-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "contriever-instruct", + "ARCChallenge": 7.63, + "AlphaNLI": 27.09, + "PIQA": 21.73, + "Quail": 4.92, + "RARbCode": 7.12, + "RARbMath": 21.83, + "SIQA": 0.88, + "SpartQA": 10.56, + "TempReasonL1": 1.8, + "TempReasonL2Fact": 22.03, + "TempReasonL2Pure": 0.94, + "TempReasonL3Fact": 20.82, + "TempReasonL3Pure": 7.15, + "WinoGrande": 26.3 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "contriever-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "contriever-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "contriever-instruct" + } + ] + } + }, + "bge-base-en-v1.5": { + "BitextMining": { + "f1": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-base-en-v1.5", + "BiorxivClusteringP2P": 39.44, + "BiorxivClusteringS2S": 36.62, + "MedrxivClusteringP2P": 33.21, + "MedrxivClusteringS2S": 31.68, + "RedditClustering": 56.61, + "RedditClusteringP2P": 62.66, + "StackExchangeClustering": 66.11, + "StackExchangeClusteringP2P": 35.24, + "TwentyNewsgroupsClustering": 50.75 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-base-en-v1.5", + "ARCChallenge": 9.66, + "AlphaNLI": 10.99, + "HellaSwag": 26.64, + "PIQA": 25.69, + "Quail": 1.42, + "RARbCode": 46.47, + "RARbMath": 46.86, + "SIQA": 0.94, + "SpartQA": 3.37, + "TempReasonL1": 1.07, + "TempReasonL2Fact": 17.23, + "TempReasonL2Pure": 1.29, + "TempReasonL3Fact": 13.36, + "TempReasonL3Pure": 5.2, + "WinoGrande": 13.76 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-base-en-v1.5" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-base-en-v1.5" + } + ] + } + }, + "e5-mistral-7b-instruct-noinstruct": { + "BitextMining": { + "f1": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct", + "ARCChallenge": 20.48, + "AlphaNLI": 18.88, + "HellaSwag": 32.25, + "PIQA": 32.8, + "Quail": 6.25, + "RARbCode": 79.84, + "RARbMath": 76.19, + "SIQA": 5.08, + "SpartQA": 10.87, + "TempReasonL1": 3.04, + "TempReasonL2Fact": 35.63, + "TempReasonL2Pure": 9.32, + "TempReasonL3Fact": 30.41, + "TempReasonL3Pure": 14.39, + "WinoGrande": 45.18 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + 
} + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "e5-mistral-7b-instruct-noinstruct" + } + ] + } + }, + "bge-large-en-v1.5-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-large-en-v1.5-instruct", + "ARCChallenge": 8.86, + "AlphaNLI": 0.86, + "HellaSwag": 26.24, + "PIQA": 23.26, + "Quail": 2.72, + "RARbCode": 45.25, + "RARbMath": 49.82, + "SIQA": 0.59, + "SpartQA": 2.34, + "TempReasonL1": 1.17, + "TempReasonL2Fact": 21.19, + "TempReasonL2Pure": 2.1, + "TempReasonL3Fact": 17.59, + "TempReasonL3Pure": 5.99, + "WinoGrande": 10.31 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-large-en-v1.5-instruct" + } + ] + } + }, + "all-mpnet-base-v2-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "all-mpnet-base-v2-instruct", + "ARCChallenge": 10.35, + "AlphaNLI": 1.96, + "HellaSwag": 13.01, + "PIQA": 27.18, + "Quail": 3.02, + "RARbCode": 48.95, + "RARbMath": 69.21, + "SIQA": 1.29, + "SpartQA": 1.01, + "TempReasonL1": 1.52, + "TempReasonL2Fact": 7.28, + "TempReasonL2Pure": 1.03, + "TempReasonL3Fact": 7.03, + "TempReasonL3Pure": 5.16, + "WinoGrande": 9.66 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "all-mpnet-base-v2-instruct" + } + ] + } + }, + "Cohere-embed-english-v3.0": { + "BitextMining": { + "f1": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "Cohere-embed-english-v3.0", + "AILACasedocs": 31.54, + "AILAStatutes": 27.15, + "ARCChallenge": 9.89, + "AlphaNLI": 15.1, + "BrightRetrieval (psychology)": 21.82, + "BrightRetrieval (economics)": 20.18, + "BrightRetrieval (robotics)": 16.21, + "BrightRetrieval (biology)": 18.98, + "BrightRetrieval (stackoverflow)": 16.47, + "BrightRetrieval (theoremqa_theorems)": 6.04, + "BrightRetrieval (pony)": 1.77, + "BrightRetrieval (sustainable_living)": 17.69, + "BrightRetrieval (aops)": 6.46, + "BrightRetrieval 
(theoremqa_questions)": 15.07, + "BrightRetrieval (leetcode)": 26.78, + "BrightRetrieval (earth_science)": 27.45, + "GerDaLIRSmall": 6.05, + "HellaSwag": 26.35, + "LeCaRDv2": 21.02, + "LegalBenchConsumerContractsQA": 77.12, + "LegalBenchCorporateLobbying": 93.68, + "LegalQuAD": 26.08, + "LegalSummarization": 61.7, + "PIQA": 28.49, + "Quail": 4.1, + "RARbCode": 57.19, + "RARbMath": 72.26, + "SIQA": 4.26, + "SpartQA": 3.75, + "TempReasonL1": 1.5, + "TempReasonL2Fact": 35.91, + "TempReasonL2Pure": 1.89, + "TempReasonL3Fact": 27.51, + "TempReasonL3Pure": 8.53, + "WinoGrande": 58.01 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "Cohere-embed-english-v3.0" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "Cohere-embed-english-v3.0", + "Core17InstructionRetrieval": 2.8, + "News21InstructionRetrieval": 0.2, + "Robust04InstructionRetrieval": -3.63 + } + ] + } + }, + "Cohere-embed-english-v3.0-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "Cohere-embed-english-v3.0-instruct", + "ARCChallenge": 10.1, + "AlphaNLI": 18.75, + "HellaSwag": 29.02, + "PIQA": 27.89, + "Quail": 7.77, + "RARbCode": 56.56, + "RARbMath": 72.05, + "SIQA": 5.03, + "SpartQA": 3.33, + "TempReasonL1": 1.43, + "TempReasonL2Fact": 40.46, + "TempReasonL2Pure": 2.39, + "TempReasonL3Fact": 33.87, + "TempReasonL3Pure": 7.52, + "WinoGrande": 65.02 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "Cohere-embed-english-v3.0-instruct" + } + ] + } + }, + "text-embedding-ada-002-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-ada-002-instruct", + "ARCChallenge": 11.85, + "AlphaNLI": 10.62, + "HellaSwag": 24.8, + "PIQA": 23.87, + "Quail": 5.79, + "RARbCode": 82.36, + "RARbMath": 67.26, + "SIQA": 2.64, + "SpartQA": 4.75, + "TempReasonL1": 1.44, + "TempReasonL2Fact": 19.38, + "TempReasonL2Pure": 2.43, + "TempReasonL3Fact": 17.58, + "TempReasonL3Pure": 7.31, + "WinoGrande": 11.36 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-embedding-ada-002-instruct" + } + ] + } + }, + "bge-m3-instruct": { + "BitextMining": 
{ + "f1": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-m3-instruct", + "ARCChallenge": 9.03, + "AlphaNLI": 24.69, + "HellaSwag": 25.55, + "PIQA": 19.03, + "Quail": 7.08, + "RARbCode": 39.58, + "RARbMath": 64.51, + "SIQA": 4.77, + "SpartQA": 7.0, + "TempReasonL1": 0.8, + "TempReasonL2Fact": 34.99, + "TempReasonL2Pure": 0.62, + "TempReasonL3Fact": 32.47, + "TempReasonL3Pure": 7.01, + "WinoGrande": 35.33 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-m3-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-m3-instruct" + } + ] + } + }, + "text-embedding-3-large-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-large-instruct", + "ARCChallenge": 21.22, + "AlphaNLI": 34.23, + "HellaSwag": 31.4, + "PIQA": 37.52, + "Quail": 13.6, + "RARbCode": 89.41, + "RARbMath": 87.73, + "SIQA": 4.99, + "SpartQA": 7.45, + "TempReasonL1": 2.07, + "TempReasonL2Fact": 39.77, + "TempReasonL2Pure": 11.04, + "TempReasonL3Fact": 37.04, + "TempReasonL3Pure": 15.51, + "WinoGrande": 33.92 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-embedding-3-large-instruct" + } + ] + } + }, + "all-MiniLM-L6-v2": { + "BitextMining": { + "f1": [ + { + "Model": "all-MiniLM-L6-v2", + "BornholmBitextMining (dan-Latn)": 29.68, + "BornholmBitextMining": 29.68, + "Tatoeba (kab-Latn_eng-Latn)": 0.96, + "Tatoeba (aze-Latn_eng-Latn)": 1.04, + "Tatoeba (wuu-Hans_eng-Latn)": 0.6, + "Tatoeba (fra-Latn_eng-Latn)": 8.17, + "Tatoeba (nov-Latn_eng-Latn)": 13.97, + "Tatoeba (slk-Latn_eng-Latn)": 3.27, + "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, + "Tatoeba (ukr-Cyrl_eng-Latn)": 0.3, + "Tatoeba (kur-Latn_eng-Latn)": 5.21, + "Tatoeba (hin-Deva_eng-Latn)": 0.0, + "Tatoeba (tgl-Latn_eng-Latn)": 2.69, + "Tatoeba (jav-Latn_eng-Latn)": 3.37, + "Tatoeba (nob-Latn_eng-Latn)": 4.34, + "Tatoeba (tam-Taml_eng-Latn)": 0.33, + "Tatoeba (hsb-Latn_eng-Latn)": 2.65, + "Tatoeba (srp-Cyrl_eng-Latn)": 1.28, + "Tatoeba (cat-Latn_eng-Latn)": 6.93, + "Tatoeba (jpn-Jpan_eng-Latn)": 0.97, + "Tatoeba (kzj-Latn_eng-Latn)": 2.78, + "Tatoeba (uig-Arab_eng-Latn)": 0.2, + "Tatoeba (max-Deva_eng-Latn)": 6.93, + "Tatoeba (dtp-Latn_eng-Latn)": 1.88, + "Tatoeba (cbk-Latn_eng-Latn)": 7.04, + "Tatoeba (bre-Latn_eng-Latn)": 3.22, + "Tatoeba (arz-Arab_eng-Latn)": 0.0, + "Tatoeba (heb-Hebr_eng-Latn)": 0.22, + "Tatoeba (kat-Geor_eng-Latn)": 0.3, + "Tatoeba (yid-Hebr_eng-Latn)": 0.14, + 
"Tatoeba (lit-Latn_eng-Latn)": 0.92, + "Tatoeba (ber-Tfng_eng-Latn)": 4.69, + "Tatoeba (hun-Latn_eng-Latn)": 3.56, + "Tatoeba (mhr-Cyrl_eng-Latn)": 0.0, + "Tatoeba (isl-Latn_eng-Latn)": 2.37, + "Tatoeba (ind-Latn_eng-Latn)": 3.86, + "Tatoeba (tuk-Latn_eng-Latn)": 3.52, + "Tatoeba (kor-Hang_eng-Latn)": 0.45, + "Tatoeba (ara-Arab_eng-Latn)": 0.0, + "Tatoeba (tzl-Latn_eng-Latn)": 4.58, + "Tatoeba (swe-Latn_eng-Latn)": 6.06, + "Tatoeba (ang-Latn_eng-Latn)": 15.64, + "Tatoeba (mon-Cyrl_eng-Latn)": 0.38, + "Tatoeba (urd-Arab_eng-Latn)": 0.1, + "Tatoeba (vie-Latn_eng-Latn)": 3.07, + "Tatoeba (ina-Latn_eng-Latn)": 17.63, + "Tatoeba (hrv-Latn_eng-Latn)": 3.83, + "Tatoeba (war-Latn_eng-Latn)": 4.94, + "Tatoeba (cor-Latn_eng-Latn)": 2.41, + "Tatoeba (tur-Latn_eng-Latn)": 3.59, + "Tatoeba (bul-Cyrl_eng-Latn)": 0.21, + "Tatoeba (spa-Latn_eng-Latn)": 5.63, + "Tatoeba (tel-Telu_eng-Latn)": 0.46, + "Tatoeba (nds-Latn_eng-Latn)": 9.56, + "Tatoeba (lvs-Latn_eng-Latn)": 2.61, + "Tatoeba (amh-Ethi_eng-Latn)": 0.25, + "Tatoeba (pms-Latn_eng-Latn)": 7.62, + "Tatoeba (xho-Latn_eng-Latn)": 4.01, + "Tatoeba (epo-Latn_eng-Latn)": 5.46, + "Tatoeba (por-Latn_eng-Latn)": 8.29, + "Tatoeba (ile-Latn_eng-Latn)": 13.54, + "Tatoeba (ell-Grek_eng-Latn)": 0.1, + "Tatoeba (oci-Latn_eng-Latn)": 6.55, + "Tatoeba (pes-Arab_eng-Latn)": 0.0, + "Tatoeba (tat-Cyrl_eng-Latn)": 0.44, + "Tatoeba (awa-Deva_eng-Latn)": 0.51, + "Tatoeba (fao-Latn_eng-Latn)": 5.33, + "Tatoeba (swg-Latn_eng-Latn)": 8.92, + "Tatoeba (uzb-Latn_eng-Latn)": 2.34, + "Tatoeba (cym-Latn_eng-Latn)": 6.09, + "Tatoeba (mar-Deva_eng-Latn)": 0.0, + "Tatoeba (fry-Latn_eng-Latn)": 11.22, + "Tatoeba (ces-Latn_eng-Latn)": 3.04, + "Tatoeba (afr-Latn_eng-Latn)": 5.89, + "Tatoeba (csb-Latn_eng-Latn)": 3.78, + "Tatoeba (pol-Latn_eng-Latn)": 2.58, + "Tatoeba (gla-Latn_eng-Latn)": 2.7, + "Tatoeba (deu-Latn_eng-Latn)": 7.89, + "Tatoeba (cmn-Hans_eng-Latn)": 1.92, + "Tatoeba (ita-Latn_eng-Latn)": 9.9, + "Tatoeba (ben-Beng_eng-Latn)": 0.0, + "Tatoeba (glg-Latn_eng-Latn)": 9.31, + "Tatoeba (dsb-Latn_eng-Latn)": 2.9, + "Tatoeba (pam-Latn_eng-Latn)": 3.54, + "Tatoeba (ast-Latn_eng-Latn)": 6.84, + "Tatoeba (bos-Latn_eng-Latn)": 5.58, + "Tatoeba (nld-Latn_eng-Latn)": 10.16, + "Tatoeba (bel-Cyrl_eng-Latn)": 0.5, + "Tatoeba (orv-Cyrl_eng-Latn)": 0.0, + "Tatoeba (gsw-Latn_eng-Latn)": 11.33, + "Tatoeba (dan-Latn_eng-Latn)": 7.84, + "Tatoeba (hye-Armn_eng-Latn)": 0.41, + "Tatoeba (mal-Mlym_eng-Latn)": 0.15, + "Tatoeba (arq-Arab_eng-Latn)": 0.11, + "Tatoeba (kaz-Cyrl_eng-Latn)": 0.42, + "Tatoeba (khm-Khmr_eng-Latn)": 0.42, + "Tatoeba (tha-Thai_eng-Latn)": 0.3, + "Tatoeba (swh-Latn_eng-Latn)": 5.8, + "Tatoeba (gle-Latn_eng-Latn)": 2.75, + "Tatoeba (ceb-Latn_eng-Latn)": 3.39, + "Tatoeba (sqi-Latn_eng-Latn)": 3.58, + "Tatoeba (slv-Latn_eng-Latn)": 3.25, + "Tatoeba (ido-Latn_eng-Latn)": 7.48, + "Tatoeba (yue-Hant_eng-Latn)": 0.86, + "Tatoeba (nno-Latn_eng-Latn)": 5.38, + "Tatoeba (est-Latn_eng-Latn)": 2.36, + "Tatoeba (lfn-Latn_eng-Latn)": 4.55, + "Tatoeba (lat-Latn_eng-Latn)": 5.04, + "Tatoeba (cha-Latn_eng-Latn)": 13.29, + "Tatoeba (eus-Latn_eng-Latn)": 5.54, + "Tatoeba (fin-Latn_eng-Latn)": 2.79, + "Tatoeba (rus-Cyrl_eng-Latn)": 0.07, + "Tatoeba (ron-Latn_eng-Latn)": 6.82, + "Tatoeba (zsm-Latn_eng-Latn)": 4.24 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "all-MiniLM-L6-v2", + "AllegroReviews (pol-Latn)": 24.64, + "AmazonCounterfactualClassification (en-ext)": 65.59, + "AmazonCounterfactualClassification (en)": 64.15, + "AmazonCounterfactualClassification (deu-Latn)": 
57.82, + "AmazonCounterfactualClassification (jpn-Jpan)": 60.9, + "AmazonPolarityClassification": 62.58, + "AmazonReviewsClassification (en)": 31.79, + "AmazonReviewsClassification (deu-Latn)": 26.44, + "AmazonReviewsClassification (spa-Latn)": 27.35, + "AmazonReviewsClassification (fra-Latn)": 26.88, + "AmazonReviewsClassification (jpn-Jpan)": 23.78, + "AmazonReviewsClassification (cmn-Hans)": 23.67, + "AngryTweetsClassification (dan-Latn)": 42.48, + "AngryTweetsClassification": 42.49, + "Banking77Classification": 79.75, + "CBD (pol-Latn)": 50.9, + "DKHateClassification": 55.05, + "DanishPoliticalCommentsClassification (dan-Latn)": 26.7, + "DanishPoliticalCommentsClassification": 26.96, + "EmotionClassification": 38.43, + "IFlyTek (cmn-Hans)": 16.09, + "ImdbClassification": 60.66, + "JDReview (cmn-Hans)": 59.98, + "LccSentimentClassification (dan-Latn)": 38.53, + "LccSentimentClassification": 38.47, + "MTOPDomainClassification (en)": 91.56, + "MTOPDomainClassification (deu-Latn)": 70.47, + "MTOPDomainClassification (spa-Latn)": 72.99, + "MTOPDomainClassification (fra-Latn)": 75.1, + "MTOPDomainClassification (hin-Deva)": 40.74, + "MTOPDomainClassification (tha-Thai)": 15.66, + "MTOPIntentClassification (en)": 62.18, + "MTOPIntentClassification (deu-Latn)": 45.7, + "MTOPIntentClassification (spa-Latn)": 44.19, + "MTOPIntentClassification (fra-Latn)": 39.67, + "MTOPIntentClassification (hin-Deva)": 18.69, + "MTOPIntentClassification (tha-Thai)": 5.78, + "MasakhaNEWSClassification (amh-Ethi)": 33.03, + "MasakhaNEWSClassification (eng)": 77.11, + "MasakhaNEWSClassification (fra-Latn)": 68.84, + "MasakhaNEWSClassification (hau-Latn)": 50.49, + "MasakhaNEWSClassification (ibo-Latn)": 52.15, + "MasakhaNEWSClassification (lin-Latn)": 68.29, + "MasakhaNEWSClassification (lug-Latn)": 47.58, + "MasakhaNEWSClassification (orm-Ethi)": 50.68, + "MasakhaNEWSClassification (pcm-Latn)": 92.56, + "MasakhaNEWSClassification (run-Latn)": 54.81, + "MasakhaNEWSClassification (sna-Latn)": 65.58, + "MasakhaNEWSClassification (som-Latn)": 39.8, + "MasakhaNEWSClassification (swa-Latn)": 47.25, + "MasakhaNEWSClassification (tir-Ethi)": 28.97, + "MasakhaNEWSClassification (xho-Latn)": 54.14, + "MasakhaNEWSClassification (yor-Latn)": 55.01, + "MasakhaNEWSClassification (fra)": 74.05, + "MassiveIntentClassification (aze-Latn)": 30.63, + "MassiveIntentClassification (spa-Latn)": 39.88, + "MassiveIntentClassification (tam-Taml)": 11.31, + "MassiveIntentClassification (swe-Latn)": 38.09, + "MassiveIntentClassification (fas-Arab)": 19.1, + "MassiveIntentClassification (khm-Khmr)": 4.89, + "MassiveIntentClassification (mon-Cyrl)": 20.35, + "MassiveIntentClassification (hye-Armn)": 7.62, + "MassiveIntentClassification (kan-Knda)": 3.14, + "MassiveIntentClassification (cmo-Hans)": 24.4, + "MassiveIntentClassification (rus-Cyrl)": 27.58, + "MassiveIntentClassification (jpn-Jpan)": 31.87, + "MassiveIntentClassification (en)": 67.4, + "MassiveIntentClassification (deu-Latn)": 43.44, + "MassiveIntentClassification (ind-Latn)": 39.02, + "MassiveIntentClassification (cym-Latn)": 34.54, + "MassiveIntentClassification (nld-Latn)": 40.2, + "MassiveIntentClassification (hin-Deva)": 17.7, + "MassiveIntentClassification (afr-Latn)": 37.45, + "MassiveIntentClassification (ell-Grek)": 24.19, + "MassiveIntentClassification (mal-Mlym)": 2.87, + "MassiveIntentClassification (por-Latn)": 43.76, + "MassiveIntentClassification (sqi-Latn)": 40.7, + "MassiveIntentClassification (urd-Arab)": 14.42, + "MassiveIntentClassification (vie-Latn)": 
37.09, + "MassiveIntentClassification (hun-Latn)": 35.69, + "MassiveIntentClassification (ron-Latn)": 40.54, + "MassiveIntentClassification (ara-Arab)": 19.05, + "MassiveIntentClassification (nob-Latn)": 39.36, + "MassiveIntentClassification (slv-Latn)": 36.7, + "MassiveIntentClassification (lav-Latn)": 36.97, + "MassiveIntentClassification (heb-Hebr)": 22.48, + "MassiveIntentClassification (pol-Latn)": 36.07, + "MassiveIntentClassification (ita-Latn)": 41.59, + "MassiveIntentClassification (msa-Latn)": 35.07, + "MassiveIntentClassification (mya-Mymr)": 4.24, + "MassiveIntentClassification (isl-Latn)": 29.95, + "MassiveIntentClassification (tel-Telu)": 2.46, + "MassiveIntentClassification (swa-Latn)": 34.98, + "MassiveIntentClassification (amh-Ethi)": 2.62, + "MassiveIntentClassification (cmo-Hant)": 22.56, + "MassiveIntentClassification (tha-Thai)": 11.26, + "MassiveIntentClassification (ben-Beng)": 13.1, + "MassiveIntentClassification (fin-Latn)": 38.37, + "MassiveIntentClassification (fra-Latn)": 42.55, + "MassiveIntentClassification (kor-Kore)": 16.05, + "MassiveIntentClassification (kat-Geor)": 9.07, + "MassiveIntentClassification (dan-Latn)": 41.0, + "MassiveIntentClassification (tur-Latn)": 33.76, + "MassiveIntentClassification (tgl-Latn)": 37.92, + "MassiveIntentClassification (jav-Latn)": 35.91, + "MassiveIntentClassification (da)": 40.99, + "MassiveIntentClassification (nb)": 39.34, + "MassiveIntentClassification (sv)": 38.1, + "MassiveScenarioClassification (mal-Mlym)": 7.67, + "MassiveScenarioClassification (khm-Khmr)": 9.25, + "MassiveScenarioClassification (deu-Latn)": 51.47, + "MassiveScenarioClassification (msa-Latn)": 43.67, + "MassiveScenarioClassification (heb-Hebr)": 24.01, + "MassiveScenarioClassification (mon-Cyrl)": 25.47, + "MassiveScenarioClassification (mya-Mymr)": 10.61, + "MassiveScenarioClassification (ind-Latn)": 43.46, + "MassiveScenarioClassification (nob-Latn)": 44.67, + "MassiveScenarioClassification (fra-Latn)": 51.14, + "MassiveScenarioClassification (tgl-Latn)": 45.69, + "MassiveScenarioClassification (amh-Ethi)": 7.57, + "MassiveScenarioClassification (fas-Arab)": 23.97, + "MassiveScenarioClassification (vie-Latn)": 40.47, + "MassiveScenarioClassification (sqi-Latn)": 47.21, + "MassiveScenarioClassification (dan-Latn)": 47.02, + "MassiveScenarioClassification (spa-Latn)": 49.0, + "MassiveScenarioClassification (pol-Latn)": 43.82, + "MassiveScenarioClassification (tel-Telu)": 7.95, + "MassiveScenarioClassification (tha-Thai)": 19.5, + "MassiveScenarioClassification (kor-Kore)": 20.3, + "MassiveScenarioClassification (cmo-Hans)": 33.65, + "MassiveScenarioClassification (urd-Arab)": 23.73, + "MassiveScenarioClassification (aze-Latn)": 35.59, + "MassiveScenarioClassification (ron-Latn)": 48.23, + "MassiveScenarioClassification (jav-Latn)": 43.59, + "MassiveScenarioClassification (slv-Latn)": 41.9, + "MassiveScenarioClassification (kat-Geor)": 14.92, + "MassiveScenarioClassification (lav-Latn)": 40.43, + "MassiveScenarioClassification (cym-Latn)": 39.0, + "MassiveScenarioClassification (swe-Latn)": 42.95, + "MassiveScenarioClassification (rus-Cyrl)": 30.46, + "MassiveScenarioClassification (ben-Beng)": 20.56, + "MassiveScenarioClassification (por-Latn)": 50.72, + "MassiveScenarioClassification (hye-Armn)": 13.03, + "MassiveScenarioClassification (jpn-Jpan)": 37.3, + "MassiveScenarioClassification (nld-Latn)": 48.43, + "MassiveScenarioClassification (swa-Latn)": 43.32, + "MassiveScenarioClassification (tam-Taml)": 17.37, + "MassiveScenarioClassification 
(isl-Latn)": 36.12, + "MassiveScenarioClassification (kan-Knda)": 7.85, + "MassiveScenarioClassification (ell-Grek)": 31.3, + "MassiveScenarioClassification (tur-Latn)": 38.85, + "MassiveScenarioClassification (cmo-Hant)": 31.18, + "MassiveScenarioClassification (en)": 75.76, + "MassiveScenarioClassification (fin-Latn)": 42.38, + "MassiveScenarioClassification (hin-Deva)": 23.71, + "MassiveScenarioClassification (ara-Arab)": 25.99, + "MassiveScenarioClassification (hun-Latn)": 41.61, + "MassiveScenarioClassification (afr-Latn)": 43.87, + "MassiveScenarioClassification (ita-Latn)": 49.8, + "MassiveScenarioClassification (da)": 47.01, + "MassiveScenarioClassification (nb)": 44.67, + "MassiveScenarioClassification (sv)": 42.93, + "MultilingualSentiment (cmn-Hans)": 41.28, + "NoRecClassification (nob-Latn)": 37.93, + "NoRecClassification": 40.02, + "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 54.7, + "NordicLangClassification": 54.71, + "NorwegianParliament": 54.8, + "OnlineShopping (cmn-Hans)": 57.74, + "PAC (pol-Latn)": 59.78, + "PolEmo2.0-IN (pol-Latn)": 40.29, + "PolEmo2.0-OUT (pol-Latn)": 25.0, + "ScalaDaClassification": 50.03, + "ScalaNbClassification": 50.17, + "TNews (cmn-Hans)": 20.12, + "ToxicConversationsClassification": 66.99, + "TweetSentimentExtractionClassification": 55.41, + "Waimai (cmn-Hans)": 62.72 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "all-MiniLM-L6-v2", + "AlloProfClusteringP2P": 51.83, + "AlloProfClusteringS2S": 32.07, + "ArxivClusteringP2P": 46.55, + "ArxivClusteringS2S": 37.86, + "BiorxivClusteringP2P": 38.48, + "BiorxivClusteringS2S": 33.17, + "HALClusteringS2S": 18.84, + "MLSUMClusteringP2P": 36.74, + "MLSUMClusteringS2S": 28.12, + "MasakhaNEWSClusteringP2P (amh-Ethi)": 43.85, + "MasakhaNEWSClusteringP2P (eng)": 48.88, + "MasakhaNEWSClusteringP2P (fra-Latn)": 34.92, + "MasakhaNEWSClusteringP2P (hau-Latn)": 24.77, + "MasakhaNEWSClusteringP2P (ibo-Latn)": 45.94, + "MasakhaNEWSClusteringP2P (lin-Latn)": 69.56, + "MasakhaNEWSClusteringP2P (lug-Latn)": 49.4, + "MasakhaNEWSClusteringP2P (orm-Ethi)": 25.34, + "MasakhaNEWSClusteringP2P (pcm-Latn)": 85.57, + "MasakhaNEWSClusteringP2P (run-Latn)": 50.75, + "MasakhaNEWSClusteringP2P (sna-Latn)": 41.68, + "MasakhaNEWSClusteringP2P (som-Latn)": 29.02, + "MasakhaNEWSClusteringP2P (swa-Latn)": 21.87, + "MasakhaNEWSClusteringP2P (tir-Ethi)": 42.93, + "MasakhaNEWSClusteringP2P (xho-Latn)": 28.58, + "MasakhaNEWSClusteringP2P (yor-Latn)": 31.45, + "MasakhaNEWSClusteringP2P (fra)": 34.92, + "MasakhaNEWSClusteringS2S (amh-Ethi)": 45.44, + "MasakhaNEWSClusteringS2S (eng)": 41.09, + "MasakhaNEWSClusteringS2S (fra-Latn)": 40.58, + "MasakhaNEWSClusteringS2S (hau-Latn)": 15.42, + "MasakhaNEWSClusteringS2S (ibo-Latn)": 37.02, + "MasakhaNEWSClusteringS2S (lin-Latn)": 65.14, + "MasakhaNEWSClusteringS2S (lug-Latn)": 44.21, + "MasakhaNEWSClusteringS2S (orm-Ethi)": 24.79, + "MasakhaNEWSClusteringS2S (pcm-Latn)": 61.48, + "MasakhaNEWSClusteringS2S (run-Latn)": 51.25, + "MasakhaNEWSClusteringS2S (sna-Latn)": 42.74, + "MasakhaNEWSClusteringS2S (som-Latn)": 30.08, + "MasakhaNEWSClusteringS2S (swa-Latn)": 9.55, + "MasakhaNEWSClusteringS2S (tir-Ethi)": 46.04, + "MasakhaNEWSClusteringS2S (xho-Latn)": 27.08, + "MasakhaNEWSClusteringS2S (yor-Latn)": 31.04, + "MasakhaNEWSClusteringS2S (fra)": 40.58, + "MedrxivClusteringP2P": 34.41, + "MedrxivClusteringS2S": 32.29, + "RedditClustering": 50.67, + "RedditClusteringP2P": 54.15, + "StackExchangeClustering": 53.36, + "StackExchangeClusteringP2P": 38.0, 
+ "TwentyNewsgroupsClustering": 46.86 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "all-MiniLM-L6-v2", + "CDSC-E (pol-Latn)": 47.27, + "OpusparcusPC (deu-Latn)": 89.91, + "OpusparcusPC (en)": 97.46, + "OpusparcusPC (fin-Latn)": 85.44, + "OpusparcusPC (fra-Latn)": 86.53, + "OpusparcusPC (rus-Cyrl)": 79.28, + "OpusparcusPC (swe-Latn)": 83.78, + "OpusparcusPC (fr)": 86.53, + "PSC (pol-Latn)": 81.87, + "PawsXPairClassification (deu-Latn)": 51.22, + "PawsXPairClassification (en)": 59.1, + "PawsXPairClassification (spa-Latn)": 52.21, + "PawsXPairClassification (fra-Latn)": 55.41, + "PawsXPairClassification (jpn-Hira)": 48.97, + "PawsXPairClassification (kor-Hang)": 50.53, + "PawsXPairClassification (cmn-Hans)": 53.11, + "PawsXPairClassification (fr)": 55.4, + "SICK-E-PL (pol-Latn)": 47.32, + "SprintDuplicateQuestions": 94.55, + "TwitterSemEval2015": 67.86, + "TwitterURLCorpus": 84.7 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "all-MiniLM-L6-v2", + "AlloprofReranking (fra-Latn)": 62.62, + "AlloprofReranking": 31.69, + "AskUbuntuDupQuestions": 63.48, + "MMarcoReranking (cmn-Hans)": 4.74, + "MindSmallReranking": 30.8, + "SciDocsRR": 87.12, + "StackOverflowDupQuestions": 50.76, + "SyntecReranking (fra-Latn)": 67.31, + "SyntecReranking": 59.57, + "T2Reranking (cmn-Hans)": 56.26 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "all-MiniLM-L6-v2", + "AILACasedocs": 19.72, + "AILAStatutes": 20.52, + "ARCChallenge": 9.48, + "AlloprofRetrieval (fra-Latn)": 28.41, + "AlloprofRetrieval": 28.41, + "AlphaNLI": 28.19, + "ArguAna": 50.17, + "ArguAna-PL (pol-Latn)": 11.5, + "BSARDRetrieval (fra-Latn)": 4.8, + "BSARDRetrieval": 0.0, + "CQADupstackRetrieval": 41.32, + "ClimateFEVER": 20.27, + "CmedqaRetrieval (cmn-Hans)": 2.03, + "CovidRetrieval (cmn-Hans)": 0.8, + "DBPedia": 32.33, + "DuRetrieval (cmn-Hans)": 3.03, + "EcomRetrieval (cmn-Hans)": 3.7, + "FEVER": 51.93, + "FiQA-PL (pol-Latn)": 2.29, + "FiQA2018": 36.87, + "GerDaLIRSmall (deu-Latn)": 2.41, + "HellaSwag": 24.21, + "HotpotQA": 46.51, + "LEMBNarrativeQARetrieval": 18.27, + "LEMBNeedleRetrieval": 20.0, + "LEMBPasskeyRetrieval": 23.25, + "LEMBQMSumRetrieval": 16.32, + "LEMBSummScreenFDRetrieval": 54.8, + "LEMBWikimQARetrieval": 46.23, + "LeCaRDv2 (zho-Hans)": 17.5, + "LegalBenchConsumerContractsQA": 65.6, + "LegalBenchCorporateLobbying": 86.41, + "LegalQuAD (deu-Latn)": 11.81, + "LegalSummarization": 59.0, + "MMarcoRetrieval (cmn-Hans)": 6.21, + "MSMARCO": 36.54, + "MedicalRetrieval (cmn-Hans)": 1.76, + "MintakaRetrieval (ara-Arab)": 2.22, + "MintakaRetrieval (deu-Latn)": 15.43, + "MintakaRetrieval (spa-Latn)": 7.72, + "MintakaRetrieval (fra-Latn)": 9.19, + "MintakaRetrieval (hin-Deva)": 2.65, + "MintakaRetrieval (ita-Latn)": 8.48, + "MintakaRetrieval (jpn-Hira)": 6.7, + "MintakaRetrieval (por-Latn)": 9.76, + "MintakaRetrieval (fr)": 9.19, + "NFCorpus": 31.59, + "NFCorpus-PL (pol-Latn)": 10.62, + "NQ": 43.87, + "PIQA": 25.28, + "Quail": 3.92, + "QuoraRetrieval": 87.56, + "RARbCode": 44.27, + "RARbMath": 68.19, + "SCIDOCS": 21.64, + "SCIDOCS-PL (pol-Latn)": 3.75, + "SIQA": 1.56, + "SciFact": 64.51, + "SciFact-PL (pol-Latn)": 16.14, + "SpartQA": 1.65, + "SyntecRetrieval (fra-Latn)": 60.15, + "SyntecRetrieval": 60.15, + "T2Retrieval (cmn-Hans)": 1.6, + "TRECCOVID": 47.25, + "TRECCOVID-PL (pol-Latn)": 8.66, + "TempReasonL1": 1.53, + "TempReasonL2Fact": 17.65, + "TempReasonL2Pure": 0.46, + "TempReasonL3Fact": 14.16, + "TempReasonL3Pure": 6.33, + "Touche2020": 16.9, + "VideoRetrieval (cmn-Hans)": 9.79, + 
"WinoGrande": 47.33, + "XPQARetrieval (ara-Arab_ara-Arab)": 8.03, + "XPQARetrieval (eng-Latn_ara-Arab)": 1.86, + "XPQARetrieval (ara-Arab_eng-Latn)": 6.87, + "XPQARetrieval (deu-Latn_deu-Latn)": 53.25, + "XPQARetrieval (eng-Latn_deu-Latn)": 10.99, + "XPQARetrieval (deu-Latn_eng-Latn)": 27.59, + "XPQARetrieval (spa-Latn_spa-Latn)": 38.87, + "XPQARetrieval (eng-Latn_spa-Latn)": 5.46, + "XPQARetrieval (spa-Latn_eng-Latn)": 22.2, + "XPQARetrieval (fra-Latn_fra-Latn)": 51.79, + "XPQARetrieval (eng-Latn_fra-Latn)": 8.57, + "XPQARetrieval (fra-Latn_eng-Latn)": 31.36, + "XPQARetrieval (hin-Deva_hin-Deva)": 35.3, + "XPQARetrieval (eng-Latn_hin-Deva)": 6.28, + "XPQARetrieval (hin-Deva_eng-Latn)": 6.0, + "XPQARetrieval (ita-Latn_ita-Latn)": 54.57, + "XPQARetrieval (eng-Latn_ita-Latn)": 6.79, + "XPQARetrieval (ita-Latn_eng-Latn)": 24.13, + "XPQARetrieval (jpn-Hira_jpn-Hira)": 39.23, + "XPQARetrieval (eng-Latn_jpn-Hira)": 4.1, + "XPQARetrieval (jpn-Hira_eng-Latn)": 13.05, + "XPQARetrieval (kor-Hang_kor-Hang)": 10.24, + "XPQARetrieval (eng-Latn_kor-Hang)": 5.72, + "XPQARetrieval (kor-Hang_eng-Latn)": 6.37, + "XPQARetrieval (pol-Latn_pol-Latn)": 22.33, + "XPQARetrieval (eng-Latn_pol-Latn)": 7.58, + "XPQARetrieval (pol-Latn_eng-Latn)": 14.43, + "XPQARetrieval (por-Latn_por-Latn)": 31.93, + "XPQARetrieval (eng-Latn_por-Latn)": 5.9, + "XPQARetrieval (por-Latn_eng-Latn)": 20.74, + "XPQARetrieval (tam-Taml_tam-Taml)": 7.43, + "XPQARetrieval (eng-Latn_tam-Taml)": 3.42, + "XPQARetrieval (tam-Taml_eng-Latn)": 2.91, + "XPQARetrieval (cmn-Hans_cmn-Hans)": 19.39, + "XPQARetrieval (eng-Latn_cmn-Hans)": 5.05, + "XPQARetrieval (cmn-Hans_eng-Latn)": 8.77, + "XPQARetrieval (fr)": 51.79 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "all-MiniLM-L6-v2", + "AFQMC (cmn-Hans)": 8.59, + "ATEC (cmn-Hans)": 13.52, + "BIOSSES": 81.64, + "BQ (cmn-Hans)": 23.84, + "CDSC-R (pol-Latn)": 79.45, + "LCQMC (cmn-Hans)": 23.85, + "PAWSX (cmn-Hans)": 7.21, + "SICK-R": 77.58, + "SICK-R-PL (pol-Latn)": 52.43, + "SICKFr (fra-Latn)": 62.48, + "SICKFr": 62.48, + "STS12": 72.37, + "STS13": 80.6, + "STS14": 75.59, + "STS15": 85.39, + "STS16": 78.99, + "STS17 (ara-Arab)": 50.89, + "STS17 (spa-Latn_eng-Latn)": 16.31, + "STS17 (en-en)": 87.59, + "STS17 (kor-Hang)": 43.39, + "STS17 (eng-Latn_tur-Latn)": 4.5, + "STS17 (fra-Latn_eng-Latn)": 37.09, + "STS17 (nld-Latn_eng-Latn)": 29.0, + "STS17 (eng-Latn_ara-Arab)": -4.28, + "STS17 (spa-Latn)": 76.12, + "STS17 (eng-Latn_deu-Latn)": 35.82, + "STS17 (ita-Latn_eng-Latn)": 24.45, + "STS17 (ar-ar)": 50.89, + "STS17 (en-ar)": -4.28, + "STS17 (en-de)": 35.82, + "STS17 (en-tr)": 4.5, + "STS17 (es-en)": 16.31, + "STS17 (es-es)": 76.12, + "STS17 (fr-en)": 37.09, + "STS17 (it-en)": 24.45, + "STS17 (ko-ko)": 43.39, + "STS17 (nl-en)": 29.0, + "STS22 (tur-Latn)": 33.69, + "STS22 (spa-Latn)": 54.78, + "STS22 (ara-Arab)": 22.64, + "STS22 (deu-Latn_pol-Latn)": -4.93, + "STS22 (spa-Latn_eng-Latn)": 53.42, + "STS22 (cmn-Hans_eng-Latn)": 41.64, + "STS22 (rus-Cyrl)": 14.72, + "STS22 (spa-Latn_ita-Latn)": 44.27, + "STS22 (deu-Latn_fra-Latn)": 30.07, + "STS22 (deu-Latn)": 31.04, + "STS22 (fra-Latn_pol-Latn)": 50.71, + "STS22 (en)": 67.21, + "STS22 (pol-Latn)": 26.77, + "STS22 (pol-Latn_eng-Latn)": 32.8, + "STS22 (deu-Latn_eng-Latn)": 44.04, + "STS22 (ita-Latn)": 60.4, + "STS22 (fra-Latn)": 77.0, + "STS22 (cmn-Hans)": 44.93, + "STS22 (ar)": 22.64, + "STS22 (de)": 31.04, + "STS22 (de-en)": 44.04, + "STS22 (de-fr)": 30.07, + "STS22 (de-pl)": 4.93, + "STS22 (es)": 54.78, + "STS22 (es-en)": 53.42, + "STS22 (es-it)": 44.27, 
+ "STS22 (fr)": 77.0, + "STS22 (fr-pl)": 50.71, + "STS22 (it)": 60.4, + "STS22 (pl)": 26.77, + "STS22 (pl-en)": 32.8, + "STS22 (ru)": 14.72, + "STS22 (tr)": 33.69, + "STS22 (zh)": 44.93, + "STS22 (zh-en)": 41.64, + "STSB (cmn-Hans)": 37.8, + "STSBenchmark": 82.03, + "STSBenchmarkMultilingualSTS (pol-Latn)": 56.42, + "STSBenchmarkMultilingualSTS (por-Latn)": 61.56, + "STSBenchmarkMultilingualSTS (ita-Latn)": 59.24, + "STSBenchmarkMultilingualSTS (fra-Latn)": 64.93, + "STSBenchmarkMultilingualSTS (deu-Latn)": 62.4, + "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.74, + "STSBenchmarkMultilingualSTS (spa-Latn)": 61.62, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.55, + "STSBenchmarkMultilingualSTS (en)": 82.03, + "STSBenchmarkMultilingualSTS (nld-Latn)": 55.46, + "STSBenchmarkMultilingualSTS (fr)": 64.93 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "all-MiniLM-L6-v2", + "SummEval": 30.81, + "SummEvalFr (fra-Latn)": 28.29, + "SummEvalFr": 28.28 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "all-MiniLM-L6-v2" + } + ] + } + }, + "contriever": { + "BitextMining": { + "f1": [ + { + "Model": "contriever" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "contriever" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "contriever" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "contriever" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "contriever" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "contriever", + "ARCChallenge": 8.62, + "AlphaNLI": 31.77, + "HellaSwag": 14.42, + "PIQA": 24.64, + "Quail": 4.97, + "RARbCode": 9.28, + "RARbMath": 30.76, + "SIQA": 1.27, + "SpartQA": 10.94, + "TempReasonL1": 1.93, + "TempReasonL2Fact": 22.68, + "TempReasonL2Pure": 1.12, + "TempReasonL3Fact": 20.62, + "TempReasonL3Pure": 7.8, + "WinoGrande": 47.15 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "contriever" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "contriever" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "contriever" + } + ] + } + }, + "text-embedding-3-small": { + "BitextMining": { + "f1": [ + { + "Model": "text-embedding-3-small" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-embedding-3-small", + "AmazonCounterfactualClassification (en)": 76.42, + "AmazonPolarityClassification": 90.84, + "AmazonReviewsClassification (en)": 45.73, + "Banking77Classification": 83.01, + "EmotionClassification": 50.63, + "ImdbClassification": 83.66, + "MTOPDomainClassification (en)": 93.91, + "MTOPIntentClassification (en)": 70.98, + "MassiveIntentClassification (en)": 72.86, + "MassiveScenarioClassification (en)": 76.84, + "ToxicConversationsClassification": 71.91, + "TweetSentimentExtractionClassification": 61.72 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text-embedding-3-small", + "ArxivClusteringP2P": 46.57, + "ArxivClusteringS2S": 39.35, + "BiorxivClusteringP2P": 37.77, + "BiorxivClusteringS2S": 34.68, + "MedrxivClusteringP2P": 32.77, + "MedrxivClusteringS2S": 31.85, + "RedditClustering": 64.09, + "RedditClusteringP2P": 65.12, + "StackExchangeClustering": 72.05, + "StackExchangeClusteringP2P": 34.04, + "TwentyNewsgroupsClustering": 54.81 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-embedding-3-small", + "OpusparcusPC (fr)": 94.45, + "SprintDuplicateQuestions": 94.58, + "TwitterSemEval2015": 73.33, + "TwitterURLCorpus": 87.21 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": 
"text-embedding-3-small", + "AskUbuntuDupQuestions": 62.18, + "MindSmallReranking": 29.93, + "SciDocsRR": 83.25, + "StackOverflowDupQuestions": 51.53 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-small", + "ARCChallenge": 14.63, + "AlphaNLI": 30.61, + "ArguAna": 55.49, + "CQADupstackRetrieval": 42.58, + "ClimateFEVER": 26.86, + "DBPedia": 39.97, + "FEVER": 79.42, + "FiQA2018": 44.91, + "HellaSwag": 30.94, + "HotpotQA": 63.63, + "MSMARCO": 37.02, + "NFCorpus": 38.33, + "NQ": 52.86, + "PIQA": 33.69, + "Quail": 6.11, + "QuoraRetrieval": 88.83, + "RARbCode": 72.03, + "RARbMath": 71.07, + "SCIDOCS": 20.8, + "SIQA": 3.03, + "SciFact": 73.37, + "SpartQA": 6.63, + "TRECCOVID": 77.9, + "TempReasonL1": 2.35, + "TempReasonL2Fact": 25.68, + "TempReasonL2Pure": 2.76, + "TempReasonL3Fact": 22.09, + "TempReasonL3Pure": 9.79, + "Touche2020": 24.28, + "WinoGrande": 31.53 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-embedding-3-small", + "BIOSSES": 88.72, + "SICK-R": 76.73, + "STS12": 73.09, + "STS13": 84.92, + "STS14": 79.81, + "STS15": 88.01, + "STS16": 84.41, + "STS17 (en-en)": 90.94, + "STS22 (en)": 64.96, + "STSBenchmark": 84.24 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-embedding-3-small", + "SummEval": 31.12 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-embedding-3-small" + } + ] + } + }, + "bge-large-en-v1.5": { + "BitextMining": { + "f1": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "bge-large-en-v1.5", + "AILACasedocs": 25.15, + "AILAStatutes": 20.74, + "ARCChallenge": 9.99, + "AlphaNLI": 13.13, + "BrightRetrieval (stackoverflow)": 9.51, + "BrightRetrieval (earth_science)": 24.15, + "BrightRetrieval (aops)": 6.08, + "BrightRetrieval (sustainable_living)": 13.27, + "BrightRetrieval (psychology)": 17.44, + "BrightRetrieval (robotics)": 12.21, + "BrightRetrieval (theoremqa_theorems)": 5.51, + "BrightRetrieval (pony)": 5.64, + "BrightRetrieval (biology)": 11.96, + "BrightRetrieval (theoremqa_questions)": 12.56, + "BrightRetrieval (leetcode)": 26.68, + "BrightRetrieval (economics)": 16.59, + "GerDaLIRSmall": 3.96, + "HellaSwag": 28.5, + "LeCaRDv2": 22.68, + "LegalBenchConsumerContractsQA": 73.52, + "LegalBenchCorporateLobbying": 91.51, + "LegalQuAD": 16.22, + "LegalSummarization": 59.99, + "PIQA": 27.99, + "Quail": 1.83, + "RARbCode": 48.12, + "RARbMath": 57.36, + "SIQA": 1.04, + "SpartQA": 2.99, + "TempReasonL1": 1.46, + "TempReasonL2Fact": 24.25, + "TempReasonL2Pure": 2.35, + "TempReasonL3Fact": 20.64, + "TempReasonL3Pure": 6.67, + "WinoGrande": 19.18 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "bge-large-en-v1.5" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "bge-large-en-v1.5" + } + ] + } + }, + "text-embedding-3-small-instruct": { + "BitextMining": { + "f1": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": 
"text-embedding-3-small-instruct" + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-small-instruct", + "ARCChallenge": 13.76, + "AlphaNLI": 21.14, + "HellaSwag": 27.2, + "PIQA": 29.59, + "Quail": 6.64, + "RARbCode": 72.14, + "RARbMath": 64.31, + "SIQA": 2.98, + "SpartQA": 3.58, + "TempReasonL1": 2.29, + "TempReasonL2Fact": 26.34, + "TempReasonL2Pure": 3.17, + "TempReasonL3Fact": 22.72, + "TempReasonL3Pure": 9.98, + "WinoGrande": 25.49 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-embedding-3-small-instruct" + } + ] + } + }, + "all-MiniLM-L12-v2": { + "BitextMining": { + "f1": [ + { + "Model": "all-MiniLM-L12-v2", + "BornholmBitextMining (dan-Latn)": 35.25, + "Tatoeba (spa-Latn_eng-Latn)": 11.26, + "Tatoeba (bos-Latn_eng-Latn)": 7.05, + "Tatoeba (xho-Latn_eng-Latn)": 3.66, + "Tatoeba (fry-Latn_eng-Latn)": 14.53, + "Tatoeba (tur-Latn_eng-Latn)": 3.69, + "Tatoeba (fao-Latn_eng-Latn)": 5.92, + "Tatoeba (vie-Latn_eng-Latn)": 5.06, + "Tatoeba (ind-Latn_eng-Latn)": 5.3, + "Tatoeba (pol-Latn_eng-Latn)": 4.29, + "Tatoeba (swe-Latn_eng-Latn)": 7.31, + "Tatoeba (ita-Latn_eng-Latn)": 12.57, + "Tatoeba (dtp-Latn_eng-Latn)": 3.31, + "Tatoeba (ron-Latn_eng-Latn)": 8.77, + "Tatoeba (isl-Latn_eng-Latn)": 3.44, + "Tatoeba (hrv-Latn_eng-Latn)": 5.68, + "Tatoeba (cha-Latn_eng-Latn)": 13.07, + "Tatoeba (cor-Latn_eng-Latn)": 2.47, + "Tatoeba (cym-Latn_eng-Latn)": 5.13, + "Tatoeba (jpn-Jpan_eng-Latn)": 2.18, + "Tatoeba (lfn-Latn_eng-Latn)": 7.52, + "Tatoeba (hun-Latn_eng-Latn)": 3.93, + "Tatoeba (lat-Latn_eng-Latn)": 7.14, + "Tatoeba (tgl-Latn_eng-Latn)": 3.34, + "Tatoeba (kur-Latn_eng-Latn)": 7.3, + "Tatoeba (war-Latn_eng-Latn)": 6.18, + "Tatoeba (kab-Latn_eng-Latn)": 0.91, + "Tatoeba (kaz-Cyrl_eng-Latn)": 0.82, + "Tatoeba (slv-Latn_eng-Latn)": 4.52, + "Tatoeba (nds-Latn_eng-Latn)": 11.35, + "Tatoeba (pam-Latn_eng-Latn)": 4.73, + "Tatoeba (bul-Cyrl_eng-Latn)": 0.23, + "Tatoeba (ces-Latn_eng-Latn)": 4.2, + "Tatoeba (nno-Latn_eng-Latn)": 7.45, + "Tatoeba (ben-Beng_eng-Latn)": 0.02, + "Tatoeba (amh-Ethi_eng-Latn)": 0.01, + "Tatoeba (lit-Latn_eng-Latn)": 1.56, + "Tatoeba (pes-Arab_eng-Latn)": 0.3, + "Tatoeba (jav-Latn_eng-Latn)": 3.5, + "Tatoeba (mal-Mlym_eng-Latn)": 0.24, + "Tatoeba (lvs-Latn_eng-Latn)": 3.45, + "Tatoeba (gsw-Latn_eng-Latn)": 9.9, + "Tatoeba (fra-Latn_eng-Latn)": 17.53, + "Tatoeba (orv-Cyrl_eng-Latn)": 0.15, + "Tatoeba (kat-Geor_eng-Latn)": 0.45, + "Tatoeba (awa-Deva_eng-Latn)": 0.44, + "Tatoeba (epo-Latn_eng-Latn)": 8.5, + "Tatoeba (mhr-Cyrl_eng-Latn)": 0.0, + "Tatoeba (dan-Latn_eng-Latn)": 10.21, + "Tatoeba (bel-Cyrl_eng-Latn)": 0.85, + "Tatoeba (nld-Latn_eng-Latn)": 12.56, + "Tatoeba (mkd-Cyrl_eng-Latn)": 0.01, + "Tatoeba (mon-Cyrl_eng-Latn)": 0.06, + "Tatoeba (ast-Latn_eng-Latn)": 9.99, + "Tatoeba (cat-Latn_eng-Latn)": 11.79, + "Tatoeba (oci-Latn_eng-Latn)": 8.72, + "Tatoeba (khm-Khmr_eng-Latn)": 0.42, + "Tatoeba (urd-Arab_eng-Latn)": 0.0, + "Tatoeba (tzl-Latn_eng-Latn)": 6.87, + "Tatoeba (arq-Arab_eng-Latn)": 0.28, + "Tatoeba (uig-Arab_eng-Latn)": 0.4, + "Tatoeba (dsb-Latn_eng-Latn)": 3.06, + "Tatoeba (hsb-Latn_eng-Latn)": 2.89, + "Tatoeba (kzj-Latn_eng-Latn)": 
3.64, + "Tatoeba (cbk-Latn_eng-Latn)": 9.76, + "Tatoeba (afr-Latn_eng-Latn)": 7.59, + "Tatoeba (gle-Latn_eng-Latn)": 3.08, + "Tatoeba (csb-Latn_eng-Latn)": 5.21, + "Tatoeba (mar-Deva_eng-Latn)": 0.04, + "Tatoeba (arz-Arab_eng-Latn)": 0.0, + "Tatoeba (tat-Cyrl_eng-Latn)": 0.75, + "Tatoeba (hin-Deva_eng-Latn)": 0.0, + "Tatoeba (ang-Latn_eng-Latn)": 14.63, + "Tatoeba (heb-Hebr_eng-Latn)": 0.3, + "Tatoeba (tuk-Latn_eng-Latn)": 2.66, + "Tatoeba (ile-Latn_eng-Latn)": 17.43, + "Tatoeba (zsm-Latn_eng-Latn)": 5.99, + "Tatoeba (kor-Hang_eng-Latn)": 0.9, + "Tatoeba (uzb-Latn_eng-Latn)": 2.2, + "Tatoeba (fin-Latn_eng-Latn)": 3.65, + "Tatoeba (hye-Armn_eng-Latn)": 0.5, + "Tatoeba (ukr-Cyrl_eng-Latn)": 0.57, + "Tatoeba (swh-Latn_eng-Latn)": 5.82, + "Tatoeba (gla-Latn_eng-Latn)": 2.58, + "Tatoeba (aze-Latn_eng-Latn)": 1.47, + "Tatoeba (ara-Arab_eng-Latn)": 0.43, + "Tatoeba (eus-Latn_eng-Latn)": 6.58, + "Tatoeba (deu-Latn_eng-Latn)": 13.89, + "Tatoeba (por-Latn_eng-Latn)": 11.36, + "Tatoeba (ber-Tfng_eng-Latn)": 4.72, + "Tatoeba (sqi-Latn_eng-Latn)": 5.86, + "Tatoeba (pms-Latn_eng-Latn)": 8.94, + "Tatoeba (ina-Latn_eng-Latn)": 25.36, + "Tatoeba (ido-Latn_eng-Latn)": 11.08, + "Tatoeba (slk-Latn_eng-Latn)": 4.2, + "Tatoeba (glg-Latn_eng-Latn)": 12.6, + "Tatoeba (nov-Latn_eng-Latn)": 19.45, + "Tatoeba (tel-Telu_eng-Latn)": 0.67, + "Tatoeba (tam-Taml_eng-Latn)": 0.33, + "Tatoeba (bre-Latn_eng-Latn)": 3.68, + "Tatoeba (tha-Thai_eng-Latn)": 0.67, + "Tatoeba (nob-Latn_eng-Latn)": 8.02, + "Tatoeba (est-Latn_eng-Latn)": 2.6, + "Tatoeba (wuu-Hans_eng-Latn)": 1.89, + "Tatoeba (swg-Latn_eng-Latn)": 11.9, + "Tatoeba (max-Deva_eng-Latn)": 8.4, + "Tatoeba (srp-Cyrl_eng-Latn)": 2.22, + "Tatoeba (yue-Hant_eng-Latn)": 1.89, + "Tatoeba (rus-Cyrl_eng-Latn)": 0.07, + "Tatoeba (ell-Grek_eng-Latn)": 0.2, + "Tatoeba (ceb-Latn_eng-Latn)": 3.95, + "Tatoeba (yid-Hebr_eng-Latn)": 0.19, + "Tatoeba (cmn-Hans_eng-Latn)": 2.45 + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "all-MiniLM-L12-v2", + "AllegroReviews (pol-Latn)": 23.85, + "AmazonCounterfactualClassification (de)": 57.1, + "AmazonCounterfactualClassification (en)": 65.28, + "AmazonCounterfactualClassification (en-ext)": 67.24, + "AmazonCounterfactualClassification (ja)": 59.91, + "AmazonCounterfactualClassification (deu-Latn)": 57.13, + "AmazonCounterfactualClassification (jpn-Jpan)": 59.94, + "AmazonPolarityClassification": 62.99, + "AmazonReviewsClassification (de)": 25.91, + "AmazonReviewsClassification (en)": 30.79, + "AmazonReviewsClassification (es)": 27.63, + "AmazonReviewsClassification (fr)": 27.54, + "AmazonReviewsClassification (ja)": 23.57, + "AmazonReviewsClassification (zh)": 22.99, + "AmazonReviewsClassification (deu-Latn)": 25.92, + "AmazonReviewsClassification (spa-Latn)": 27.64, + "AmazonReviewsClassification (fra-Latn)": 27.53, + "AmazonReviewsClassification (jpn-Jpan)": 23.57, + "AmazonReviewsClassification (cmn-Hans)": 22.99, + "AngryTweetsClassification (dan-Latn)": 42.87, + "Banking77Classification": 80.41, + "CBD (pol-Latn)": 48.46, + "DanishPoliticalCommentsClassification (dan-Latn)": 27.07, + "EmotionClassification": 41.17, + "IFlyTek (cmn-Hans)": 15.31, + "ImdbClassification": 59.78, + "JDReview (cmn-Hans)": 59.57, + "LccSentimentClassification (dan-Latn)": 41.93, + "MTOPDomainClassification (de)": 72.04, + "MTOPDomainClassification (en)": 91.88, + "MTOPDomainClassification (es)": 72.99, + "MTOPDomainClassification (fr)": 75.59, + "MTOPDomainClassification (hi)": 40.36, + "MTOPDomainClassification (th)": 17.1, + 
"MTOPDomainClassification (deu-Latn)": 72.04, + "MTOPDomainClassification (spa-Latn)": 72.99, + "MTOPDomainClassification (fra-Latn)": 75.57, + "MTOPDomainClassification (hin-Deva)": 40.4, + "MTOPDomainClassification (tha-Thai)": 16.36, + "MTOPIntentClassification (de)": 43.41, + "MTOPIntentClassification (en)": 62.83, + "MTOPIntentClassification (es)": 41.88, + "MTOPIntentClassification (fr)": 38.94, + "MTOPIntentClassification (hi)": 17.75, + "MTOPIntentClassification (th)": 5.63, + "MTOPIntentClassification (deu-Latn)": 43.42, + "MTOPIntentClassification (spa-Latn)": 41.91, + "MTOPIntentClassification (fra-Latn)": 38.96, + "MTOPIntentClassification (hin-Deva)": 17.76, + "MTOPIntentClassification (tha-Thai)": 6.13, + "MasakhaNEWSClassification (fra)": 72.2, + "MasakhaNEWSClassification (amh-Ethi)": 30.64, + "MasakhaNEWSClassification (eng)": 76.62, + "MasakhaNEWSClassification (fra-Latn)": 67.18, + "MasakhaNEWSClassification (hau-Latn)": 52.59, + "MasakhaNEWSClassification (ibo-Latn)": 54.26, + "MasakhaNEWSClassification (lin-Latn)": 62.23, + "MasakhaNEWSClassification (lug-Latn)": 47.62, + "MasakhaNEWSClassification (orm-Ethi)": 47.17, + "MasakhaNEWSClassification (pcm-Latn)": 91.77, + "MasakhaNEWSClassification (run-Latn)": 54.47, + "MasakhaNEWSClassification (sna-Latn)": 66.53, + "MasakhaNEWSClassification (som-Latn)": 40.27, + "MasakhaNEWSClassification (swa-Latn)": 47.77, + "MasakhaNEWSClassification (tir-Ethi)": 21.18, + "MasakhaNEWSClassification (xho-Latn)": 54.34, + "MasakhaNEWSClassification (yor-Latn)": 58.61, + "MassiveIntentClassification (af)": 38.94, + "MassiveIntentClassification (am)": 2.45, + "MassiveIntentClassification (ar)": 20.94, + "MassiveIntentClassification (az)": 34.25, + "MassiveIntentClassification (bn)": 13.67, + "MassiveIntentClassification (cy)": 35.71, + "MassiveIntentClassification (da)": 44.43, + "MassiveIntentClassification (de)": 44.17, + "MassiveIntentClassification (el)": 28.7, + "MassiveIntentClassification (en)": 67.11, + "MassiveIntentClassification (es)": 40.91, + "MassiveIntentClassification (fa)": 23.52, + "MassiveIntentClassification (fi)": 39.27, + "MassiveIntentClassification (fr)": 44.82, + "MassiveIntentClassification (he)": 23.65, + "MassiveIntentClassification (hi)": 17.98, + "MassiveIntentClassification (hu)": 38.0, + "MassiveIntentClassification (hy)": 8.69, + "MassiveIntentClassification (id)": 39.66, + "MassiveIntentClassification (is)": 35.14, + "MassiveIntentClassification (it)": 43.17, + "MassiveIntentClassification (ja)": 30.94, + "MassiveIntentClassification (jv)": 36.69, + "MassiveIntentClassification (ka)": 9.17, + "MassiveIntentClassification (km)": 4.99, + "MassiveIntentClassification (kn)": 3.08, + "MassiveIntentClassification (ko)": 19.97, + "MassiveIntentClassification (lv)": 38.61, + "MassiveIntentClassification (ml)": 2.85, + "MassiveIntentClassification (mn)": 23.25, + "MassiveIntentClassification (ms)": 36.21, + "MassiveIntentClassification (my)": 4.38, + "MassiveIntentClassification (nb)": 41.91, + "MassiveIntentClassification (nl)": 41.85, + "MassiveIntentClassification (pl)": 37.63, + "MassiveIntentClassification (pt)": 45.12, + "MassiveIntentClassification (ro)": 41.71, + "MassiveIntentClassification (ru)": 26.33, + "MassiveIntentClassification (sl)": 38.52, + "MassiveIntentClassification (sq)": 41.62, + "MassiveIntentClassification (sv)": 40.42, + "MassiveIntentClassification (sw)": 35.28, + "MassiveIntentClassification (ta)": 13.1, + "MassiveIntentClassification (te)": 2.56, + "MassiveIntentClassification 
(th)": 10.54, + "MassiveIntentClassification (tl)": 38.56, + "MassiveIntentClassification (tr)": 35.9, + "MassiveIntentClassification (ur)": 16.18, + "MassiveIntentClassification (vi)": 37.38, + "MassiveIntentClassification (zh-CN)": 23.74, + "MassiveIntentClassification (zh-TW)": 22.39, + "MassiveIntentClassification (jpn-Jpan)": 30.89, + "MassiveIntentClassification (khm-Khmr)": 4.99, + "MassiveIntentClassification (slv-Latn)": 38.48, + "MassiveIntentClassification (hye-Armn)": 8.69, + "MassiveIntentClassification (ita-Latn)": 43.16, + "MassiveIntentClassification (fin-Latn)": 39.19, + "MassiveIntentClassification (afr-Latn)": 38.84, + "MassiveIntentClassification (kor-Kore)": 19.97, + "MassiveIntentClassification (ben-Beng)": 13.7, + "MassiveIntentClassification (heb-Hebr)": 23.71, + "MassiveIntentClassification (dan-Latn)": 44.35, + "MassiveIntentClassification (fra-Latn)": 44.75, + "MassiveIntentClassification (pol-Latn)": 37.59, + "MassiveIntentClassification (por-Latn)": 45.08, + "MassiveIntentClassification (tha-Thai)": 10.46, + "MassiveIntentClassification (nob-Latn)": 41.79, + "MassiveIntentClassification (kat-Geor)": 9.17, + "MassiveIntentClassification (tgl-Latn)": 38.63, + "MassiveIntentClassification (swe-Latn)": 40.33, + "MassiveIntentClassification (hun-Latn)": 37.95, + "MassiveIntentClassification (cmo-Hant)": 22.38, + "MassiveIntentClassification (hin-Deva)": 18.0, + "MassiveIntentClassification (tur-Latn)": 35.93, + "MassiveIntentClassification (vie-Latn)": 37.35, + "MassiveIntentClassification (mal-Mlym)": 2.83, + "MassiveIntentClassification (aze-Latn)": 34.3, + "MassiveIntentClassification (amh-Ethi)": 2.45, + "MassiveIntentClassification (kan-Knda)": 3.07, + "MassiveIntentClassification (deu-Latn)": 44.12, + "MassiveIntentClassification (rus-Cyrl)": 26.29, + "MassiveIntentClassification (ara-Arab)": 21.02, + "MassiveIntentClassification (msa-Latn)": 36.16, + "MassiveIntentClassification (nld-Latn)": 41.77, + "MassiveIntentClassification (fas-Arab)": 23.56, + "MassiveIntentClassification (isl-Latn)": 35.17, + "MassiveIntentClassification (cym-Latn)": 35.65, + "MassiveIntentClassification (cmo-Hans)": 23.74, + "MassiveIntentClassification (ell-Grek)": 28.68, + "MassiveIntentClassification (spa-Latn)": 40.82, + "MassiveIntentClassification (ind-Latn)": 39.65, + "MassiveIntentClassification (jav-Latn)": 36.67, + "MassiveIntentClassification (mon-Cyrl)": 23.27, + "MassiveIntentClassification (mya-Mymr)": 4.36, + "MassiveIntentClassification (sqi-Latn)": 41.47, + "MassiveIntentClassification (tel-Telu)": 2.54, + "MassiveIntentClassification (ron-Latn)": 41.64, + "MassiveIntentClassification (tam-Taml)": 13.12, + "MassiveIntentClassification (swa-Latn)": 35.26, + "MassiveIntentClassification (urd-Arab)": 16.26, + "MassiveIntentClassification (lav-Latn)": 38.54, + "MassiveScenarioClassification (af)": 45.71, + "MassiveScenarioClassification (am)": 7.41, + "MassiveScenarioClassification (ar)": 27.62, + "MassiveScenarioClassification (az)": 39.58, + "MassiveScenarioClassification (bn)": 18.98, + "MassiveScenarioClassification (cy)": 41.4, + "MassiveScenarioClassification (da)": 49.47, + "MassiveScenarioClassification (de)": 52.07, + "MassiveScenarioClassification (el)": 35.51, + "MassiveScenarioClassification (en)": 74.57, + "MassiveScenarioClassification (es)": 50.74, + "MassiveScenarioClassification (fa)": 29.0, + "MassiveScenarioClassification (fi)": 45.8, + "MassiveScenarioClassification (fr)": 53.76, + "MassiveScenarioClassification (he)": 25.68, + 
"MassiveScenarioClassification (hi)": 23.02, + "MassiveScenarioClassification (hu)": 44.09, + "MassiveScenarioClassification (hy)": 14.83, + "MassiveScenarioClassification (id)": 44.35, + "MassiveScenarioClassification (is)": 43.08, + "MassiveScenarioClassification (it)": 51.71, + "MassiveScenarioClassification (ja)": 36.75, + "MassiveScenarioClassification (jv)": 44.57, + "MassiveScenarioClassification (ka)": 14.84, + "MassiveScenarioClassification (km)": 9.75, + "MassiveScenarioClassification (kn)": 8.32, + "MassiveScenarioClassification (ko)": 25.72, + "MassiveScenarioClassification (lv)": 42.75, + "MassiveScenarioClassification (ml)": 7.25, + "MassiveScenarioClassification (mn)": 29.03, + "MassiveScenarioClassification (ms)": 44.65, + "MassiveScenarioClassification (my)": 10.07, + "MassiveScenarioClassification (nb)": 47.36, + "MassiveScenarioClassification (nl)": 49.15, + "MassiveScenarioClassification (pl)": 44.72, + "MassiveScenarioClassification (pt)": 53.0, + "MassiveScenarioClassification (ro)": 49.97, + "MassiveScenarioClassification (ru)": 28.75, + "MassiveScenarioClassification (sl)": 42.26, + "MassiveScenarioClassification (sq)": 49.14, + "MassiveScenarioClassification (sv)": 46.83, + "MassiveScenarioClassification (sw)": 43.18, + "MassiveScenarioClassification (ta)": 19.38, + "MassiveScenarioClassification (te)": 7.74, + "MassiveScenarioClassification (th)": 18.32, + "MassiveScenarioClassification (tl)": 48.31, + "MassiveScenarioClassification (tr)": 41.79, + "MassiveScenarioClassification (ur)": 24.46, + "MassiveScenarioClassification (vi)": 40.94, + "MassiveScenarioClassification (zh-CN)": 33.18, + "MassiveScenarioClassification (zh-TW)": 31.16, + "MassiveScenarioClassification (jav-Latn)": 44.54, + "MassiveScenarioClassification (aze-Latn)": 39.62, + "MassiveScenarioClassification (cmo-Hans)": 33.19, + "MassiveScenarioClassification (swa-Latn)": 43.18, + "MassiveScenarioClassification (fra-Latn)": 53.77, + "MassiveScenarioClassification (mon-Cyrl)": 29.01, + "MassiveScenarioClassification (kat-Geor)": 14.85, + "MassiveScenarioClassification (ben-Beng)": 18.98, + "MassiveScenarioClassification (ind-Latn)": 44.37, + "MassiveScenarioClassification (kor-Kore)": 25.72, + "MassiveScenarioClassification (lav-Latn)": 42.75, + "MassiveScenarioClassification (deu-Latn)": 52.08, + "MassiveScenarioClassification (hun-Latn)": 44.1, + "MassiveScenarioClassification (tam-Taml)": 19.4, + "MassiveScenarioClassification (afr-Latn)": 45.72, + "MassiveScenarioClassification (nob-Latn)": 47.35, + "MassiveScenarioClassification (urd-Arab)": 24.45, + "MassiveScenarioClassification (tha-Thai)": 18.32, + "MassiveScenarioClassification (ita-Latn)": 51.7, + "MassiveScenarioClassification (sqi-Latn)": 49.12, + "MassiveScenarioClassification (mya-Mymr)": 10.06, + "MassiveScenarioClassification (ara-Arab)": 27.66, + "MassiveScenarioClassification (tur-Latn)": 41.8, + "MassiveScenarioClassification (khm-Khmr)": 9.75, + "MassiveScenarioClassification (cym-Latn)": 41.43, + "MassiveScenarioClassification (cmo-Hant)": 31.14, + "MassiveScenarioClassification (hye-Armn)": 14.87, + "MassiveScenarioClassification (ell-Grek)": 35.55, + "MassiveScenarioClassification (ron-Latn)": 49.94, + "MassiveScenarioClassification (kan-Knda)": 8.32, + "MassiveScenarioClassification (jpn-Jpan)": 36.77, + "MassiveScenarioClassification (fin-Latn)": 45.8, + "MassiveScenarioClassification (swe-Latn)": 46.81, + "MassiveScenarioClassification (dan-Latn)": 49.5, + "MassiveScenarioClassification (msa-Latn)": 44.67, + 
"MassiveScenarioClassification (hin-Deva)": 23.03, + "MassiveScenarioClassification (tgl-Latn)": 48.29, + "MassiveScenarioClassification (pol-Latn)": 44.74, + "MassiveScenarioClassification (isl-Latn)": 43.11, + "MassiveScenarioClassification (por-Latn)": 53.0, + "MassiveScenarioClassification (slv-Latn)": 42.24, + "MassiveScenarioClassification (rus-Cyrl)": 28.77, + "MassiveScenarioClassification (tel-Telu)": 7.74, + "MassiveScenarioClassification (heb-Hebr)": 25.73, + "MassiveScenarioClassification (fas-Arab)": 29.0, + "MassiveScenarioClassification (vie-Latn)": 40.97, + "MassiveScenarioClassification (nld-Latn)": 49.14, + "MassiveScenarioClassification (spa-Latn)": 50.73, + "MassiveScenarioClassification (mal-Mlym)": 7.25, + "MassiveScenarioClassification (amh-Ethi)": 7.41, + "MultilingualSentiment (cmn-Hans)": 40.52, + "NoRecClassification (nob-Latn)": 37.73, + "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 54.17, + "OnlineShopping (cmn-Hans)": 58.65, + "PAC (pol-Latn)": 59.53, + "PolEmo2.0-IN (pol-Latn)": 38.32, + "PolEmo2.0-OUT (pol-Latn)": 22.98, + "TNews (cmn-Hans)": 20.37, + "ToxicConversationsClassification": 63.34, + "TweetSentimentExtractionClassification": 54.24, + "Waimai (cmn-Hans)": 63.48 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "all-MiniLM-L12-v2", + "AlloProfClusteringP2P": 46.03, + "AlloProfClusteringS2S": 31.83, + "ArxivClusteringP2P": 46.07, + "ArxivClusteringS2S": 37.5, + "BiorxivClusteringP2P": 36.99, + "BiorxivClusteringS2S": 33.21, + "HALClusteringS2S": 19.58, + "MLSUMClusteringP2P": 34.35, + "MLSUMClusteringS2S": 29.3, + "MasakhaNEWSClusteringP2P (fra)": 42.72, + "MasakhaNEWSClusteringP2P (amh-Ethi)": 40.5, + "MasakhaNEWSClusteringP2P (eng)": 55.86, + "MasakhaNEWSClusteringP2P (fra-Latn)": 42.72, + "MasakhaNEWSClusteringP2P (hau-Latn)": 26.61, + "MasakhaNEWSClusteringP2P (ibo-Latn)": 44.26, + "MasakhaNEWSClusteringP2P (lin-Latn)": 54.52, + "MasakhaNEWSClusteringP2P (lug-Latn)": 43.87, + "MasakhaNEWSClusteringP2P (orm-Ethi)": 24.87, + "MasakhaNEWSClusteringP2P (pcm-Latn)": 74.42, + "MasakhaNEWSClusteringP2P (run-Latn)": 51.73, + "MasakhaNEWSClusteringP2P (sna-Latn)": 46.89, + "MasakhaNEWSClusteringP2P (som-Latn)": 31.17, + "MasakhaNEWSClusteringP2P (swa-Latn)": 23.72, + "MasakhaNEWSClusteringP2P (tir-Ethi)": 44.08, + "MasakhaNEWSClusteringP2P (xho-Latn)": 26.97, + "MasakhaNEWSClusteringP2P (yor-Latn)": 32.51, + "MasakhaNEWSClusteringS2S (fra)": 32.47, + "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.11, + "MasakhaNEWSClusteringS2S (eng)": 40.71, + "MasakhaNEWSClusteringS2S (fra-Latn)": 32.47, + "MasakhaNEWSClusteringS2S (hau-Latn)": 20.63, + "MasakhaNEWSClusteringS2S (ibo-Latn)": 35.33, + "MasakhaNEWSClusteringS2S (lin-Latn)": 54.52, + "MasakhaNEWSClusteringS2S (lug-Latn)": 51.42, + "MasakhaNEWSClusteringS2S (orm-Ethi)": 24.84, + "MasakhaNEWSClusteringS2S (pcm-Latn)": 70.72, + "MasakhaNEWSClusteringS2S (run-Latn)": 50.88, + "MasakhaNEWSClusteringS2S (sna-Latn)": 46.6, + "MasakhaNEWSClusteringS2S (som-Latn)": 29.87, + "MasakhaNEWSClusteringS2S (swa-Latn)": 10.82, + "MasakhaNEWSClusteringS2S (tir-Ethi)": 43.63, + "MasakhaNEWSClusteringS2S (xho-Latn)": 24.55, + "MasakhaNEWSClusteringS2S (yor-Latn)": 32.85, + "MedrxivClusteringP2P": 34.25, + "MedrxivClusteringS2S": 32.24, + "RedditClustering": 51.18, + "RedditClusteringP2P": 54.8, + "StackExchangeClustering": 53.05, + "StackExchangeClusteringP2P": 33.13, + "TwentyNewsgroupsClustering": 47.47 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": 
"all-MiniLM-L12-v2", + "CDSC-E (pol-Latn)": 49.04, + "OpusparcusPC (deu-Latn)": 91.2, + "OpusparcusPC (en)": 97.41, + "OpusparcusPC (fin-Latn)": 85.99, + "OpusparcusPC (fra-Latn)": 87.35, + "OpusparcusPC (rus-Cyrl)": 79.23, + "OpusparcusPC (swe-Latn)": 84.87, + "PSC (pol-Latn)": 87.92, + "PawsXPairClassification (deu-Latn)": 50.83, + "PawsXPairClassification (en)": 58.62, + "PawsXPairClassification (spa-Latn)": 52.08, + "PawsXPairClassification (fra-Latn)": 55.54, + "PawsXPairClassification (jpn-Hira)": 47.75, + "PawsXPairClassification (kor-Hang)": 49.59, + "PawsXPairClassification (cmn-Hans)": 52.8, + "SICK-E-PL (pol-Latn)": 49.63, + "SprintDuplicateQuestions": 92.45, + "TwitterSemEval2015": 70.02, + "TwitterURLCorpus": 84.77 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "all-MiniLM-L12-v2", + "AlloprofReranking (fra-Latn)": 67.01, + "AskUbuntuDupQuestions": 64.06, + "MMarcoReranking (cmn-Hans)": 5.27, + "MindSmallReranking": 31.02, + "SciDocsRR": 87.2, + "StackOverflowDupQuestions": 51.47, + "SyntecReranking (fra-Latn)": 69.17, + "T2Reranking (cmn-Hans)": 60.32 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "all-MiniLM-L12-v2", + "AILACasedocs": 16.8, + "AILAStatutes": 20.71, + "ARCChallenge": 10.23, + "AlloprofRetrieval": 33.2, + "AlloprofRetrieval (fra-Latn)": 33.2, + "AlphaNLI": 25.35, + "ArguAna": 47.13, + "ArguAna-PL (pol-Latn)": 13.4, + "BSARDRetrieval (fra-Latn)": 6.24, + "CQADupstackRetrieval": 42.53, + "ClimateFEVER": 21.57, + "CmedqaRetrieval (cmn-Hans)": 2.58, + "CovidRetrieval (cmn-Hans)": 10.79, + "DBPedia": 33.35, + "DuRetrieval (cmn-Hans)": 6.62, + "EcomRetrieval (cmn-Hans)": 4.01, + "FEVER": 55.9, + "FiQA-PL (pol-Latn)": 5.82, + "FiQA2018": 37.27, + "GerDaLIRSmall (deu-Latn)": 1.35, + "HellaSwag": 24.08, + "HotpotQA": 44.59, + "LEMBNarrativeQARetrieval": 19.64, + "LEMBNeedleRetrieval": 12.25, + "LEMBPasskeyRetrieval": 14.75, + "LEMBQMSumRetrieval": 13.08, + "LEMBSummScreenFDRetrieval": 46.98, + "LEMBWikimQARetrieval": 44.88, + "LeCaRDv2 (zho-Hans)": 18.77, + "LegalBenchConsumerContractsQA": 60.21, + "LegalBenchCorporateLobbying": 88.69, + "LegalQuAD (deu-Latn)": 7.44, + "LegalSummarization": 57.43, + "MMarcoRetrieval (cmn-Hans)": 7.46, + "MSMARCO": 39.03, + "MedicalRetrieval (cmn-Hans)": 2.3, + "MintakaRetrieval (ara-Arab)": 2.74, + "MintakaRetrieval (deu-Latn)": 20.04, + "MintakaRetrieval (spa-Latn)": 11.76, + "MintakaRetrieval (fra-Latn)": 16.08, + "MintakaRetrieval (hin-Deva)": 3.04, + "MintakaRetrieval (ita-Latn)": 11.83, + "MintakaRetrieval (jpn-Hira)": 7.29, + "MintakaRetrieval (por-Latn)": 13.66, + "NFCorpus": 32.25, + "NFCorpus-PL (pol-Latn)": 15.43, + "NQ": 46.47, + "PIQA": 26.44, + "Quail": 3.08, + "QuoraRetrieval": 87.75, + "RARbCode": 42.44, + "RARbMath": 66.36, + "SCIDOCS": 21.82, + "SCIDOCS-PL (pol-Latn)": 5.34, + "SIQA": 2.09, + "SciFact": 62.64, + "SciFact-PL (pol-Latn)": 22.48, + "SpartQA": 2.67, + "SyntecRetrieval (fra-Latn)": 60.8, + "T2Retrieval (cmn-Hans)": 4.82, + "TRECCOVID": 50.82, + "TRECCOVID-PL (pol-Latn)": 16.54, + "TempReasonL1": 1.66, + "TempReasonL2Fact": 10.31, + "TempReasonL2Pure": 0.63, + "TempReasonL3Fact": 11.11, + "TempReasonL3Pure": 6.63, + "Touche2020": 17.22, + "VideoRetrieval (cmn-Hans)": 9.38, + "WinoGrande": 27.2, + "XPQARetrieval (fr)": 55.9, + "XPQARetrieval (ara-Arab_ara-Arab)": 7.83, + "XPQARetrieval (eng-Latn_ara-Arab)": 2.52, + "XPQARetrieval (ara-Arab_eng-Latn)": 8.88, + "XPQARetrieval (deu-Latn_deu-Latn)": 56.77, + "XPQARetrieval (eng-Latn_deu-Latn)": 18.2, + "XPQARetrieval (deu-Latn_eng-Latn)": 
30.06, + "XPQARetrieval (spa-Latn_spa-Latn)": 42.22, + "XPQARetrieval (eng-Latn_spa-Latn)": 7.53, + "XPQARetrieval (spa-Latn_eng-Latn)": 26.27, + "XPQARetrieval (fra-Latn_fra-Latn)": 55.9, + "XPQARetrieval (eng-Latn_fra-Latn)": 14.89, + "XPQARetrieval (fra-Latn_eng-Latn)": 34.2, + "XPQARetrieval (hin-Deva_hin-Deva)": 33.26, + "XPQARetrieval (eng-Latn_hin-Deva)": 6.44, + "XPQARetrieval (hin-Deva_eng-Latn)": 6.98, + "XPQARetrieval (ita-Latn_ita-Latn)": 58.68, + "XPQARetrieval (eng-Latn_ita-Latn)": 8.56, + "XPQARetrieval (ita-Latn_eng-Latn)": 28.71, + "XPQARetrieval (jpn-Hira_jpn-Hira)": 39.53, + "XPQARetrieval (eng-Latn_jpn-Hira)": 5.7, + "XPQARetrieval (jpn-Hira_eng-Latn)": 13.75, + "XPQARetrieval (kor-Hang_kor-Hang)": 13.48, + "XPQARetrieval (eng-Latn_kor-Hang)": 7.43, + "XPQARetrieval (kor-Hang_eng-Latn)": 7.34, + "XPQARetrieval (pol-Latn_pol-Latn)": 28.07, + "XPQARetrieval (eng-Latn_pol-Latn)": 10.03, + "XPQARetrieval (pol-Latn_eng-Latn)": 16.58, + "XPQARetrieval (por-Latn_por-Latn)": 34.09, + "XPQARetrieval (eng-Latn_por-Latn)": 7.38, + "XPQARetrieval (por-Latn_eng-Latn)": 22.59, + "XPQARetrieval (tam-Taml_tam-Taml)": 9.13, + "XPQARetrieval (eng-Latn_tam-Taml)": 4.15, + "XPQARetrieval (tam-Taml_eng-Latn)": 3.76, + "XPQARetrieval (cmn-Hans_cmn-Hans)": 21.09, + "XPQARetrieval (eng-Latn_cmn-Hans)": 6.58, + "XPQARetrieval (cmn-Hans_eng-Latn)": 9.39 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "all-MiniLM-L12-v2", + "AFQMC (cmn-Hans)": 7.94, + "ATEC (cmn-Hans)": 12.97, + "BIOSSES": 83.57, + "BQ (cmn-Hans)": 23.31, + "CDSC-R (pol-Latn)": 82.5, + "LCQMC (cmn-Hans)": 21.04, + "PAWSX (cmn-Hans)": 7.31, + "SICK-R": 79.32, + "SICK-R-PL (pol-Latn)": 54.26, + "SICKFr (fra-Latn)": 63.16, + "STS12": 73.08, + "STS13": 82.13, + "STS14": 76.73, + "STS15": 85.58, + "STS16": 80.23, + "STS17 (ar-ar)": 58.71, + "STS17 (en-ar)": 0.54, + "STS17 (en-de)": 27.54, + "STS17 (en-en)": 88.63, + "STS17 (en-tr)": 0.43, + "STS17 (es-en)": 22.01, + "STS17 (es-es)": 78.37, + "STS17 (fr-en)": 30.7, + "STS17 (it-en)": 24.28, + "STS17 (ko-ko)": 43.37, + "STS17 (nl-en)": 24.51, + "STS17 (nld-Latn_eng-Latn)": 24.51, + "STS17 (eng-Latn_ara-Arab)": 0.54, + "STS17 (ara-Arab)": 58.71, + "STS17 (kor-Hang)": 43.37, + "STS17 (eng-Latn_tur-Latn)": 0.43, + "STS17 (ita-Latn_eng-Latn)": 24.28, + "STS17 (eng-Latn_deu-Latn)": 27.54, + "STS17 (fra-Latn_eng-Latn)": 30.7, + "STS17 (spa-Latn)": 78.37, + "STS17 (spa-Latn_eng-Latn)": 22.01, + "STS22 (ar)": 17.54, + "STS22 (de)": 22.53, + "STS22 (de-en)": 42.86, + "STS22 (de-fr)": 43.52, + "STS22 (de-pl)": 1.63, + "STS22 (en)": 66.0, + "STS22 (es)": 43.98, + "STS22 (es-en)": 53.99, + "STS22 (es-it)": 40.71, + "STS22 (fr)": 69.51, + "STS22 (fr-pl)": 16.9, + "STS22 (it)": 47.48, + "STS22 (pl)": 19.22, + "STS22 (pl-en)": 42.67, + "STS22 (ru)": 11.19, + "STS22 (tr)": 21.6, + "STS22 (zh)": 33.15, + "STS22 (zh-en)": 44.39, + "STS22 (ara-Arab)": 17.54, + "STS22 (cmn-Hans)": 33.15, + "STS22 (fra-Latn)": 69.51, + "STS22 (deu-Latn_eng-Latn)": 42.86, + "STS22 (pol-Latn)": 19.22, + "STS22 (spa-Latn_eng-Latn)": 53.99, + "STS22 (pol-Latn_eng-Latn)": 42.67, + "STS22 (tur-Latn)": 21.6, + "STS22 (deu-Latn_fra-Latn)": 43.52, + "STS22 (fra-Latn_pol-Latn)": 16.9, + "STS22 (deu-Latn)": 22.53, + "STS22 (deu-Latn_pol-Latn)": 1.63, + "STS22 (spa-Latn)": 43.98, + "STS22 (cmn-Hans_eng-Latn)": 44.39, + "STS22 (spa-Latn_ita-Latn)": 40.71, + "STS22 (ita-Latn)": 47.48, + "STS22 (rus-Cyrl)": 11.19, + "STSB (cmn-Hans)": 36.66, + "STSBenchmark": 83.09, + "STSBenchmarkMultilingualSTS (pol-Latn)": 60.2, + 
"STSBenchmarkMultilingualSTS (cmn-Hans)": 38.93, + "STSBenchmarkMultilingualSTS (en)": 83.09, + "STSBenchmarkMultilingualSTS (ita-Latn)": 60.71, + "STSBenchmarkMultilingualSTS (fra-Latn)": 66.68, + "STSBenchmarkMultilingualSTS (por-Latn)": 63.85, + "STSBenchmarkMultilingualSTS (nld-Latn)": 60.03, + "STSBenchmarkMultilingualSTS (rus-Cyrl)": 56.09, + "STSBenchmarkMultilingualSTS (deu-Latn)": 63.28, + "STSBenchmarkMultilingualSTS (spa-Latn)": 65.33 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "all-MiniLM-L12-v2", + "SummEval": 27.9, + "SummEvalFr (fra-Latn)": 26.63 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "all-MiniLM-L12-v2" + } + ] + } + }, + "text-embedding-3-large": { + "BitextMining": { + "f1": [ + { + "Model": "text-embedding-3-large" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "text-embedding-3-large", + "AmazonCounterfactualClassification (en)": 78.93, + "AmazonPolarityClassification": 92.85, + "AmazonReviewsClassification (en)": 48.7, + "Banking77Classification": 85.69, + "EmotionClassification": 51.58, + "ImdbClassification": 87.67, + "MTOPDomainClassification (en)": 95.36, + "MTOPIntentClassification (en)": 75.07, + "MassiveIntentClassification (en)": 74.64, + "MassiveScenarioClassification (en)": 79.79, + "ToxicConversationsClassification": 72.92, + "TweetSentimentExtractionClassification": 62.22 + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "text-embedding-3-large", + "ArxivClusteringP2P": 49.01, + "ArxivClusteringS2S": 44.45, + "BiorxivClusteringP2P": 38.03, + "BiorxivClusteringS2S": 36.53, + "MedrxivClusteringP2P": 32.7, + "MedrxivClusteringS2S": 31.27, + "RedditClustering": 67.84, + "RedditClusteringP2P": 67.96, + "StackExchangeClustering": 76.26, + "StackExchangeClusteringP2P": 36.88, + "TwentyNewsgroupsClustering": 58.14 + } + ] + }, + "PairClassification": { + "ap": [ + { + "Model": "text-embedding-3-large", + "SprintDuplicateQuestions": 92.25, + "TwitterSemEval2015": 77.13, + "TwitterURLCorpus": 87.78 + } + ] + }, + "Reranking": { + "map": [ + { + "Model": "text-embedding-3-large", + "AskUbuntuDupQuestions": 65.03, + "MindSmallReranking": 29.86, + "SciDocsRR": 86.66, + "StackOverflowDupQuestions": 55.08 + } + ] + }, + "Retrieval": { + "ndcg_at_10": [ + { + "Model": "text-embedding-3-large", + "AILACasedocs": 39.0, + "AILAStatutes": 41.31, + "ARCChallenge": 23.98, + "AlphaNLI": 37.27, + "ArguAna": 58.05, + "BrightRetrieval (theoremqa_questions)": 22.22, + "BrightRetrieval (leetcode)": 23.65, + "BrightRetrieval (earth_science)": 26.27, + "BrightRetrieval (psychology)": 27.52, + "BrightRetrieval (robotics)": 12.93, + "BrightRetrieval (economics)": 19.98, + "BrightRetrieval (stackoverflow)": 12.49, + "BrightRetrieval (biology)": 23.67, + "BrightRetrieval (theoremqa_theorems)": 9.25, + "BrightRetrieval (pony)": 2.45, + "BrightRetrieval (sustainable_living)": 20.32, + "BrightRetrieval (aops)": 8.45, + "CQADupstackRetrieval": 47.54, + "ClimateFEVER": 30.27, + "DBPedia": 44.76, + "FEVER": 87.94, + "FiQA2018": 55.0, + "GerDaLIRSmall": 32.77, + "HellaSwag": 34.12, + "HotpotQA": 71.58, + "LEMBNarrativeQARetrieval": 44.09, + "LEMBNeedleRetrieval": 29.25, + "LEMBPasskeyRetrieval": 63.0, + "LEMBQMSumRetrieval": 32.49, + "LEMBSummScreenFDRetrieval": 84.8, + "LEMBWikimQARetrieval": 54.16, + "LeCaRDv2": 57.2, + "LegalBenchConsumerContractsQA": 79.39, + "LegalBenchCorporateLobbying": 95.09, + "LegalQuAD": 57.47, + "LegalSummarization": 71.55, + "MSMARCO": 40.24, + "NFCorpus": 42.07, + "NQ": 61.27, + 
"PIQA": 41.96, + "Quail": 10.15, + "QuoraRetrieval": 89.05, + "RARbCode": 89.64, + "RARbMath": 90.08, + "SCIDOCS": 23.11, + "SIQA": 3.44, + "SciFact": 77.77, + "SpartQA": 7.51, + "TRECCOVID": 79.56, + "TempReasonL1": 2.13, + "TempReasonL2Fact": 28.65, + "TempReasonL2Pure": 10.34, + "TempReasonL3Fact": 25.52, + "TempReasonL3Pure": 15.28, + "Touche2020": 23.35, + "WinoGrande": 29.11 + } + ] + }, + "STS": { + "spearman": [ + { + "Model": "text-embedding-3-large", + "BIOSSES": 84.68, + "SICK-R": 79.0, + "STS12": 72.84, + "STS13": 86.1, + "STS14": 81.15, + "STS15": 88.49, + "STS16": 85.08, + "STS17 (en-en)": 90.22, + "STS22 (en)": 66.14, + "STSBenchmark": 83.56 + } + ] + }, + "Summarization": { + "spearman": [ + { + "Model": "text-embedding-3-large", + "SummEval": 29.92 + } + ] + }, + "InstructionRetrieval": { + "p-MRR": [ + { + "Model": "text-embedding-3-large", + "Core17InstructionRetrieval": -0.2, + "News21InstructionRetrieval": -2.03, + "Robust04InstructionRetrieval": -5.81 + } + ] + } + }, + "bm25s": { + "BitextMining": { + "f1": [ + { + "Model": "bm25s" + } + ] + }, + "Classification": { + "accuracy": [ + { + "Model": "bm25s" + } + ] + }, + "Clustering": { + "v_measure": [ + { + "Model": "bm25s" } ] }, "PairClassification": { "ap": [ { - "Model": "GritLM-7B", - "CDSC-E (pol-Latn)": 72.65, - "OpusparcusPC (deu-Latn)": 96.65, - "OpusparcusPC (en)": 98.57, - "OpusparcusPC (fin-Latn)": 90.41, - "OpusparcusPC (fra-Latn)": 93.41, - "OpusparcusPC (rus-Cyrl)": 88.63, - "OpusparcusPC (swe-Latn)": 94.04, - "PSC (pol-Latn)": 99.43, - "PawsXPairClassification (deu-Latn)": 58.5, - "PawsXPairClassification (en)": 63.78, - "PawsXPairClassification (spa-Latn)": 59.15, - "PawsXPairClassification (fra-Latn)": 61.89, - "PawsXPairClassification (jpn-Hira)": 51.46, - "PawsXPairClassification (kor-Hang)": 52.15, - "PawsXPairClassification (cmn-Hans)": 57.66, - "SICK-E-PL (pol-Latn)": 75.98, - "SprintDuplicateQuestions (default)": 93.06, - "TwitterSemEval2015 (default)": 71.24, - "TwitterURLCorpus (default)": 84.54 + "Model": "bm25s" } ] }, "Reranking": { "map": [ { - "Model": "GritLM-7B", - "AlloprofReranking (fra-Latn)": 77.95, - "AskUbuntuDupQuestions (default)": 61.11, - "MMarcoReranking (cmn-Hans)": 21.7, - "MindSmallReranking (default)": 31.53, - "SciDocsRR (default)": 84.78, - "StackOverflowDupQuestions (default)": 50.95, - "SyntecReranking (fra-Latn)": 83.32, - "T2Reranking (cmn-Hans)": 65.63 + "Model": "bm25s" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "GritLM-7B", - "AILACasedocs (default)": 35.31, - "AILAStatutes (default)": 41.8, - "AlloprofRetrieval (fra-Latn)": 55.42, - "ArguAna (default)": 63.17, - "ArguAna-PL (pol-Latn)": 48.89, - "BSARDRetrieval (fra-Latn)": 26.63, - "BrightRetrieval (pony)": 21.98, - "BrightRetrieval (robotics)": 17.31, - "BrightRetrieval (economics)": 19.0, - "BrightRetrieval (theoremqa_questions)": 23.34, - "BrightRetrieval (leetcode)": 29.85, - "BrightRetrieval (earth_science)": 32.77, - "BrightRetrieval (stackoverflow)": 11.62, - "BrightRetrieval (sustainable_living)": 18.04, - "BrightRetrieval (biology)": 25.04, - "BrightRetrieval (psychology)": 19.92, - "BrightRetrieval (theoremqa_theorems)": 17.41, - "BrightRetrieval (aops)": 8.91, - "CmedqaRetrieval (cmn-Hans)": 35.58, - "CovidRetrieval (cmn-Hans)": 73.47, - "DuRetrieval (cmn-Hans)": 88.18, - "EcomRetrieval (cmn-Hans)": 54.33, - "FiQA-PL (pol-Latn)": 38.04, - "FiQA2018 (default)": 59.91, - "GerDaLIRSmall (deu-Latn)": 20.61, - "LEMBNarrativeQARetrieval (default)": 41.46, - "LEMBNeedleRetrieval": 33.25, - 
"LEMBPasskeyRetrieval": 38.25, - "LEMBQMSumRetrieval (default)": 30.32, - "LEMBSummScreenFDRetrieval (default)": 78.49, - "LEMBWikimQARetrieval (default)": 60.8, - "LeCaRDv2 (zho-Hans)": 64.05, - "LegalBenchConsumerContractsQA (default)": 82.1, - "LegalBenchCorporateLobbying (default)": 95.0, - "LegalQuAD (deu-Latn)": 44.18, - "LegalSummarization (default)": 70.64, - "MMarcoRetrieval (cmn-Hans)": 76.54, - "MedicalRetrieval (cmn-Hans)": 55.81, - "MintakaRetrieval (ara-Arab)": 25.88, - "MintakaRetrieval (deu-Latn)": 55.66, - "MintakaRetrieval (spa-Latn)": 53.36, - "MintakaRetrieval (fra-Latn)": 51.68, - "MintakaRetrieval (hin-Deva)": 26.06, - "MintakaRetrieval (ita-Latn)": 54.91, - "MintakaRetrieval (jpn-Hira)": 34.1, - "MintakaRetrieval (por-Latn)": 54.91, - "NFCorpus (default)": 40.86, - "NFCorpus-PL (pol-Latn)": 32.88, - "SCIDOCS (default)": 24.4, - "SCIDOCS-PL (pol-Latn)": 18.39, - "SciFact (default)": 79.13, - "SciFact-PL (pol-Latn)": 73.22, - "SyntecRetrieval (fra-Latn)": 89.48, - "T2Retrieval (cmn-Hans)": 82.96, - "TRECCOVID (default)": 74.36, - "TRECCOVID-PL (pol-Latn)": 58.01, - "Touche2020 (default)": 27.81, - "VideoRetrieval (cmn-Hans)": 53.85, - "XPQARetrieval (ara-Arab_ara-Arab)": 45.21, - "XPQARetrieval (eng-Latn_ara-Arab)": 27.32, - "XPQARetrieval (ara-Arab_eng-Latn)": 39.43, - "XPQARetrieval (deu-Latn_deu-Latn)": 76.58, - "XPQARetrieval (eng-Latn_deu-Latn)": 55.44, - "XPQARetrieval (deu-Latn_eng-Latn)": 72.56, - "XPQARetrieval (spa-Latn_spa-Latn)": 64.55, - "XPQARetrieval (eng-Latn_spa-Latn)": 45.49, - "XPQARetrieval (spa-Latn_eng-Latn)": 61.03, - "XPQARetrieval (fra-Latn_fra-Latn)": 70.85, - "XPQARetrieval (eng-Latn_fra-Latn)": 48.14, - "XPQARetrieval (fra-Latn_eng-Latn)": 66.96, - "XPQARetrieval (hin-Deva_hin-Deva)": 74.75, - "XPQARetrieval (eng-Latn_hin-Deva)": 25.61, - "XPQARetrieval (hin-Deva_eng-Latn)": 63.9, - "XPQARetrieval (ita-Latn_ita-Latn)": 76.53, - "XPQARetrieval (eng-Latn_ita-Latn)": 46.88, - "XPQARetrieval (ita-Latn_eng-Latn)": 71.03, - "XPQARetrieval (jpn-Hira_jpn-Hira)": 72.27, - "XPQARetrieval (eng-Latn_jpn-Hira)": 41.94, - "XPQARetrieval (jpn-Hira_eng-Latn)": 69.42, - "XPQARetrieval (kor-Hang_kor-Hang)": 40.64, - "XPQARetrieval (eng-Latn_kor-Hang)": 32.68, - "XPQARetrieval (kor-Hang_eng-Latn)": 36.0, - "XPQARetrieval (pol-Latn_pol-Latn)": 50.74, - "XPQARetrieval (eng-Latn_pol-Latn)": 33.14, - "XPQARetrieval (pol-Latn_eng-Latn)": 48.06, - "XPQARetrieval (por-Latn_por-Latn)": 49.86, - "XPQARetrieval (eng-Latn_por-Latn)": 33.01, - "XPQARetrieval (por-Latn_eng-Latn)": 48.45, - "XPQARetrieval (tam-Taml_tam-Taml)": 41.78, - "XPQARetrieval (eng-Latn_tam-Taml)": 10.95, - "XPQARetrieval (tam-Taml_eng-Latn)": 21.28, - "XPQARetrieval (cmn-Hans_cmn-Hans)": 65.29, - "XPQARetrieval (eng-Latn_cmn-Hans)": 35.86, - "XPQARetrieval (cmn-Hans_eng-Latn)": 58.12 + "Model": "bm25s", + "ArguAna": 49.28, + "CQADupstackRetrieval": 31.86, + "ClimateFEVER": 13.62, + "DBPedia": 29.91, + "FEVER": 48.09, + "FiQA2018": 25.14, + "HotpotQA": 56.91, + "MSMARCO": 21.89, + "NFCorpus": 32.08, + "NQ": 28.5, + "QuoraRetrieval": 80.42, + "SCIDOCS": 15.78, + "SciFact": 68.7, + "TRECCOVID": 62.31, + "Touche2020": 33.05 } ] }, "STS": { "spearman": [ { - "Model": "GritLM-7B", - "AFQMC (cmn-Hans)": 32.65, - "ATEC (cmn-Hans)": 37.34, - "BIOSSES (default)": 85.01, - "BQ (cmn-Hans)": 38.03, - "CDSC-R (pol-Latn)": 92.23, - "LCQMC (cmn-Hans)": 71.38, - "PAWSX (cmn-Hans)": 16.4, - "SICK-R (default)": 81.47, - "SICK-R-PL (pol-Latn)": 72.78, - "SICKFr (fra-Latn)": 76.91, - "STS12 (default)": 65.84, - "STS13 
(default)": 78.37, - "STS14 (default)": 77.52, - "STS15 (default)": 85.43, - "STS16 (default)": 79.94, - "STS17 (ita-Latn_eng-Latn)": 88.42, - "STS17 (fra-Latn_eng-Latn)": 87.9, - "STS17 (kor-Hang)": 78.74, - "STS17 (en-en)": 90.12, - "STS17 (nld-Latn_eng-Latn)": 88.29, - "STS17 (ara-Arab)": 79.28, - "STS17 (eng-Latn_deu-Latn)": 88.92, - "STS17 (spa-Latn)": 87.12, - "STS17 (eng-Latn_tur-Latn)": 77.47, - "STS17 (spa-Latn_eng-Latn)": 87.47, - "STS17 (eng-Latn_ara-Arab)": 74.45, - "STS22 (spa-Latn_eng-Latn)": 80.76, - "STS22 (ara-Arab)": 55.45, - "STS22 (pol-Latn_eng-Latn)": 77.77, - "STS22 (deu-Latn_pol-Latn)": 55.09, - "STS22 (en)": 68.59, - "STS22 (rus-Cyrl)": 68.46, - "STS22 (deu-Latn_eng-Latn)": 62.33, - "STS22 (cmn-Hans)": 72.29, - "STS22 (pol-Latn)": 48.07, - "STS22 (fra-Latn)": 83.09, - "STS22 (cmn-Hans_eng-Latn)": 72.73, - "STS22 (deu-Latn_fra-Latn)": 62.14, - "STS22 (spa-Latn_ita-Latn)": 77.63, - "STS22 (fra-Latn_pol-Latn)": 84.52, - "STS22 (ita-Latn)": 77.58, - "STS22 (spa-Latn)": 72.24, - "STS22 (deu-Latn)": 59.34, - "STS22 (tur-Latn)": 70.83, - "STSB (cmn-Hans)": 74.11, - "STSBenchmark (default)": 83.1, - "STSBenchmarkMultilingualSTS (spa-Latn)": 79.51, - "STSBenchmarkMultilingualSTS (ita-Latn)": 76.24, - "STSBenchmarkMultilingualSTS (por-Latn)": 76.61, - "STSBenchmarkMultilingualSTS (fra-Latn)": 77.48, - "STSBenchmarkMultilingualSTS (deu-Latn)": 77.57, - "STSBenchmarkMultilingualSTS (en)": 83.12, - "STSBenchmarkMultilingualSTS (nld-Latn)": 74.83, - "STSBenchmarkMultilingualSTS (pol-Latn)": 74.67, - "STSBenchmarkMultilingualSTS (cmn-Hans)": 75.27, - "STSBenchmarkMultilingualSTS (rus-Cyrl)": 76.19 + "Model": "bm25s" } ] }, "Summarization": { "spearman": [ { - "Model": "GritLM-7B", - "SummEval (default)": 30.26, - "SummEvalFr (fra-Latn)": 29.97 + "Model": "bm25s" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "GritLM-7B", - "Core17InstructionRetrieval": 2.62, - "News21InstructionRetrieval": -1.01, - "Robust04InstructionRetrieval": -1.68 + "Model": "bm25s" } ] } }, - "gte-Qwen1.5-7B-instruct": { + "all-MiniLM-L6-v2-instruct": { "BitextMining": { "f1": [ { - "Model": "gte-Qwen1.5-7B-instruct" + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "Classification": { "accuracy": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "AmazonCounterfactualClassification (en)": 83.16, - "AmazonPolarityClassification": 96.7, - "AmazonReviewsClassification (en)": 62.17, - "AmazonReviewsClassification (zh)": 52.95, - "Banking77Classification": 81.68, - "EmotionClassification": 54.53, - "IFlyTek": 53.77, - "ImdbClassification": 95.58, - "JDReview": 88.2, - "MTOPDomainClassification (en)": 95.75, - "MTOPIntentClassification (en)": 84.26, - "MassiveIntentClassification (zh-CN)": 76.25, - "MassiveIntentClassification (en)": 78.47, - "MassiveScenarioClassification (en)": 78.19, - "MassiveScenarioClassification (zh-CN)": 77.26, - "MultilingualSentiment": 77.42, - "OnlineShopping": 94.48, - "TNews": 51.24, - "ToxicConversationsClassification": 78.75, - "TweetSentimentExtractionClassification": 66.0, - "Waimai": 88.63 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "Clustering": { "v_measure": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "ArxivClusteringP2P": 56.4, - "ArxivClusteringS2S": 51.45, - "BiorxivClusteringP2P": 49.01, - "BiorxivClusteringS2S": 45.06, - "CLSClusteringP2P": 47.21, - "CLSClusteringS2S": 45.79, - "MedrxivClusteringP2P": 44.37, - "MedrxivClusteringS2S": 42.0, - "RedditClustering": 73.37, - "RedditClusteringP2P": 72.51, - "StackExchangeClustering": 79.07, - "StackExchangeClusteringP2P": 
49.57, - "ThuNewsClusteringP2P": 87.43, - "ThuNewsClusteringS2S": 87.9, - "TwentyNewsgroupsClustering": 51.31 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "PairClassification": { "ap": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "Cmnli": 91.81, - "Ocnli": 85.22, - "SprintDuplicateQuestions": 95.99, - "TwitterSemEval2015": 79.36, - "TwitterURLCorpus": 86.79 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "Reranking": { "map": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "AskUbuntuDupQuestions": 66.0, - "CMedQAv1": 86.37, - "CMedQAv2": 87.41, - "MindSmallReranking": 32.71, - "SciDocsRR": 87.89, - "StackOverflowDupQuestions": 53.93, - "T2Reranking": 68.11 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "Retrieval": { "ndcg_at_10": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "ArguAna": 62.65, - "BrightRetrieval (stackoverflow)": 19.85, - "BrightRetrieval (earth_science)": 36.22, - "BrightRetrieval (leetcode)": 25.46, - "BrightRetrieval (theoremqa_questions)": 26.97, - "BrightRetrieval (economics)": 17.72, - "BrightRetrieval (robotics)": 13.47, - "BrightRetrieval (pony)": 9.79, - "BrightRetrieval (aops)": 14.36, - "BrightRetrieval (psychology)": 24.61, - "BrightRetrieval (theoremqa_theorems)": 26.66, - "BrightRetrieval (biology)": 30.92, - "BrightRetrieval (sustainable_living)": 14.93, - "CQADupstackRetrieval": 40.64, - "ClimateFEVER": 44.0, - "CmedqaRetrieval": 43.47, - "CovidRetrieval": 80.87, - "DBPedia": 48.04, - "DuRetrieval": 86.01, - "EcomRetrieval": 66.46, - "FEVER": 93.35, - "FiQA2018": 55.31, - "HotpotQA": 72.25, - "MMarcoRetrieval": 73.83, - "MSMARCO": 41.68, - "MedicalRetrieval": 61.33, - "NFCorpus": 38.25, - "NQ": 61.79, - "QuoraRetrieval": 89.61, - "SCIDOCS": 27.69, - "SciFact": 75.31, - "T2Retrieval": 83.58, - "TRECCOVID": 72.72, - "Touche2020": 20.3, - "VideoRetrieval": 69.41 + "Model": "all-MiniLM-L6-v2-instruct", + "ARCChallenge": 9.4, + "AlphaNLI": 15.09, + "HellaSwag": 20.51, + "PIQA": 24.68, + "Quail": 3.46, + "RARbCode": 42.47, + "RARbMath": 62.39, + "SIQA": 1.53, + "SpartQA": 0.57, + "TempReasonL1": 1.05, + "TempReasonL2Fact": 16.57, + "TempReasonL2Pure": 0.49, + "TempReasonL3Fact": 14.01, + "TempReasonL3Pure": 6.27, + "WinoGrande": 20.73 } ] }, "STS": { "spearman": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "AFQMC": 58.47, - "ATEC": 55.46, - "BIOSSES": 81.12, - "BQ": 77.59, - "LCQMC": 76.29, - "PAWSX": 50.22, - "QBQTC": 31.82, - "SICK-R": 79.15, - "STS12": 76.52, - "STS13": 88.63, - "STS14": 83.32, - "STS15": 87.5, - "STS16": 86.39, - "STS17 (en-en)": 87.79, - "STS22 (en)": 66.4, - "STS22 (zh)": 67.36, - "STSB": 81.37, - "STSBenchmark": 87.35 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "Summarization": { "spearman": [ { - "Model": "gte-Qwen1.5-7B-instruct", - "SummEval": 31.46 + "Model": "all-MiniLM-L6-v2-instruct" } ] }, "InstructionRetrieval": { "p-MRR": [ { - "Model": "gte-Qwen1.5-7B-instruct" + "Model": "all-MiniLM-L6-v2-instruct" } ] } }, - "voyage-large-2-instruct": { + "all-mpnet-base-v2": { "BitextMining": { "f1": [ { - "Model": "voyage-large-2-instruct" + "Model": "all-mpnet-base-v2", + "BornholmBitextMining (dan-Latn)": 27.44, + "Tatoeba (pol-Latn_eng-Latn)": 4.09, + "Tatoeba (ita-Latn_eng-Latn)": 11.1, + "Tatoeba (cat-Latn_eng-Latn)": 9.44, + "Tatoeba (aze-Latn_eng-Latn)": 1.49, + "Tatoeba (eus-Latn_eng-Latn)": 3.94, + "Tatoeba (epo-Latn_eng-Latn)": 7.15, + "Tatoeba (lit-Latn_eng-Latn)": 1.02, + "Tatoeba (ast-Latn_eng-Latn)": 9.78, + "Tatoeba (bul-Cyrl_eng-Latn)": 0.35, + "Tatoeba (ceb-Latn_eng-Latn)": 4.41, + "Tatoeba (mkd-Cyrl_eng-Latn)": 0.0, + "Tatoeba 
(tzl-Latn_eng-Latn)": 3.55,
+          "Tatoeba (zsm-Latn_eng-Latn)": 4.75,
+          "Tatoeba (mhr-Cyrl_eng-Latn)": 0.17,
+          "Tatoeba (pam-Latn_eng-Latn)": 4.32,
+          "Tatoeba (amh-Ethi_eng-Latn)": 0.0,
+          "Tatoeba (slv-Latn_eng-Latn)": 3.73,
+          "Tatoeba (lvs-Latn_eng-Latn)": 2.98,
+          "Tatoeba (sqi-Latn_eng-Latn)": 3.45,
+          "Tatoeba (orv-Cyrl_eng-Latn)": 0.0,
+          "Tatoeba (vie-Latn_eng-Latn)": 4.96,
+          "Tatoeba (pes-Arab_eng-Latn)": 0.2,
+          "Tatoeba (por-Latn_eng-Latn)": 10.48,
+          "Tatoeba (dtp-Latn_eng-Latn)": 3.54,
+          "Tatoeba (yid-Hebr_eng-Latn)": 0.08,
+          "Tatoeba (isl-Latn_eng-Latn)": 3.86,
+          "Tatoeba (cha-Latn_eng-Latn)": 12.2,
+          "Tatoeba (ron-Latn_eng-Latn)": 7.34,
+          "Tatoeba (hye-Armn_eng-Latn)": 0.14,
+          "Tatoeba (mar-Deva_eng-Latn)": 0.11,
+          "Tatoeba (hin-Deva_eng-Latn)": 0.02,
+          "Tatoeba (kor-Hang_eng-Latn)": 0.32,
+          "Tatoeba (srp-Cyrl_eng-Latn)": 1.89,
+          "Tatoeba (csb-Latn_eng-Latn)": 4.19,
+          "Tatoeba (jpn-Jpan_eng-Latn)": 1.71,
+          "Tatoeba (ber-Tfng_eng-Latn)": 4.56,
+          "Tatoeba (wuu-Hans_eng-Latn)": 0.91,
+          "Tatoeba (jav-Latn_eng-Latn)": 3.17,
+          "Tatoeba (nob-Latn_eng-Latn)": 4.37,
+          "Tatoeba (bre-Latn_eng-Latn)": 3.65,
+          "Tatoeba (kzj-Latn_eng-Latn)": 3.62,
+          "Tatoeba (urd-Arab_eng-Latn)": 0.0,
+          "Tatoeba (ces-Latn_eng-Latn)": 3.56,
+          "Tatoeba (cbk-Latn_eng-Latn)": 9.33,
+          "Tatoeba (gla-Latn_eng-Latn)": 2.04,
+          "Tatoeba (war-Latn_eng-Latn)": 5.14,
+          "Tatoeba (swh-Latn_eng-Latn)": 6.01,
+          "Tatoeba (swg-Latn_eng-Latn)": 7.86,
+          "Tatoeba (glg-Latn_eng-Latn)": 12.0,
+          "Tatoeba (fao-Latn_eng-Latn)": 7.08,
+          "Tatoeba (gsw-Latn_eng-Latn)": 10.67,
+          "Tatoeba (rus-Cyrl_eng-Latn)": 0.14,
+          "Tatoeba (kaz-Cyrl_eng-Latn)": 0.52,
+          "Tatoeba (gle-Latn_eng-Latn)": 2.19,
+          "Tatoeba (slk-Latn_eng-Latn)": 3.4,
+          "Tatoeba (nno-Latn_eng-Latn)": 5.75,
+          "Tatoeba (cor-Latn_eng-Latn)": 2.42,
+          "Tatoeba (nov-Latn_eng-Latn)": 16.61,
+          "Tatoeba (swe-Latn_eng-Latn)": 6.55,
+          "Tatoeba (max-Deva_eng-Latn)": 6.46,
+          "Tatoeba (oci-Latn_eng-Latn)": 8.57,
+          "Tatoeba (lfn-Latn_eng-Latn)": 6.1,
+          "Tatoeba (fra-Latn_eng-Latn)": 16.9,
+          "Tatoeba (ben-Beng_eng-Latn)": 0.0,
+          "Tatoeba (bel-Cyrl_eng-Latn)": 0.65,
+          "Tatoeba (lat-Latn_eng-Latn)": 5.78,
+          "Tatoeba (cmn-Hans_eng-Latn)": 2.22,
+          "Tatoeba (kat-Geor_eng-Latn)": 0.43,
+          "Tatoeba (bos-Latn_eng-Latn)": 4.6,
+          "Tatoeba (xho-Latn_eng-Latn)": 3.3,
+          "Tatoeba (tha-Thai_eng-Latn)": 0.0,
+          "Tatoeba (cym-Latn_eng-Latn)": 4.88,
+          "Tatoeba (deu-Latn_eng-Latn)": 11.46,
+          "Tatoeba (awa-Deva_eng-Latn)": 0.44,
+          "Tatoeba (ido-Latn_eng-Latn)": 9.84,
+          "Tatoeba (tat-Cyrl_eng-Latn)": 0.24,
+          "Tatoeba (kab-Latn_eng-Latn)": 1.31,
+          "Tatoeba (uzb-Latn_eng-Latn)": 1.98,
+          "Tatoeba (heb-Hebr_eng-Latn)": 0.28,
+          "Tatoeba (ara-Arab_eng-Latn)": 0.1,
+          "Tatoeba (fry-Latn_eng-Latn)": 12.43,
+          "Tatoeba (afr-Latn_eng-Latn)": 6.08,
+          "Tatoeba (kur-Latn_eng-Latn)": 3.65,
+          "Tatoeba (pms-Latn_eng-Latn)": 7.63,
+          "Tatoeba (ell-Grek_eng-Latn)": 0.0,
+          "Tatoeba (spa-Latn_eng-Latn)": 10.12,
+          "Tatoeba (dsb-Latn_eng-Latn)": 2.96,
+          "Tatoeba (uig-Arab_eng-Latn)": 0.33,
+          "Tatoeba (nld-Latn_eng-Latn)": 9.29,
+          "Tatoeba (tel-Telu_eng-Latn)": 0.73,
+          "Tatoeba (hrv-Latn_eng-Latn)": 3.77,
+          "Tatoeba (nds-Latn_eng-Latn)": 10.96,
+          "Tatoeba (hun-Latn_eng-Latn)": 3.23,
+          "Tatoeba (est-Latn_eng-Latn)": 2.35,
+          "Tatoeba (mal-Mlym_eng-Latn)": 0.15,
+          "Tatoeba (khm-Khmr_eng-Latn)": 0.28,
+          "Tatoeba (hsb-Latn_eng-Latn)": 3.12,
+          "Tatoeba (tgl-Latn_eng-Latn)": 4.06,
+          "Tatoeba (ang-Latn_eng-Latn)": 9.77,
+          "Tatoeba (tur-Latn_eng-Latn)": 3.16,
+          "Tatoeba (tuk-Latn_eng-Latn)": 2.23,
+          "Tatoeba (ile-Latn_eng-Latn)": 17.84,
+          "Tatoeba (mon-Cyrl_eng-Latn)": 0.81,
+          "Tatoeba (yue-Hant_eng-Latn)": 1.16,
+          "Tatoeba (ina-Latn_eng-Latn)": 22.55,
+          "Tatoeba (tam-Taml_eng-Latn)": 0.73,
+          "Tatoeba (ukr-Cyrl_eng-Latn)": 0.5,
+          "Tatoeba (dan-Latn_eng-Latn)": 10.01,
+          "Tatoeba (arq-Arab_eng-Latn)": 0.33,
+          "Tatoeba (arz-Arab_eng-Latn)": 0.0,
+          "Tatoeba (fin-Latn_eng-Latn)": 3.82,
+          "Tatoeba (ind-Latn_eng-Latn)": 4.88
         }
       ]
     },
     "Classification": {
       "accuracy": [
         {
-          "Model": "voyage-large-2-instruct",
-          "AmazonCounterfactualClassification (en)": 77.6,
-          "AmazonPolarityClassification": 96.58,
-          "AmazonReviewsClassification (en)": 50.77,
-          "Banking77Classification": 86.96,
-          "EmotionClassification": 59.81,
-          "ImdbClassification": 96.13,
-          "MTOPDomainClassification (en)": 98.86,
-          "MTOPIntentClassification (en)": 86.97,
-          "MassiveIntentClassification (en)": 81.08,
-          "MassiveScenarioClassification (en)": 87.95,
-          "ToxicConversationsClassification": 83.58,
-          "TweetSentimentExtractionClassification": 71.55
+          "Model": "all-mpnet-base-v2",
+          "AllegroReviews (pol-Latn)": 22.99,
+          "AmazonCounterfactualClassification (en-ext)": 67.5,
+          "AmazonCounterfactualClassification (en)": 65.03,
+          "AmazonCounterfactualClassification (deu-Latn)": 55.66,
+          "AmazonCounterfactualClassification (jpn-Jpan)": 60.69,
+          "AmazonPolarityClassification": 67.14,
+          "AmazonReviewsClassification (en)": 31.44,
+          "AmazonReviewsClassification (deu-Latn)": 26.05,
+          "AmazonReviewsClassification (spa-Latn)": 27.73,
+          "AmazonReviewsClassification (fra-Latn)": 28.49,
+          "AmazonReviewsClassification (jpn-Jpan)": 23.65,
+          "AmazonReviewsClassification (cmn-Hans)": 23.62,
+          "AngryTweetsClassification (dan-Latn)": 44.13,
+          "Banking77Classification": 81.7,
+          "CBD (pol-Latn)": 50.25,
+          "DanishPoliticalCommentsClassification (dan-Latn)": 28.31,
+          "EmotionClassification": 42.22,
+          "IFlyTek (cmn-Hans)": 17.18,
+          "ImdbClassification": 71.17,
+          "JDReview (cmn-Hans)": 60.19,
+          "LccSentimentClassification (dan-Latn)": 39.27,
+          "MTOPDomainClassification (en)": 91.89,
+          "MTOPDomainClassification (deu-Latn)": 71.86,
+          "MTOPDomainClassification (spa-Latn)": 71.3,
+          "MTOPDomainClassification (fra-Latn)": 74.88,
+          "MTOPDomainClassification (hin-Deva)": 39.93,
+          "MTOPDomainClassification (tha-Thai)": 17.54,
+          "MTOPIntentClassification (en)": 68.27,
+          "MTOPIntentClassification (deu-Latn)": 44.36,
+          "MTOPIntentClassification (spa-Latn)": 39.48,
+          "MTOPIntentClassification (fra-Latn)": 37.57,
+          "MTOPIntentClassification (hin-Deva)": 18.63,
+          "MTOPIntentClassification (tha-Thai)": 5.42,
+          "MasakhaNEWSClassification (amh-Ethi)": 36.49,
+          "MasakhaNEWSClassification (eng)": 79.75,
+          "MasakhaNEWSClassification (fra-Latn)": 77.77,
+          "MasakhaNEWSClassification (hau-Latn)": 59.22,
+          "MasakhaNEWSClassification (ibo-Latn)": 61.64,
+          "MasakhaNEWSClassification (lin-Latn)": 74.0,
+          "MasakhaNEWSClassification (lug-Latn)": 58.43,
+          "MasakhaNEWSClassification (orm-Ethi)": 48.15,
+          "MasakhaNEWSClassification (pcm-Latn)": 92.2,
+          "MasakhaNEWSClassification (run-Latn)": 64.72,
+          "MasakhaNEWSClassification (sna-Latn)": 73.69,
+          "MasakhaNEWSClassification (som-Latn)": 49.97,
+          "MasakhaNEWSClassification (swa-Latn)": 55.15,
+          "MasakhaNEWSClassification (tir-Ethi)": 27.46,
+          "MasakhaNEWSClassification (xho-Latn)": 60.98,
+          "MasakhaNEWSClassification (yor-Latn)": 63.33,
+          "MassiveIntentClassification (en)": 69.76,
+          "MassiveIntentClassification (jav-Latn)": 31.75,
+          "MassiveIntentClassification (fra-Latn)": 44.27,
+          "MassiveIntentClassification (msa-Latn)": 30.53,
+          "MassiveIntentClassification (hun-Latn)": 34.38,
+          "MassiveIntentClassification (pol-Latn)": 34.26,
+          "MassiveIntentClassification (nld-Latn)": 38.49,
+          "MassiveIntentClassification (tha-Thai)": 8.51,
+          "MassiveIntentClassification (tur-Latn)": 32.02,
+          "MassiveIntentClassification (tam-Taml)": 9.25,
+          "MassiveIntentClassification (hye-Armn)": 10.11,
+          "MassiveIntentClassification (khm-Khmr)": 4.74,
+          "MassiveIntentClassification (lav-Latn)": 35.08,
+          "MassiveIntentClassification (deu-Latn)": 44.54,
+          "MassiveIntentClassification (spa-Latn)": 39.75,
+          "MassiveIntentClassification (ben-Beng)": 12.35,
+          "MassiveIntentClassification (por-Latn)": 42.83,
+          "MassiveIntentClassification (ara-Arab)": 20.42,
+          "MassiveIntentClassification (cym-Latn)": 30.82,
+          "MassiveIntentClassification (dan-Latn)": 42.36,
+          "MassiveIntentClassification (mya-Mymr)": 4.6,
+          "MassiveIntentClassification (heb-Hebr)": 23.6,
+          "MassiveIntentClassification (kan-Knda)": 3.76,
+          "MassiveIntentClassification (swa-Latn)": 31.82,
+          "MassiveIntentClassification (fas-Arab)": 22.45,
+          "MassiveIntentClassification (hin-Deva)": 17.68,
+          "MassiveIntentClassification (kat-Geor)": 7.66,
+          "MassiveIntentClassification (mal-Mlym)": 2.64,
+          "MassiveIntentClassification (fin-Latn)": 34.58,
+          "MassiveIntentClassification (slv-Latn)": 34.49,
+          "MassiveIntentClassification (afr-Latn)": 36.49,
+          "MassiveIntentClassification (urd-Arab)": 12.86,
+          "MassiveIntentClassification (ron-Latn)": 38.07,
+          "MassiveIntentClassification (sqi-Latn)": 37.26,
+          "MassiveIntentClassification (cmo-Hant)": 22.43,
+          "MassiveIntentClassification (ita-Latn)": 40.29,
+          "MassiveIntentClassification (ind-Latn)": 36.31,
+          "MassiveIntentClassification (nob-Latn)": 39.3,
+          "MassiveIntentClassification (jpn-Jpan)": 33.13,
+          "MassiveIntentClassification (aze-Latn)": 28.92,
+          "MassiveIntentClassification (mon-Cyrl)": 19.65,
+          "MassiveIntentClassification (ell-Grek)": 24.52,
+          "MassiveIntentClassification (rus-Cyrl)": 23.98,
+          "MassiveIntentClassification (kor-Kore)": 13.35,
+          "MassiveIntentClassification (cmo-Hans)": 24.36,
+          "MassiveIntentClassification (isl-Latn)": 31.46,
+          "MassiveIntentClassification (swe-Latn)": 39.02,
+          "MassiveIntentClassification (tel-Telu)": 2.26,
+          "MassiveIntentClassification (vie-Latn)": 31.47,
+          "MassiveIntentClassification (tgl-Latn)": 36.33,
+          "MassiveIntentClassification (amh-Ethi)": 2.39,
+          "MassiveScenarioClassification (en)": 75.67,
+          "MassiveScenarioClassification (tur-Latn)": 39.11,
+          "MassiveScenarioClassification (kat-Geor)": 13.45,
+          "MassiveScenarioClassification (jpn-Jpan)": 40.57,
+          "MassiveScenarioClassification (spa-Latn)": 50.92,
+          "MassiveScenarioClassification (fas-Arab)": 27.8,
+          "MassiveScenarioClassification (hun-Latn)": 41.01,
+          "MassiveScenarioClassification (jav-Latn)": 40.0,
+          "MassiveScenarioClassification (por-Latn)": 52.06,
+          "MassiveScenarioClassification (sqi-Latn)": 44.67,
+          "MassiveScenarioClassification (lav-Latn)": 39.28,
+          "MassiveScenarioClassification (deu-Latn)": 54.09,
+          "MassiveScenarioClassification (nld-Latn)": 47.79,
+          "MassiveScenarioClassification (mon-Cyrl)": 25.58,
+          "MassiveScenarioClassification (swa-Latn)": 40.34,
+          "MassiveScenarioClassification (ben-Beng)": 17.49,
+          "MassiveScenarioClassification (cym-Latn)": 34.82,
+          "MassiveScenarioClassification (swe-Latn)": 44.53,
+          "MassiveScenarioClassification (rus-Cyrl)": 28.71,
+          "MassiveScenarioClassification (fra-Latn)": 54.26,
+          "MassiveScenarioClassification (dan-Latn)": 49.45,
+          "MassiveScenarioClassification (mya-Mymr)": 10.8,
+          "MassiveScenarioClassification (ron-Latn)": 47.86,
+          "MassiveScenarioClassification (cmo-Hans)": 35.33,
+          "MassiveScenarioClassification (hin-Deva)": 23.13,
+          "MassiveScenarioClassification (cmo-Hant)": 31.7,
+          "MassiveScenarioClassification (afr-Latn)": 43.63,
+          "MassiveScenarioClassification (aze-Latn)": 36.42,
+          "MassiveScenarioClassification (msa-Latn)": 37.28,
+          "MassiveScenarioClassification (ell-Grek)": 33.85,
+          "MassiveScenarioClassification (isl-Latn)": 39.36,
+          "MassiveScenarioClassification (fin-Latn)": 38.41,
+          "MassiveScenarioClassification (ind-Latn)": 43.05,
+          "MassiveScenarioClassification (pol-Latn)": 42.66,
+          "MassiveScenarioClassification (tam-Taml)": 14.55,
+          "MassiveScenarioClassification (ita-Latn)": 51.37,
+          "MassiveScenarioClassification (urd-Arab)": 20.0,
+          "MassiveScenarioClassification (kan-Knda)": 8.34,
+          "MassiveScenarioClassification (tel-Telu)": 7.81,
+          "MassiveScenarioClassification (mal-Mlym)": 7.69,
+          "MassiveScenarioClassification (ara-Arab)": 27.8,
+          "MassiveScenarioClassification (kor-Kore)": 17.28,
+          "MassiveScenarioClassification (vie-Latn)": 35.9,
+          "MassiveScenarioClassification (amh-Ethi)": 7.43,
+          "MassiveScenarioClassification (heb-Hebr)": 25.49,
+          "MassiveScenarioClassification (hye-Armn)": 16.86,
+          "MassiveScenarioClassification (khm-Khmr)": 9.63,
+          "MassiveScenarioClassification (slv-Latn)": 39.88,
+          "MassiveScenarioClassification (tgl-Latn)": 47.04,
+          "MassiveScenarioClassification (nob-Latn)": 45.75,
+          "MassiveScenarioClassification (tha-Thai)": 17.01,
+          "MultilingualSentiment (cmn-Hans)": 41.2,
+          "NoRecClassification (nob-Latn)": 38.34,
+          "NordicLangClassification (nob-Latn_nno-Latn_dan-Latn_swe-Latn_isl-Latn_fao-Latn)": 50.15,
+          "OnlineShopping (cmn-Hans)": 56.94,
+          "PAC (pol-Latn)": 62.1,
+          "PolEmo2.0-IN (pol-Latn)": 41.63,
+          "PolEmo2.0-OUT (pol-Latn)": 25.0,
+          "TNews (cmn-Hans)": 21.05,
+          "ToxicConversationsClassification": 61.05,
+          "TweetSentimentExtractionClassification": 55.05,
+          "Waimai (cmn-Hans)": 63.31
         }
       ]
     },
     "Clustering": {
       "v_measure": [
         {
-          "Model": "voyage-large-2-instruct",
-          "ArxivClusteringP2P": 51.81,
-          "ArxivClusteringS2S": 44.73,
-          "BiorxivClusteringP2P": 46.07,
-          "BiorxivClusteringS2S": 40.64,
-          "MedrxivClusteringP2P": 42.94,
-          "MedrxivClusteringS2S": 41.44,
-          "RedditClustering": 68.5,
-          "RedditClusteringP2P": 64.86,
-          "StackExchangeClustering": 74.16,
-          "StackExchangeClusteringP2P": 45.1,
-          "TwentyNewsgroupsClustering": 66.62
+          "Model": "all-mpnet-base-v2",
+          "ArxivClusteringP2P": 48.38,
+          "ArxivClusteringS2S": 39.72,
+          "BiorxivClusteringP2P": 39.62,
+          "BiorxivClusteringS2S": 35.02,
+          "MasakhaNEWSClusteringP2P (amh-Ethi)": 42.49,
+          "MasakhaNEWSClusteringP2P (eng)": 67.24,
+          "MasakhaNEWSClusteringP2P (fra-Latn)": 61.99,
+          "MasakhaNEWSClusteringP2P (hau-Latn)": 37.17,
+          "MasakhaNEWSClusteringP2P (ibo-Latn)": 52.0,
+          "MasakhaNEWSClusteringP2P (lin-Latn)": 69.68,
+          "MasakhaNEWSClusteringP2P (lug-Latn)": 50.96,
+          "MasakhaNEWSClusteringP2P (orm-Ethi)": 28.42,
+          "MasakhaNEWSClusteringP2P (pcm-Latn)": 64.01,
+          "MasakhaNEWSClusteringP2P (run-Latn)": 57.6,
+          "MasakhaNEWSClusteringP2P (sna-Latn)": 54.99,
+          "MasakhaNEWSClusteringP2P (som-Latn)": 31.16,
+          "MasakhaNEWSClusteringP2P (swa-Latn)": 28.29,
+          "MasakhaNEWSClusteringP2P (tir-Ethi)": 41.85,
+          "MasakhaNEWSClusteringP2P (xho-Latn)": 35.24,
+          "MasakhaNEWSClusteringP2P (yor-Latn)": 42.15,
+          "MasakhaNEWSClusteringS2S (amh-Ethi)": 44.48,
+          "MasakhaNEWSClusteringS2S (eng)": 35.69,
+          "MasakhaNEWSClusteringS2S (fra-Latn)": 41.05,
+          "MasakhaNEWSClusteringS2S (hau-Latn)": 16.64,
+          "MasakhaNEWSClusteringS2S (ibo-Latn)": 38.63,
+          "MasakhaNEWSClusteringS2S (lin-Latn)": 70.72,
+          "MasakhaNEWSClusteringS2S (lug-Latn)": 46.97,
+          "MasakhaNEWSClusteringS2S (orm-Ethi)": 23.85,
+          "MasakhaNEWSClusteringS2S (pcm-Latn)": 68.7,
+          "MasakhaNEWSClusteringS2S (run-Latn)": 52.27,
+          "MasakhaNEWSClusteringS2S (sna-Latn)": 47.64,
+          "MasakhaNEWSClusteringS2S (som-Latn)": 30.94,
+          "MasakhaNEWSClusteringS2S (swa-Latn)": 17.12,
+          "MasakhaNEWSClusteringS2S (tir-Ethi)": 42.01,
+          "MasakhaNEWSClusteringS2S (xho-Latn)": 24.16,
+          "MasakhaNEWSClusteringS2S (yor-Latn)": 35.04,
+          "MedrxivClusteringP2P": 35.58,
+          "MedrxivClusteringS2S": 32.87,
+          "RedditClustering": 54.82,
+          "RedditClusteringP2P": 56.77,
+          "StackExchangeClustering": 53.8,
+          "StackExchangeClusteringP2P": 34.28,
+          "TwentyNewsgroupsClustering": 49.74
         }
       ]
     },
     "PairClassification": {
       "ap": [
         {
-          "Model": "voyage-large-2-instruct",
-          "SprintDuplicateQuestions": 94.5,
-          "TwitterSemEval2015": 86.32,
-          "TwitterURLCorpus": 86.9
+          "Model": "all-mpnet-base-v2",
+          "CDSC-E (pol-Latn)": 45.37,
+          "OpusparcusPC (deu-Latn)": 89.78,
+          "OpusparcusPC (en)": 97.75,
+          "OpusparcusPC (fin-Latn)": 85.82,
+          "OpusparcusPC (fra-Latn)": 86.61,
+          "OpusparcusPC (rus-Cyrl)": 79.85,
+          "OpusparcusPC (swe-Latn)": 81.81,
+          "PSC (pol-Latn)": 83.28,
+          "PawsXPairClassification (deu-Latn)": 52.17,
+          "PawsXPairClassification (en)": 61.99,
+          "PawsXPairClassification (spa-Latn)": 55.06,
+          "PawsXPairClassification (fra-Latn)": 56.42,
+          "PawsXPairClassification (jpn-Hira)": 47.43,
+          "PawsXPairClassification (kor-Hang)": 49.75,
+          "PawsXPairClassification (cmn-Hans)": 52.47,
+          "SICK-E-PL (pol-Latn)": 46.51,
+          "SprintDuplicateQuestions": 90.15,
+          "TwitterSemEval2015": 73.85,
+          "TwitterURLCorpus": 85.11
         }
       ]
     },
     "Reranking": {
       "map": [
         {
-          "Model": "voyage-large-2-instruct",
-          "AskUbuntuDupQuestions": 64.92,
+          "Model": "all-mpnet-base-v2",
+          "AlloprofReranking (fra-Latn)": 69.63,
+          "AskUbuntuDupQuestions": 65.85,
+          "MMarcoReranking (cmn-Hans)": 4.65,
           "MindSmallReranking": 30.97,
-          "SciDocsRR": 89.34,
-          "StackOverflowDupQuestions": 55.11
+          "SciDocsRR": 88.65,
+          "StackOverflowDupQuestions": 51.98,
+          "SyntecReranking (fra-Latn)": 66.12,
+          "T2Reranking (cmn-Hans)": 58.3
         }
       ]
     },
     "Retrieval": {
       "ndcg_at_10": [
         {
-          "Model": "voyage-large-2-instruct",
-          "ArguAna": 64.06,
-          "BrightRetrieval (theoremqa_questions)": 26.06,
-          "BrightRetrieval (earth_science)": 25.09,
-          "BrightRetrieval (leetcode)": 30.6,
-          "BrightRetrieval (economics)": 19.85,
-          "BrightRetrieval (robotics)": 11.21,
-          "BrightRetrieval (psychology)": 24.79,
-          "BrightRetrieval (aops)": 7.45,
-          "BrightRetrieval (sustainable_living)": 15.58,
-          "BrightRetrieval (pony)": 1.48,
-          "BrightRetrieval (theoremqa_theorems)": 10.13,
-          "BrightRetrieval (biology)": 23.55,
-          "BrightRetrieval (stackoverflow)": 15.03,
-          "CQADupstackRetrieval": 46.6,
-          "ClimateFEVER": 32.65,
-          "DBPedia": 46.03,
-          "FEVER": 91.47,
-          "FiQA2018": 59.76,
-          "HotpotQA": 70.86,
-          "MSMARCO": 40.6,
-          "NFCorpus": 40.32,
-          "NQ": 65.92,
-          "QuoraRetrieval": 87.4,
-          "SCIDOCS": 24.32,
-          "SciFact": 79.99,
-          "TRECCOVID": 85.07,
-          "Touche2020": 39.16
+          "Model": "all-mpnet-base-v2",
+          "AILACasedocs": 22.51,
+          "AILAStatutes": 21.27,
+          "ARCChallenge": 11.8,
+          "AlloprofRetrieval (fra-Latn)": 34.27,
+          "AlphaNLI": 22.41,
+          "ArguAna": 46.52,
+          "ArguAna-PL (pol-Latn)": 14.72,
+          "BSARDRetrieval (fra-Latn)": 6.98,
+          "BrightRetrieval (robotics)": 8.36,
+          "BrightRetrieval (psychology)": 22.63,
+          "BrightRetrieval (leetcode)": 26.4,
+          "BrightRetrieval (biology)": 15.52,
+          "BrightRetrieval (theoremqa_questions)": 18.49,
+          "BrightRetrieval (economics)": 16.64,
+          "BrightRetrieval (stackoverflow)": 9.48,
+          "BrightRetrieval (pony)": 6.95,
+          "BrightRetrieval (earth_science)": 20.11,
+          "BrightRetrieval (theoremqa_theorems)": 12.38,
+          "BrightRetrieval (sustainable_living)": 15.34,
+          "BrightRetrieval (aops)": 5.32,
+          "CQADupstackRetrieval": 44.96,
+          "ClimateFEVER": 21.97,
+          "CmedqaRetrieval (cmn-Hans)": 2.0,
+          "CovidRetrieval (cmn-Hans)": 3.7,
+          "DBPedia": 32.09,
+          "DuRetrieval (cmn-Hans)": 4.92,
+          "EcomRetrieval (cmn-Hans)": 3.94,
+          "FEVER": 50.86,
+          "FiQA-PL (pol-Latn)": 3.6,
+          "FiQA2018": 49.96,
+          "GerDaLIRSmall (deu-Latn)": 3.78,
+          "HellaSwag": 26.27,
+          "HotpotQA": 39.29,
+          "LEMBNarrativeQARetrieval": 19.34,
+          "LEMBNeedleRetrieval": 16.0,
+          "LEMBPasskeyRetrieval": 24.5,
+          "LEMBQMSumRetrieval": 21.54,
+          "LEMBSummScreenFDRetrieval": 60.43,
+          "LEMBWikimQARetrieval": 44.92,
+          "LeCaRDv2 (zho-Hans)": 18.09,
+          "LegalBenchConsumerContractsQA": 75.25,
+          "LegalBenchCorporateLobbying": 89.04,
+          "LegalQuAD (deu-Latn)": 10.67,
+          "LegalSummarization": 58.55,
+          "MMarcoRetrieval (cmn-Hans)": 7.13,
+          "MSMARCO": 39.75,
+          "MedicalRetrieval (cmn-Hans)": 1.71,
+          "MintakaRetrieval (ara-Arab)": 1.97,
+          "MintakaRetrieval (deu-Latn)": 17.21,
+          "MintakaRetrieval (spa-Latn)": 10.11,
+          "MintakaRetrieval (fra-Latn)": 12.93,
+          "MintakaRetrieval (hin-Deva)": 2.05,
+          "MintakaRetrieval (ita-Latn)": 5.63,
+          "MintakaRetrieval (jpn-Hira)": 6.72,
+          "MintakaRetrieval (por-Latn)": 8.05,
+          "NFCorpus": 33.29,
+          "NFCorpus-PL (pol-Latn)": 8.77,
+          "NQ": 50.45,
+          "PIQA": 29.03,
+          "Quail": 3.41,
+          "QuoraRetrieval": 87.46,
+          "RARbCode": 53.21,
+          "RARbMath": 71.85,
+          "SCIDOCS": 23.76,
+          "SCIDOCS-PL (pol-Latn)": 4.02,
+          "SIQA": 2.38,
+          "SciFact": 65.57,
+          "SciFact-PL (pol-Latn)": 13.31,
+          "SpartQA": 0.22,
+          "SyntecRetrieval (fra-Latn)": 57.39,
+          "T2Retrieval (cmn-Hans)": 2.98,
+          "TRECCOVID": 51.33,
+          "TRECCOVID-PL (pol-Latn)": 12.12,
+          "TempReasonL1": 1.77,
+          "TempReasonL2Fact": 11.2,
+          "TempReasonL2Pure": 1.15,
+          "TempReasonL3Fact": 9.42,
+          "TempReasonL3Pure": 5.59,
+          "Touche2020": 19.93,
+          "VideoRetrieval (cmn-Hans)": 8.48,
+          "WinoGrande": 20.8,
+          "XPQARetrieval (ara-Arab_ara-Arab)": 9.42,
+          "XPQARetrieval (eng-Latn_ara-Arab)": 2.39,
+          "XPQARetrieval (ara-Arab_eng-Latn)": 8.98,
+          "XPQARetrieval (deu-Latn_deu-Latn)": 55.82,
+          "XPQARetrieval (eng-Latn_deu-Latn)": 11.74,
+          "XPQARetrieval (deu-Latn_eng-Latn)": 30.44,
+          "XPQARetrieval (spa-Latn_spa-Latn)": 40.01,
+          "XPQARetrieval (eng-Latn_spa-Latn)": 6.12,
+          "XPQARetrieval (spa-Latn_eng-Latn)": 29.44,
+          "XPQARetrieval (fra-Latn_fra-Latn)": 51.94,
+          "XPQARetrieval (eng-Latn_fra-Latn)": 11.48,
+          "XPQARetrieval (fra-Latn_eng-Latn)": 32.52,
+          "XPQARetrieval (hin-Deva_hin-Deva)": 37.48,
+          "XPQARetrieval (eng-Latn_hin-Deva)": 5.11,
+          "XPQARetrieval (hin-Deva_eng-Latn)": 7.37,
+          "XPQARetrieval (ita-Latn_ita-Latn)": 54.2,
+          "XPQARetrieval (eng-Latn_ita-Latn)": 6.08,
+          "XPQARetrieval (ita-Latn_eng-Latn)": 30.32,
+          "XPQARetrieval (jpn-Hira_jpn-Hira)": 37.45,
+          "XPQARetrieval (eng-Latn_jpn-Hira)": 5.79,
+          "XPQARetrieval (jpn-Hira_eng-Latn)": 14.77,
+          "XPQARetrieval (kor-Hang_kor-Hang)": 10.4,
+          "XPQARetrieval (eng-Latn_kor-Hang)": 7.09,
+          "XPQARetrieval (kor-Hang_eng-Latn)": 6.95,
+          "XPQARetrieval (pol-Latn_pol-Latn)": 23.67,
+          "XPQARetrieval (eng-Latn_pol-Latn)": 8.83,
+          "XPQARetrieval (pol-Latn_eng-Latn)": 15.94,
+          "XPQARetrieval (por-Latn_por-Latn)": 33.56,
+          "XPQARetrieval (eng-Latn_por-Latn)": 3.76,
+          "XPQARetrieval (por-Latn_eng-Latn)": 23.45,
+          "XPQARetrieval (tam-Taml_tam-Taml)": 5.53,
+          "XPQARetrieval (eng-Latn_tam-Taml)": 3.3,
+          "XPQARetrieval (tam-Taml_eng-Latn)": 4.0,
+          "XPQARetrieval (cmn-Hans_cmn-Hans)": 23.84,
+          "XPQARetrieval (eng-Latn_cmn-Hans)": 7.2,
+          "XPQARetrieval (cmn-Hans_eng-Latn)": 12.84
         }
       ]
     },
     "STS": {
       "spearman": [
         {
-          "Model": "voyage-large-2-instruct",
-          "BIOSSES": 89.12,
-          "BIOSSES (default)": 89.24,
-          "SICK-R": 83.16,
-          "STS12": 76.15,
-          "STS12 (default)": 73.34,
-          "STS13": 88.49,
-          "STS14": 86.49,
-          "STS15": 91.13,
-          "STS16": 85.68,
-          "STS17 (en-en)": 90.06,
-          "STS22 (en)": 66.32,
-          "STSBenchmark": 89.22
+          "Model": "all-mpnet-base-v2",
+          "AFQMC (cmn-Hans)": 8.01,
+          "ATEC (cmn-Hans)": 14.03,
+          "BIOSSES": 80.43,
+          "BQ (cmn-Hans)": 21.39,
+          "CDSC-R (pol-Latn)": 77.04,
+          "LCQMC (cmn-Hans)": 22.84,
+          "PAWSX (cmn-Hans)": 6.44,
+          "SICK-R": 80.59,
+          "SICK-R-PL (pol-Latn)": 50.2,
+          "SICKFr (fra-Latn)": 67.05,
+          "STS12": 72.63,
+          "STS13": 83.48,
+          "STS14": 78.0,
+          "STS15": 85.66,
+          "STS16": 80.03,
+          "STS17 (en-en)": 90.6,
+          "STS17 (eng-Latn_ara-Arab)": 6.76,
+          "STS17 (fra-Latn_eng-Latn)": 41.64,
+          "STS17 (eng-Latn_tur-Latn)": -4.58,
+          "STS17 (eng-Latn_deu-Latn)": 35.5,
+          "STS17 (spa-Latn_eng-Latn)": 25.28,
+          "STS17 (ita-Latn_eng-Latn)": 31.8,
+          "STS17 (spa-Latn)": 78.4,
+          "STS17 (kor-Hang)": 39.11,
+          "STS17 (ara-Arab)": 55.42,
+          "STS17 (nld-Latn_eng-Latn)": 32.89,
+          "STS22 (en)": 68.39,
+          "STS22 (spa-Latn_eng-Latn)": 55.09,
+          "STS22 (deu-Latn_pol-Latn)": 23.53,
+          "STS22 (cmn-Hans_eng-Latn)": 40.47,
+          "STS22 (pol-Latn)": 24.21,
+          "STS22 (tur-Latn)": 29.35,
+          "STS22 (spa-Latn_ita-Latn)": 41.61,
+          "STS22 (fra-Latn_pol-Latn)": 73.25,
+          "STS22 (rus-Cyrl)": 15.83,
+          "STS22 (deu-Latn)": 27.0,
+          "STS22 (spa-Latn)": 55.98,
+          "STS22 (pol-Latn_eng-Latn)": 51.07,
+          "STS22 (fra-Latn)": 77.1,
+          "STS22 (deu-Latn_eng-Latn)": 49.73,
+          "STS22 (ara-Arab)": 38.96,
+          "STS22 (deu-Latn_fra-Latn)": 31.39,
+          "STS22 (ita-Latn)": 58.02,
+          "STS22 (cmn-Hans)": 42.24,
+          "STSB (cmn-Hans)": 37.7,
+          "STSBenchmark": 83.42,
+          "STSBenchmarkMultilingualSTS (nld-Latn)": 57.01,
+          "STSBenchmarkMultilingualSTS (rus-Cyrl)": 55.54,
+          "STSBenchmarkMultilingualSTS (fra-Latn)": 65.15,
+          "STSBenchmarkMultilingualSTS (ita-Latn)": 62.72,
+          "STSBenchmarkMultilingualSTS (spa-Latn)": 65.78,
+          "STSBenchmarkMultilingualSTS (en)": 83.42,
+          "STSBenchmarkMultilingualSTS (deu-Latn)": 61.43,
+          "STSBenchmarkMultilingualSTS (por-Latn)": 62.12,
+          "STSBenchmarkMultilingualSTS (cmn-Hans)": 39.43,
+          "STSBenchmarkMultilingualSTS (pol-Latn)": 52.36
         }
       ]
     },
     "Summarization": {
       "spearman": [
         {
-          "Model": "voyage-large-2-instruct",
-          "SummEval": 30.84
+          "Model": "all-mpnet-base-v2",
+          "SummEval": 27.49,
+          "SummEvalFr (fra-Latn)": 28.11
         }
       ]
     },
     "InstructionRetrieval": {
       "p-MRR": [
         {
-          "Model": "voyage-large-2-instruct"
+          "Model": "all-mpnet-base-v2"
         }
       ]
     }
   },
-  "e5-mistral-7b-instruct": {
+  "bge-base-en-v1.5-instruct": {
     "BitextMining": {
       "f1": [
         {
-          "Model": "e5-mistral-7b-instruct"
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     },
     "Classification": {
       "accuracy": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "AmazonReviewsClassification (fr)": 36.71,
-          "MTOPDomainClassification (fr)": 74.8,
-          "MTOPIntentClassification (fr)": 53.97,
-          "MasakhaNEWSClassification (fra)": 80.59,
-          "MassiveIntentClassification (fr)": 46.39,
-          "MassiveScenarioClassification (fr)": 53.86
+          "Model": "bge-base-en-v1.5-instruct"
        }
       ]
     },
     "Clustering": {
       "v_measure": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "AlloProfClusteringP2P": 61.06,
-          "AlloProfClusteringS2S": 28.12,
-          "HALClusteringS2S": 19.69,
-          "MLSUMClusteringP2P": 45.59,
-          "MLSUMClusteringS2S": 32.0,
-          "MasakhaNEWSClusteringP2P (fra)": 52.47,
-          "MasakhaNEWSClusteringS2S (fra)": 49.2
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     },
     "PairClassification": {
       "ap": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "OpusparcusPC (fr)": 88.5,
-          "PawsXPairClassification (fr)": 63.65
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     },
     "Reranking": {
       "map": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "AlloprofReranking": 47.36,
-          "SyntecReranking": 77.05
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     },
     "Retrieval": {
       "ndcg_at_10": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "AILACasedocs": 38.76,
-          "AILAStatutes": 38.07,
-          "AlloprofRetrieval": 16.46,
-          "BSARDRetrieval": 0.0,
-          "BrightRetrieval (sustainable_living)": 18.51,
-          "BrightRetrieval (economics)": 15.49,
-          "BrightRetrieval (theoremqa_theorems)": 23.78,
-          "BrightRetrieval (aops)": 7.1,
-          "BrightRetrieval (theoremqa_questions)": 23.94,
-          "BrightRetrieval (stackoverflow)": 9.83,
-          "BrightRetrieval (psychology)": 15.79,
-          "BrightRetrieval (pony)": 4.81,
-          "BrightRetrieval (leetcode)": 28.72,
-          "BrightRetrieval (biology)": 18.84,
-          "BrightRetrieval (earth_science)": 25.96,
-          "BrightRetrieval (robotics)": 16.37,
-          "GerDaLIRSmall": 37.18,
-          "LEMBNarrativeQARetrieval": 44.62,
-          "LEMBNeedleRetrieval": 48.25,
-          "LEMBPasskeyRetrieval": 71.0,
-          "LEMBQMSumRetrieval": 43.63,
-          "LEMBSummScreenFDRetrieval": 96.82,
-          "LEMBWikimQARetrieval": 82.11,
-          "LeCaRDv2": 68.56,
-          "LegalBenchConsumerContractsQA": 75.46,
-          "LegalBenchCorporateLobbying": 94.01,
-          "LegalQuAD": 59.64,
-          "LegalSummarization": 66.51,
-          "MintakaRetrieval (fr)": 3.57,
-          "SyntecRetrieval": 55.9,
-          "XPQARetrieval (fr)": 41.29
+          "Model": "bge-base-en-v1.5-instruct",
+          "ARCChallenge": 8.85,
+          "AlphaNLI": 4.13,
+          "HellaSwag": 24.03,
+          "PIQA": 23.03,
+          "Quail": 1.25,
+          "RARbCode": 46.32,
+          "RARbMath": 45.62,
+          "SIQA": 0.24,
+          "SpartQA": 2.67,
+          "TempReasonL1": 0.8,
+          "TempReasonL2Fact": 16.56,
+          "TempReasonL2Pure": 1.33,
+          "TempReasonL3Fact": 12.68,
+          "TempReasonL3Pure": 5.08,
+          "WinoGrande": 10.27
         }
       ]
     },
     "STS": {
       "spearman": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "SICKFr": 64.39,
-          "STS22 (fr)": 69.82,
-          "STSBenchmarkMultilingualSTS (fr)": 61.87
+          "Model": "bge-base-en-v1.5-instruct"
        }
       ]
     },
     "Summarization": {
       "spearman": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "SummEvalFr": 32.22
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     },
     "InstructionRetrieval": {
       "p-MRR": [
         {
-          "Model": "e5-mistral-7b-instruct",
-          "Core17InstructionRetrieval": 0.09,
-          "News21InstructionRetrieval": -0.86,
-          "Robust04InstructionRetrieval": -9.59
+          "Model": "bge-base-en-v1.5-instruct"
         }
       ]
     }
   },
-  "bm25": {
+  "bge-small-en-v1.5-instruct": {
     "BitextMining": {
       "f1": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "Classification": {
       "accuracy": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "Clustering": {
       "v_measure": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "PairClassification": {
       "ap": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "Reranking": {
       "map": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "Retrieval": {
       "ndcg_at_10": [
         {
-          "Model": "bm25",
-          "BrightRetrieval (robotics)": 13.53,
-          "BrightRetrieval (pony)": 7.93,
-          "BrightRetrieval (leetcode)": 24.37,
-          "BrightRetrieval (earth_science)": 27.06,
-          "BrightRetrieval (stackoverflow)": 16.55,
-          "BrightRetrieval (economics)": 14.87,
-          "BrightRetrieval (theoremqa_questions)": 9.78,
-          "BrightRetrieval (theoremqa_theorems)": 4.25,
-          "BrightRetrieval (psychology)": 12.51,
-          "BrightRetrieval (sustainable_living)": 15.22,
-          "BrightRetrieval (biology)": 19.19,
-          "BrightRetrieval (aops)": 6.2
+          "Model": "bge-small-en-v1.5-instruct",
+          "ARCChallenge": 7.72,
+          "AlphaNLI": 1.26,
+          "HellaSwag": 23.41,
+          "PIQA": 20.79,
+          "Quail": 2.01,
+          "RARbCode": 41.52,
+          "RARbMath": 46.5,
+          "SIQA": 0.98,
+          "SpartQA": 2.86,
+          "TempReasonL1": 1.27,
+          "TempReasonL2Fact": 16.72,
+          "TempReasonL2Pure": 1.1,
+          "TempReasonL3Fact": 12.81,
+          "TempReasonL3Pure": 4.63,
+          "WinoGrande": 5.35
         }
       ]
     },
     "STS": {
       "spearman": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "Summarization": {
       "spearman": [
         {
-          "Model": "bm25"
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     },
     "InstructionRetrieval": {
       "p-MRR": [
         {
-          "Model": "bm25",
-          "Core17InstructionRetrieval": -1.06,
-          "News21InstructionRetrieval": -2.15,
-          "Robust04InstructionRetrieval": -3.06
+          "Model": "bge-small-en-v1.5-instruct"
         }
       ]
     }
   },
-  "text-embedding-3-large": {
+  "e5-mistral-7b-instruct": {
     "BitextMining": {
       "f1": [
         {
-          "Model": "text-embedding-3-large"
+          "Model": "e5-mistral-7b-instruct"
         }
       ]
     },
     "Classification": {
       "accuracy": [
         {
-          "Model": "text-embedding-3-large",
-          "AmazonCounterfactualClassification (en)": 78.93,
-          "AmazonPolarityClassification": 92.85,
-          "AmazonReviewsClassification (en)": 48.7,
-          "Banking77Classification": 85.69,
-          "EmotionClassification": 51.58,
-          "ImdbClassification": 87.67,
-          "MTOPDomainClassification (en)": 95.36,
-          "MTOPIntentClassification (en)": 75.07,
-          "MassiveIntentClassification (en)": 74.64,
-          "MassiveScenarioClassification (en)": 79.79,
-          "ToxicConversationsClassification": 72.92,
-          "TweetSentimentExtractionClassification": 62.22
+          "Model": "e5-mistral-7b-instruct",
+          "AmazonReviewsClassification (fr)": 36.71,
+          "MTOPDomainClassification (fr)": 74.8,
+          "MTOPIntentClassification (fr)": 53.97,
+          "MasakhaNEWSClassification (fra)": 80.59,
+          "MassiveIntentClassification (fr)": 46.39,
+          "MassiveScenarioClassification (fr)": 53.86
         }
       ]
     },
     "Clustering": {
       "v_measure": [
         {
-          "Model": "text-embedding-3-large",
-          "ArxivClusteringP2P": 49.01,
-          "ArxivClusteringS2S": 44.45,
-          "BiorxivClusteringP2P": 38.03,
-          "BiorxivClusteringS2S": 36.53,
-          "MedrxivClusteringP2P": 32.7,
-          "MedrxivClusteringS2S": 31.27,
-          "RedditClustering": 67.84,
-          "RedditClusteringP2P": 67.96,
-          "StackExchangeClustering": 76.26,
-          "StackExchangeClusteringP2P": 36.88,
-          "TwentyNewsgroupsClustering": 58.14
+          "Model": "e5-mistral-7b-instruct",
+          "AlloProfClusteringP2P": 61.06,
+          "AlloProfClusteringS2S": 28.12,
+          "HALClusteringS2S": 19.69,
+          "MLSUMClusteringP2P": 45.59,
+          "MLSUMClusteringS2S": 32.0,
+          "MasakhaNEWSClusteringP2P (fra)": 52.47,
+          "MasakhaNEWSClusteringS2S (fra)": 49.2
         }
       ]
     },
     "PairClassification": {
       "ap": [
         {
-          "Model": "text-embedding-3-large",
-          "SprintDuplicateQuestions": 92.25,
-          "TwitterSemEval2015": 77.13,
-          "TwitterURLCorpus": 87.78
+          "Model": "e5-mistral-7b-instruct",
+          "OpusparcusPC (fr)": 88.5,
+          "PawsXPairClassification (fr)": 63.65
         }
       ]
     },
     "Reranking": {
       "map": [
         {
-          "Model": "text-embedding-3-large",
-          "AskUbuntuDupQuestions": 65.03,
-          "MindSmallReranking": 29.86,
-          "SciDocsRR": 86.66,
-          "StackOverflowDupQuestions": 55.08
+          "Model": "e5-mistral-7b-instruct",
+          "AlloprofReranking": 47.36,
+          "SyntecReranking": 77.05
         }
       ]
     },
     "Retrieval": {
       "ndcg_at_10": [
         {
-          "Model": "text-embedding-3-large",
-          "AILACasedocs": 39.0,
-          "AILAStatutes": 41.31,
-          "ArguAna": 58.05,
-          "BrightRetrieval (theoremqa_questions)": 22.22,
-          "BrightRetrieval (leetcode)": 23.65,
-          "BrightRetrieval (earth_science)": 26.27,
-          "BrightRetrieval (psychology)": 27.52,
-          "BrightRetrieval (robotics)": 12.93,
-          "BrightRetrieval (economics)": 19.98,
-          "BrightRetrieval (stackoverflow)": 12.49,
-          "BrightRetrieval (biology)": 23.67,
-          "BrightRetrieval (theoremqa_theorems)": 9.25,
-          "BrightRetrieval (pony)": 2.45,
-          "BrightRetrieval (sustainable_living)": 20.32,
-          "BrightRetrieval (aops)": 8.45,
-          "CQADupstackRetrieval": 47.54,
-          "ClimateFEVER": 30.27,
-          "DBPedia": 44.76,
-          "FEVER": 87.94,
-          "FiQA2018": 55.0,
-          "GerDaLIRSmall": 32.77,
-          "HotpotQA": 71.58,
-          "LEMBNarrativeQARetrieval": 44.09,
-          "LEMBNeedleRetrieval": 29.25,
-          "LEMBPasskeyRetrieval": 63.0,
-          "LEMBQMSumRetrieval": 32.49,
-          "LEMBSummScreenFDRetrieval": 84.8,
-          "LEMBWikimQARetrieval": 54.16,
-          "LeCaRDv2": 57.2,
-          "LegalBenchConsumerContractsQA": 79.39,
-          "LegalBenchCorporateLobbying": 95.09,
-          "LegalQuAD": 57.47,
-          "LegalSummarization": 71.55,
-          "MSMARCO": 40.24,
-          "NFCorpus": 42.07,
-          "NQ": 61.27,
-          "QuoraRetrieval": 89.05,
-          "SCIDOCS": 23.11,
-          "SciFact": 77.77,
-          "TRECCOVID": 79.56,
-          "Touche2020": 23.35
+          "Model": "e5-mistral-7b-instruct",
+          "AILACasedocs": 38.76,
+          "AILAStatutes": 38.07,
+          "ARCChallenge": 17.81,
+          "AlloprofRetrieval": 16.46,
+          "AlphaNLI": 26.12,
+          "BSARDRetrieval": 0.0,
+          "BrightRetrieval (sustainable_living)": 18.51,
+          "BrightRetrieval (economics)": 15.49,
+          "BrightRetrieval (theoremqa_theorems)": 23.78,
+          "BrightRetrieval (aops)": 7.1,
+          "BrightRetrieval (theoremqa_questions)": 23.94,
+          "BrightRetrieval (stackoverflow)": 9.83,
+          "BrightRetrieval (psychology)": 15.79,
+          "BrightRetrieval (pony)": 4.81,
+          "BrightRetrieval (leetcode)": 28.72,
+          "BrightRetrieval (biology)": 18.84,
+          "BrightRetrieval (earth_science)": 25.96,
+          "BrightRetrieval (robotics)": 16.37,
+          "GerDaLIRSmall": 37.18,
+          "HellaSwag": 34.85,
+          "LEMBNarrativeQARetrieval": 44.62,
+          "LEMBNeedleRetrieval": 48.25,
+          "LEMBPasskeyRetrieval": 71.0,
+          "LEMBQMSumRetrieval": 43.63,
+          "LEMBSummScreenFDRetrieval": 96.82,
+          "LEMBWikimQARetrieval": 82.11,
+          "LeCaRDv2": 68.56,
+          "LegalBenchConsumerContractsQA": 75.46,
+          "LegalBenchCorporateLobbying": 94.01,
+          "LegalQuAD": 59.64,
+          "LegalSummarization": 66.51,
+          "MintakaRetrieval (fr)": 3.57,
+          "PIQA": 39.37,
+          "Quail": 7.01,
+          "RARbCode": 78.46,
+          "RARbMath": 72.16,
+          "SIQA": 5.42,
+          "SpartQA": 9.92,
+          "SyntecRetrieval": 55.9,
+          "TempReasonL1": 3.31,
+          "TempReasonL2Fact": 36.9,
+          "TempReasonL2Pure": 9.18,
+          "TempReasonL3Fact": 30.18,
+          "TempReasonL3Pure": 14.31,
+          "WinoGrande": 41.21,
+          "XPQARetrieval (fr)": 41.29
         }
       ]
     },
     "STS": {
       "spearman": [
         {
-          "Model": "text-embedding-3-large",
-          "BIOSSES": 84.68,
-          "SICK-R": 79.0,
-          "STS12": 72.84,
-          "STS13": 86.1,
-          "STS14": 81.15,
-          "STS15": 88.49,
-          "STS16": 85.08,
-          "STS17 (en-en)": 90.22,
-          "STS22 (en)": 66.14,
-          "STSBenchmark": 83.56
+          "Model": "e5-mistral-7b-instruct",
+          "SICKFr": 64.39,
+          "STS22 (fr)": 69.82,
+          "STSBenchmarkMultilingualSTS (fr)": 61.87
         }
       ]
     },
     "Summarization": {
       "spearman": [
         {
-          "Model": "text-embedding-3-large",
-          "SummEval": 29.92
+          "Model": "e5-mistral-7b-instruct",
+          "SummEvalFr": 32.22
         }
       ]
     },
     "InstructionRetrieval": {
       "p-MRR": [
         {
-          "Model": "text-embedding-3-large",
-          "Core17InstructionRetrieval": -0.2,
-          "News21InstructionRetrieval": -2.03,
-          "Robust04InstructionRetrieval": -5.81
+          "Model": "e5-mistral-7b-instruct",
+          "Core17InstructionRetrieval": 0.09,
+          "News21InstructionRetrieval": -0.86,
+          "Robust04InstructionRetrieval": -9.59
         }
       ]
     }
   },
-  "instructor-large": {
+  "text-embedding-ada-002": {
     "BitextMining": {
       "f1": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002"
         }
       ]
     },
     "Classification": {
       "accuracy": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "AmazonCounterfactualClassification (en)": 75.94,
+          "AmazonPolarityClassification": 86.72,
+          "AmazonReviewsClassification (zh)": 38.3,
+          "AmazonReviewsClassification (en)": 44.78,
+          "AmazonReviewsClassification (fr)": 43.76,
+          "Banking77Classification": 80.66,
+          "EmotionClassification": 48.74,
+          "IFlyTek": 44.62,
+          "ImdbClassification": 77.98,
+          "JDReview": 74.6,
+          "MTOPDomainClassification (en)": 92.13,
+          "MTOPDomainClassification (fr)": 89.38,
+          "MTOPIntentClassification (en)": 64.68,
+          "MTOPIntentClassification (fr)": 64.45,
+          "MasakhaNEWSClassification (fra)": 81.52,
+          "MassiveIntentClassification (zh-CN)": 64.81,
+          "MassiveIntentClassification (en)": 70.15,
+          "MassiveIntentClassification (fr)": 65.42,
+          "MassiveScenarioClassification (zh-CN)": 71.4,
+          "MassiveScenarioClassification (en)": 75.33,
+          "MassiveScenarioClassification (fr)": 71.11,
+          "MultilingualSentiment": 67.99,
+          "OnlineShopping": 88.94,
+          "TNews": 45.77,
+          "ToxicConversationsClassification": 72.29,
+          "TweetSentimentExtractionClassification": 61.81,
+          "Waimai": 82.37
         }
       ]
     },
     "Clustering": {
       "v_measure": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "AlloProfClusteringP2P": 64.83,
+          "AlloProfClusteringS2S": 53.52,
+          "ArxivClusteringP2P": 45.01,
+          "ArxivClusteringS2S": 36.85,
+          "BiorxivClusteringP2P": 36.66,
+          "BiorxivClusteringS2S": 34.21,
+          "CLSClusteringP2P": 38.26,
+          "CLSClusteringS2S": 35.91,
+          "HALClusteringS2S": 26.18,
+          "MLSUMClusteringP2P": 44.59,
+          "MLSUMClusteringS2S": 41.67,
+          "MasakhaNEWSClusteringP2P (fra)": 68.35,
+          "MasakhaNEWSClusteringS2S (fra)": 48.58,
+          "MedrxivClusteringP2P": 32.6,
+          "MedrxivClusteringS2S": 30.8,
+          "RedditClustering": 61.42,
+          "RedditClusteringP2P": 64.13,
+          "StackExchangeClustering": 72.22,
+          "StackExchangeClusteringP2P": 38.49,
+          "ThuNewsClusteringP2P": 58.71,
+          "ThuNewsClusteringS2S": 49.86,
+          "TwentyNewsgroupsClustering": 52.56
         }
       ]
     },
     "PairClassification": {
       "ap": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "Cmnli": 76.03,
+          "Ocnli": 63.08,
+          "OpusparcusPC (fr)": 94.12,
+          "PawsXPairClassification (fr)": 60.16,
+          "SprintDuplicateQuestions": 92.17,
+          "TwitterSemEval2015": 75.28,
+          "TwitterURLCorpus": 87.22
         }
       ]
     },
     "Reranking": {
       "map": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "AskUbuntuDupQuestions": 62.05,
+          "CMedQAv1": 63.08,
+          "CMedQAv2": 64.02,
+          "MMarcoReranking": 23.39,
+          "MindSmallReranking": 31.45,
+          "SciDocsRR": 81.22,
+          "StackOverflowDupQuestions": 50.54,
+          "SyntecReranking": 89.87,
+          "T2Reranking": 66.65
         }
       ]
     },
     "Retrieval": {
       "ndcg_at_10": [
         {
-          "Model": "instructor-large",
-          "BrightRetrieval (pony)": 1.32,
-          "BrightRetrieval (sustainable_living)": 13.16,
-          "BrightRetrieval (aops)": 7.94,
-          "BrightRetrieval (biology)": 15.61,
-          "BrightRetrieval (stackoverflow)": 11.21,
-          "BrightRetrieval (theoremqa_theorems)": 9.29,
-          "BrightRetrieval (psychology)": 21.94,
-          "BrightRetrieval (economics)": 15.99,
-          "BrightRetrieval (robotics)": 11.45,
-          "BrightRetrieval (leetcode)": 20.0,
-          "BrightRetrieval (earth_science)": 21.52,
-          "BrightRetrieval (theoremqa_questions)": 20.07
+          "Model": "text-embedding-ada-002",
+          "ARCChallenge": 13.3,
+          "AlloprofRetrieval": 51.64,
+          "AlphaNLI": 25.65,
+          "ArguAna": 57.44,
+          "BSARDRetrieval": 0.61,
+          "CQADupstackRetrieval": 41.69,
+          "ClimateFEVER": 21.64,
+          "CmedqaRetrieval": 22.36,
+          "CovidRetrieval": 57.21,
+          "DBPedia": 39.39,
+          "DuRetrieval": 71.17,
+          "EcomRetrieval": 44.49,
+          "FEVER": 74.99,
+          "FiQA2018": 44.41,
+          "HellaSwag": 29.29,
+          "HotpotQA": 60.9,
+          "MMarcoRetrieval": 69.86,
+          "MSMARCO": 40.91,
+          "MedicalRetrieval": 37.92,
+          "MintakaRetrieval (fr)": 29.94,
+          "NFCorpus": 36.97,
+          "NQ": 51.58,
+          "PIQA": 31.02,
+          "Quail": 5.83,
+          "QuoraRetrieval": 87.6,
+          "RARbCode": 83.39,
+          "RARbMath": 73.21,
+          "SCIDOCS": 18.36,
+          "SIQA": 3.14,
+          "SciFact": 72.75,
+          "SpartQA": 4.23,
+          "SyntecRetrieval": 85.97,
+          "T2Retrieval": 69.14,
+          "TRECCOVID": 68.47,
+          "TempReasonL1": 1.68,
+          "TempReasonL2Fact": 19.93,
+          "TempReasonL2Pure": 2.6,
+          "TempReasonL3Fact": 18.02,
+          "TempReasonL3Pure": 7.58,
+          "Touche2020": 21.61,
+          "VideoRetrieval": 43.85,
+          "WinoGrande": 19.65,
+          "XPQARetrieval (fr)": 73.0
         }
       ]
     },
     "STS": {
       "spearman": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "AFQMC": 23.88,
+          "ATEC": 29.25,
+          "BIOSSES": 86.35,
+          "BQ": 45.33,
+          "LCQMC": 68.41,
+          "PAWSX": 16.55,
+          "QBQTC": 30.27,
+          "SICK-R": 80.6,
+          "SICKFr": 76.28,
+          "STS12": 69.8,
+          "STS13": 83.27,
+          "STS14": 76.09,
+          "STS15": 86.12,
+          "STS16": 85.96,
+          "STS17 (en-en)": 90.25,
+          "STS22 (zh)": 62.53,
+          "STS22 (en)": 68.12,
+          "STS22 (tr)": 64.5,
+          "STS22 (fr)": 81.09,
+          "STSB": 70.61,
+          "STSBenchmark": 83.17,
+          "STSBenchmarkMultilingualSTS (fr)": 77.55
         }
       ]
     },
     "Summarization": {
       "spearman": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002",
+          "SummEval": 30.8,
+          "SummEvalFr": 30.5
         }
       ]
     },
     "InstructionRetrieval": {
       "p-MRR": [
         {
-          "Model": "instructor-large"
+          "Model": "text-embedding-ada-002"
         }
       ]
     }